1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20
21 #include <algorithm>
22
23 #define DEBUG_TYPE "format-parser"
24
25 namespace clang {
26 namespace format {
27
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30 virtual ~FormatTokenSource() {}
31 virtual FormatToken *getNextToken() = 0;
32
33 virtual unsigned getPosition() = 0;
34 virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36
37 namespace {
38
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42 bool MustBeDeclaration)
43 : Line(Line), Stack(Stack) {
44 Line.MustBeDeclaration = MustBeDeclaration;
45 Stack.push_back(MustBeDeclaration);
46 }
~ScopedDeclarationState()47 ~ScopedDeclarationState() {
48 Stack.pop_back();
49 if (!Stack.empty())
50 Line.MustBeDeclaration = Stack.back();
51 else
52 Line.MustBeDeclaration = true;
53 }
54
55 private:
56 UnwrappedLine &Line;
57 std::vector<bool> &Stack;
58 };
59
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68 const FormatToken *Previous,
69 const FormatToken *MinColumnToken) {
70 if (!Previous || !MinColumnToken)
71 return false;
72 unsigned MinContinueColumn =
73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75 isLineComment(*Previous) &&
76 FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82 FormatToken *&ResetToken)
83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85 Token(nullptr), PreviousToken(nullptr) {
86 FakeEOF.Tok.startToken();
87 FakeEOF.Tok.setKind(tok::eof);
88 TokenSource = this;
89 Line.Level = 0;
90 Line.InPPDirective = true;
91 }
92
~ScopedMacroState()93 ~ScopedMacroState() override {
94 TokenSource = PreviousTokenSource;
95 ResetToken = Token;
96 Line.InPPDirective = false;
97 Line.Level = PreviousLineLevel;
98 }
99
getNextToken()100 FormatToken *getNextToken() override {
101 // The \c UnwrappedLineParser guards against this by never calling
102 // \c getNextToken() after it has encountered the first eof token.
103 assert(!eof());
104 PreviousToken = Token;
105 Token = PreviousTokenSource->getNextToken();
106 if (eof())
107 return &FakeEOF;
108 return Token;
109 }
110
getPosition()111 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112
setPosition(unsigned Position)113 FormatToken *setPosition(unsigned Position) override {
114 PreviousToken = nullptr;
115 Token = PreviousTokenSource->setPosition(Position);
116 return Token;
117 }
118
119 private:
eof()120 bool eof() {
121 return Token && Token->HasUnescapedNewline &&
122 !continuesLineComment(*Token, PreviousToken,
123 /*MinColumnToken=*/PreviousToken);
124 }
125
126 FormatToken FakeEOF;
127 UnwrappedLine &Line;
128 FormatTokenSource *&TokenSource;
129 FormatToken *&ResetToken;
130 unsigned PreviousLineLevel;
131 FormatTokenSource *PreviousTokenSource;
132
133 FormatToken *Token;
134 FormatToken *PreviousToken;
135 };
136
137 } // end anonymous namespace
138
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141 ScopedLineState(UnwrappedLineParser &Parser,
142 bool SwitchToPreprocessorLines = false)
143 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144 if (SwitchToPreprocessorLines)
145 Parser.CurrentLines = &Parser.PreprocessorDirectives;
146 else if (!Parser.Line->Tokens.empty())
147 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148 PreBlockLine = std::move(Parser.Line);
149 Parser.Line = std::make_unique<UnwrappedLine>();
150 Parser.Line->Level = PreBlockLine->Level;
151 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152 }
153
~ScopedLineState()154 ~ScopedLineState() {
155 if (!Parser.Line->Tokens.empty()) {
156 Parser.addUnwrappedLine();
157 }
158 assert(Parser.Line->Tokens.empty());
159 Parser.Line = std::move(PreBlockLine);
160 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161 Parser.MustBreakBeforeNextToken = true;
162 Parser.CurrentLines = OriginalLines;
163 }
164
165 private:
166 UnwrappedLineParser &Parser;
167
168 std::unique_ptr<UnwrappedLine> PreBlockLine;
169 SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174 CompoundStatementIndenter(UnwrappedLineParser *Parser,
175 const FormatStyle &Style, unsigned &LineLevel)
176 : CompoundStatementIndenter(Parser, LineLevel,
177 Style.BraceWrapping.AfterControlStatement,
178 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180 bool WrapBrace, bool IndentBrace)
181 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182 if (WrapBrace)
183 Parser->addUnwrappedLine();
184 if (IndentBrace)
185 ++LineLevel;
186 }
~CompoundStatementIndenter()187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188
189 private:
190 unsigned &LineLevel;
191 unsigned OldLineLevel;
192 };
193
194 namespace {
195
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199 : Tokens(Tokens), Position(-1) {}
200
getNextToken()201 FormatToken *getNextToken() override {
202 ++Position;
203 return Tokens[Position];
204 }
205
getPosition()206 unsigned getPosition() override {
207 assert(Position >= 0);
208 return Position;
209 }
210
setPosition(unsigned P)211 FormatToken *setPosition(unsigned P) override {
212 Position = P;
213 return Tokens[Position];
214 }
215
reset()216 void reset() { Position = -1; }
217
218 private:
219 ArrayRef<FormatToken *> Tokens;
220 int Position;
221 };
222
223 } // end anonymous namespace
224
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226 const AdditionalKeywords &Keywords,
227 unsigned FirstStartColumn,
228 ArrayRef<FormatToken *> Tokens,
229 UnwrappedLineConsumer &Callback)
230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235 ? IG_Rejected
236 : IG_Inited),
237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238
reset()239 void UnwrappedLineParser::reset() {
240 PPBranchLevel = -1;
241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242 ? IG_Rejected
243 : IG_Inited;
244 IncludeGuardToken = nullptr;
245 Line.reset(new UnwrappedLine);
246 CommentsBeforeNextToken.clear();
247 FormatTok = nullptr;
248 MustBreakBeforeNextToken = false;
249 PreprocessorDirectives.clear();
250 CurrentLines = &Lines;
251 DeclarationScopeStack.clear();
252 PPStack.clear();
253 Line->FirstStartColumn = FirstStartColumn;
254 }
255
parse()256 void UnwrappedLineParser::parse() {
257 IndexedTokenSource TokenSource(AllTokens);
258 Line->FirstStartColumn = FirstStartColumn;
259 do {
260 LLVM_DEBUG(llvm::dbgs() << "----\n");
261 reset();
262 Tokens = &TokenSource;
263 TokenSource.reset();
264
265 readToken();
266 parseFile();
267
268 // If we found an include guard then all preprocessor directives (other than
269 // the guard) are over-indented by one.
270 if (IncludeGuard == IG_Found)
271 for (auto &Line : Lines)
272 if (Line.InPPDirective && Line.Level > 0)
273 --Line.Level;
274
275 // Create line with eof token.
276 pushToken(FormatTok);
277 addUnwrappedLine();
278
279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280 E = Lines.end();
281 I != E; ++I) {
282 Callback.consumeUnwrappedLine(*I);
283 }
284 Callback.finishRun();
285 Lines.clear();
286 while (!PPLevelBranchIndex.empty() &&
287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290 }
291 if (!PPLevelBranchIndex.empty()) {
292 ++PPLevelBranchIndex.back();
293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295 }
296 } while (!PPLevelBranchIndex.empty());
297 }
298
parseFile()299 void UnwrappedLineParser::parseFile() {
300 // The top-level context in a file always has declarations, except for pre-
301 // processor directives and JavaScript files.
302 bool MustBeDeclaration =
303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305 MustBeDeclaration);
306 if (Style.Language == FormatStyle::LK_TextProto)
307 parseBracedList();
308 else
309 parseLevel(/*HasOpeningBrace=*/false);
310 // Make sure to format the remaining tokens.
311 //
312 // LK_TextProto is special since its top-level is parsed as the body of a
313 // braced list, which does not necessarily have natural line separators such
314 // as a semicolon. Comments after the last entry that have been determined to
315 // not belong to that line, as in:
316 // key: value
317 // // endfile comment
318 // do not have a chance to be put on a line of their own until this point.
319 // Here we add this newline before end-of-file comments.
320 if (Style.Language == FormatStyle::LK_TextProto &&
321 !CommentsBeforeNextToken.empty())
322 addUnwrappedLine();
323 flushComments(true);
324 addUnwrappedLine();
325 }
326
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328 do {
329 switch (FormatTok->Tok.getKind()) {
330 case tok::l_brace:
331 return;
332 default:
333 if (FormatTok->is(Keywords.kw_where)) {
334 addUnwrappedLine();
335 nextToken();
336 parseCSharpGenericTypeConstraint();
337 break;
338 }
339 nextToken();
340 break;
341 }
342 } while (!eof());
343 }
344
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346 int UnpairedSquareBrackets = 1;
347 do {
348 switch (FormatTok->Tok.getKind()) {
349 case tok::r_square:
350 nextToken();
351 --UnpairedSquareBrackets;
352 if (UnpairedSquareBrackets == 0) {
353 addUnwrappedLine();
354 return;
355 }
356 break;
357 case tok::l_square:
358 ++UnpairedSquareBrackets;
359 nextToken();
360 break;
361 default:
362 nextToken();
363 break;
364 }
365 } while (!eof());
366 }
367
/// Parses a sequence of structural elements until eof.
///
/// \param HasOpeningBrace When true, a closing brace (or a token typed
/// \c TT_MacroBlockEnd) ends the level and is left unconsumed for the caller;
/// when false, such a token is consumed and put on a line of its own.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->getType() == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // Note: 'continue' jumps to the loop condition of the enclosing
      // do-while, skipping parseBlock() below.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek past any comments to the next token, then rewind to the stored
      // position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen in this level may raise the line level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement(/*IsTopLevel=*/true);
      break;
    }
  } while (!eof());
}
439
/// Looks ahead from the current '{' and assigns a block kind (\c BK_Block or
/// \c BK_BracedInit) to the braces it encounters, based on the tokens around
/// each closing brace. The token position is restored before returning.
///
/// \param ExpectClassBody When true, a ';' directly after the closing brace of
/// the outermost brace pair is not taken as a sign of a braced init list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces whose kind is still undecided are classified here.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.Language == FormatStyle::LK_JavaScript &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // The closing brace and its matching opening brace get the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    // Any of the following tokens marks the innermost undecided brace as a
    // block.
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->is(BK_Unknown))
      LBraceStack[i]->setBlockKind(BK_Block);
  }

  FormatTok = Tokens->setPosition(StoredPosition);
}
566
/// Mixes the \c std::hash value of \p v into the running hash \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
572
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574 size_t h = 0;
575 for (const auto &i : PPStack) {
576 hash_combine(h, size_t(i.Kind));
577 hash_combine(h, i.Line);
578 }
579 return h;
580 }
581
/// Parses a block that starts at the current '{' (or macro block begin
/// token), including its closing token, and restores the line level that was
/// in effect when the opening token was consumed.
///
/// \param MustBeDeclaration Declaration flag applied inside the block.
/// \param AddLevels Number of levels the block body is indented by.
/// \param MunchSemi Whether a ';' directly after the block is consumed too.
/// \param UnindentWhitesmithsBraces Whether the level is lowered by one after
/// the opening line has been added.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                     bool MunchSemi,
                                     bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Preprocessor branch state at the opening token; compared against the
  // state at the closing token further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;
  parseLevel(/*HasOpeningBrace=*/true);

  if (eof())
    return;

  // The level ended on something other than the expected closing token:
  // restore the level and bail out without consuming it.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Opening and closing lines are only linked when both tokens were seen
  // under the same preprocessor branch state.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
662
isGoogScope(const UnwrappedLine & Line)663 static bool isGoogScope(const UnwrappedLine &Line) {
664 // FIXME: Closure-library specific stuff should not be hard-coded but be
665 // configurable.
666 if (Line.Tokens.size() < 4)
667 return false;
668 auto I = Line.Tokens.begin();
669 if (I->Tok->TokenText != "goog")
670 return false;
671 ++I;
672 if (I->Tok->isNot(tok::period))
673 return false;
674 ++I;
675 if (I->Tok->TokenText != "scope")
676 return false;
677 ++I;
678 return I->Tok->is(tok::l_paren);
679 }
680
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)681 static bool isIIFE(const UnwrappedLine &Line,
682 const AdditionalKeywords &Keywords) {
683 // Look for the start of an immediately invoked anonymous function.
684 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
685 // This is commonly done in JavaScript to create a new, anonymous scope.
686 // Example: (function() { ... })()
687 if (Line.Tokens.size() < 3)
688 return false;
689 auto I = Line.Tokens.begin();
690 if (I->Tok->isNot(tok::l_paren))
691 return false;
692 ++I;
693 if (I->Tok->isNot(Keywords.kw_function))
694 return false;
695 ++I;
696 return I->Tok->is(tok::l_paren);
697 }
698
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)699 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
700 const FormatToken &InitialToken) {
701 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
702 return Style.BraceWrapping.AfterNamespace;
703 if (InitialToken.is(tok::kw_class))
704 return Style.BraceWrapping.AfterClass;
705 if (InitialToken.is(tok::kw_union))
706 return Style.BraceWrapping.AfterUnion;
707 if (InitialToken.is(tok::kw_struct))
708 return Style.BraceWrapping.AfterStruct;
709 return false;
710 }
711
parseChildBlock()712 void UnwrappedLineParser::parseChildBlock() {
713 FormatTok->setBlockKind(BK_Block);
714 nextToken();
715 {
716 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
717 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
718 ScopedLineState LineState(*this);
719 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
720 /*MustBeDeclaration=*/false);
721 Line->Level += SkipIndent ? 0 : 1;
722 parseLevel(/*HasOpeningBrace=*/true);
723 flushComments(isOnNewLine(*FormatTok));
724 Line->Level -= SkipIndent ? 0 : 1;
725 }
726 nextToken();
727 }
728
parsePPDirective()729 void UnwrappedLineParser::parsePPDirective() {
730 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
731 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
732
733 nextToken();
734
735 if (!FormatTok->Tok.getIdentifierInfo()) {
736 parsePPUnknown();
737 return;
738 }
739
740 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
741 case tok::pp_define:
742 parsePPDefine();
743 return;
744 case tok::pp_if:
745 parsePPIf(/*IfDef=*/false);
746 break;
747 case tok::pp_ifdef:
748 case tok::pp_ifndef:
749 parsePPIf(/*IfDef=*/true);
750 break;
751 case tok::pp_else:
752 parsePPElse();
753 break;
754 case tok::pp_elifdef:
755 case tok::pp_elifndef:
756 case tok::pp_elif:
757 parsePPElIf();
758 break;
759 case tok::pp_endif:
760 parsePPEndIf();
761 break;
762 default:
763 parsePPUnknown();
764 break;
765 }
766 }
767
conditionalCompilationCondition(bool Unreachable)768 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
769 size_t Line = CurrentLines->size();
770 if (CurrentLines == &PreprocessorDirectives)
771 Line += Lines.size();
772
773 if (Unreachable ||
774 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
775 PPStack.push_back({PP_Unreachable, Line});
776 else
777 PPStack.push_back({PP_Conditional, Line});
778 }
779
conditionalCompilationStart(bool Unreachable)780 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
781 ++PPBranchLevel;
782 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
783 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
784 PPLevelBranchIndex.push_back(0);
785 PPLevelBranchCount.push_back(0);
786 }
787 PPChainBranchIndex.push(0);
788 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
789 conditionalCompilationCondition(Unreachable || Skip);
790 }
791
conditionalCompilationAlternative()792 void UnwrappedLineParser::conditionalCompilationAlternative() {
793 if (!PPStack.empty())
794 PPStack.pop_back();
795 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
796 if (!PPChainBranchIndex.empty())
797 ++PPChainBranchIndex.top();
798 conditionalCompilationCondition(
799 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
800 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
801 }
802
conditionalCompilationEnd()803 void UnwrappedLineParser::conditionalCompilationEnd() {
804 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
805 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
806 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
807 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
808 }
809 }
810 // Guard against #endif's without #if.
811 if (PPBranchLevel > -1)
812 --PPBranchLevel;
813 if (!PPChainBranchIndex.empty())
814 PPChainBranchIndex.pop();
815 if (!PPStack.empty())
816 PPStack.pop_back();
817 }
818
parsePPIf(bool IfDef)819 void UnwrappedLineParser::parsePPIf(bool IfDef) {
820 bool IfNDef = FormatTok->is(tok::pp_ifndef);
821 nextToken();
822 bool Unreachable = false;
823 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
824 Unreachable = true;
825 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
826 Unreachable = true;
827 conditionalCompilationStart(Unreachable);
828 FormatToken *IfCondition = FormatTok;
829 // If there's a #ifndef on the first line, and the only lines before it are
830 // comments, it could be an include guard.
831 bool MaybeIncludeGuard = IfNDef;
832 if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
833 for (auto &Line : Lines) {
834 if (!Line.Tokens.front().Tok->is(tok::comment)) {
835 MaybeIncludeGuard = false;
836 IncludeGuard = IG_Rejected;
837 break;
838 }
839 }
840 --PPBranchLevel;
841 parsePPUnknown();
842 ++PPBranchLevel;
843 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
844 IncludeGuard = IG_IfNdefed;
845 IncludeGuardToken = IfCondition;
846 }
847 }
848
parsePPElse()849 void UnwrappedLineParser::parsePPElse() {
850 // If a potential include guard has an #else, it's not an include guard.
851 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
852 IncludeGuard = IG_Rejected;
853 conditionalCompilationAlternative();
854 if (PPBranchLevel > -1)
855 --PPBranchLevel;
856 parsePPUnknown();
857 ++PPBranchLevel;
858 }
859
parsePPElIf()860 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
861
parsePPEndIf()862 void UnwrappedLineParser::parsePPEndIf() {
863 conditionalCompilationEnd();
864 parsePPUnknown();
865 // If the #endif of a potential include guard is the last thing in the file,
866 // then we found an include guard.
867 unsigned TokenPosition = Tokens->getPosition();
868 FormatToken *PeekNext = AllTokens[TokenPosition];
869 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
870 PeekNext->is(tok::eof) &&
871 Style.IndentPPDirectives != FormatStyle::PPDIS_None)
872 IncludeGuard = IG_Found;
873 }
874
parsePPDefine()875 void UnwrappedLineParser::parsePPDefine() {
876 nextToken();
877
878 if (!FormatTok->Tok.getIdentifierInfo()) {
879 IncludeGuard = IG_Rejected;
880 IncludeGuardToken = nullptr;
881 parsePPUnknown();
882 return;
883 }
884
885 if (IncludeGuard == IG_IfNdefed &&
886 IncludeGuardToken->TokenText == FormatTok->TokenText) {
887 IncludeGuard = IG_Defined;
888 IncludeGuardToken = nullptr;
889 for (auto &Line : Lines) {
890 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
891 IncludeGuard = IG_Rejected;
892 break;
893 }
894 }
895 }
896
897 nextToken();
898 if (FormatTok->Tok.getKind() == tok::l_paren &&
899 FormatTok->WhitespaceRange.getBegin() ==
900 FormatTok->WhitespaceRange.getEnd()) {
901 parseParens();
902 }
903 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
904 Line->Level += PPBranchLevel + 1;
905 addUnwrappedLine();
906 ++Line->Level;
907
908 // Errors during a preprocessor directive can only affect the layout of the
909 // preprocessor directive, and thus we ignore them. An alternative approach
910 // would be to use the same approach we use on the file level (no
911 // re-indentation if there was a structural error) within the macro
912 // definition.
913 parseFile();
914 }
915
parsePPUnknown()916 void UnwrappedLineParser::parsePPUnknown() {
917 do {
918 nextToken();
919 } while (!eof());
920 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
921 Line->Level += PPBranchLevel + 1;
922 addUnwrappedLine();
923 }
924
925 // Here we exclude certain tokens that are not usually the first token in an
926 // unwrapped line. This is used in attempt to distinguish macro calls without
927 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)928 static bool tokenCanStartNewLine(const FormatToken &Tok) {
929 // Semicolon can be a null-statement, l_square can be a start of a macro or
930 // a C++11 attribute, but this doesn't seem to be common.
931 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
932 Tok.isNot(TT_AttributeSquare) &&
933 // Tokens that can only be used as binary operators and a part of
934 // overloaded operator names.
935 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
936 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
937 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
938 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
939 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
940 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
941 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
942 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
943 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
944 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
945 Tok.isNot(tok::lesslessequal) &&
946 // Colon is used in labels, base class lists, initializer lists,
947 // range-based for loops, ternary operator, but should never be the
948 // first token in an unwrapped line.
949 Tok.isNot(tok::colon) &&
950 // 'noexcept' is a trailing annotation.
951 Tok.isNot(tok::kw_noexcept);
952 }
953
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)954 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
955 const FormatToken *FormatTok) {
956 // FIXME: This returns true for C/C++ keywords like 'struct'.
957 return FormatTok->is(tok::identifier) &&
958 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
959 !FormatTok->isOneOf(
960 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
961 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
962 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
963 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
964 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
965 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
966 Keywords.kw_from));
967 }
968
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)969 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
970 const FormatToken *FormatTok) {
971 return FormatTok->Tok.isLiteral() ||
972 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
973 mustBeJSIdent(Keywords, FormatTok);
974 }
975
976 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
977 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)978 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
979 const FormatToken *FormatTok) {
980 return FormatTok->isOneOf(
981 tok::kw_return, Keywords.kw_yield,
982 // conditionals
983 tok::kw_if, tok::kw_else,
984 // loops
985 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
986 // switch/case
987 tok::kw_switch, tok::kw_case,
988 // exceptions
989 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
990 // declaration
991 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
992 Keywords.kw_async, Keywords.kw_function,
993 // import/export
994 Keywords.kw_import, tok::kw_export);
995 }
996
997 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)998 static bool isC78Type(const FormatToken &Tok) {
999 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1000 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1001 tok::identifier);
1002 }
1003
1004 // This function checks whether a token starts the first parameter declaration
1005 // in a K&R C (aka C78) function definition, e.g.:
1006 // int f(a, b)
1007 // short a, b;
1008 // {
1009 // return a + b;
1010 // }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1011 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1012 const FormatToken *FuncName) {
1013 assert(Tok);
1014 assert(Next);
1015 assert(FuncName);
1016
1017 if (FuncName->isNot(tok::identifier))
1018 return false;
1019
1020 const FormatToken *Prev = FuncName->Previous;
1021 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1022 return false;
1023
1024 if (!isC78Type(*Tok) &&
1025 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1026 return false;
1027
1028 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1029 return false;
1030
1031 Tok = Tok->Previous;
1032 if (!Tok || Tok->isNot(tok::r_paren))
1033 return false;
1034
1035 Tok = Tok->Previous;
1036 if (!Tok || Tok->isNot(tok::identifier))
1037 return false;
1038
1039 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1040 }
1041
1042 // readTokenWithJavaScriptASI reads the next token and terminates the current
1043 // line if JavaScript Automatic Semicolon Insertion must
1044 // happen between the current token and the next token.
1045 //
1046 // This method is conservative - it cannot cover all edge cases of JavaScript,
1047 // but only aims to correctly handle certain well known cases. It *must not*
1048 // return true in speculative cases.
readTokenWithJavaScriptASI()1049 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1050 FormatToken *Previous = FormatTok;
1051 readToken();
1052 FormatToken *Next = FormatTok;
1053
1054 bool IsOnSameLine =
1055 CommentsBeforeNextToken.empty()
1056 ? Next->NewlinesBefore == 0
1057 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1058 if (IsOnSameLine)
1059 return;
1060
1061 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1062 bool PreviousStartsTemplateExpr =
1063 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1064 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1065 // If the line contains an '@' sign, the previous token might be an
1066 // annotation, which can precede another identifier/value.
1067 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1068 [](UnwrappedLineNode &LineNode) {
1069 return LineNode.Tok->is(tok::at);
1070 }) != Line->Tokens.end();
1071 if (HasAt)
1072 return;
1073 }
1074 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1075 return addUnwrappedLine();
1076 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1077 bool NextEndsTemplateExpr =
1078 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1079 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1080 (PreviousMustBeValue ||
1081 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1082 tok::minusminus)))
1083 return addUnwrappedLine();
1084 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1085 isJSDeclOrStmt(Keywords, Next))
1086 return addUnwrappedLine();
1087 }
1088
/// Parses one structural element beginning at the current token.
///
/// The function works in two phases. The first switch dispatches on the
/// leading token to dedicated parsers (namespaces, control statements,
/// labels, extern blocks, import/export, special macros, ...); most of those
/// cases return directly. Anything that falls out of that switch is handled
/// by the loop below it, which consumes tokens until the element ends (for
/// example at a semicolon, a closing brace or after a block) and adds
/// unwrapped lines along the way.
///
/// \param IsTopLevel Only consulted after a parenthesized list, to decide
/// whether a K&R C parameter declaration should be looked for.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include followed by an optional string literal forms its own
  // unwrapped line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Mark every token up to the matching '}' as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; everything else falls through
    // to the generic loop.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        } else {
          unsigned AddLevels =
              Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import [public] "file" [;]
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // signals/slots style keywords followed by ':' form their own line.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the element ends or the input is exhausted.
  do {
    // Token preceding the one examined in this iteration.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      break;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // typedef followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      // Index into AllTokens of the token after the source's current
      // position; it is passed as the lookahead token below.
      const unsigned Position = Tokens->getPosition() + 1;
      assert(Position < AllTokens.size());
      if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parameter list and a
      // braced body, each of which is consumed if present.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Peek at the next token, then restore the token source position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only consider labels and macro-like usages when the identifier is the
      // first token of the line (optionally preceded by one comment).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either at least five
        // characters long or function-like, followed by a newline and a
        // token that may start a line, ends the unwrapped line here.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1584
tryToParsePropertyAccessor()1585 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1586 assert(FormatTok->is(tok::l_brace));
1587 if (!Style.isCSharp())
1588 return false;
1589 // See if it's a property accessor.
1590 if (FormatTok->Previous->isNot(tok::identifier))
1591 return false;
1592
1593 // See if we are inside a property accessor.
1594 //
1595 // Record the current tokenPosition so that we can advance and
1596 // reset the current token. `Next` is not set yet so we need
1597 // another way to advance along the token stream.
1598 unsigned int StoredPosition = Tokens->getPosition();
1599 FormatToken *Tok = Tokens->getNextToken();
1600
1601 // A trivial property accessor is of the form:
1602 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1603 // Track these as they do not require line breaks to be introduced.
1604 bool HasGetOrSet = false;
1605 bool IsTrivialPropertyAccessor = true;
1606 while (!eof()) {
1607 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1608 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1609 Keywords.kw_set)) {
1610 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1611 HasGetOrSet = true;
1612 Tok = Tokens->getNextToken();
1613 continue;
1614 }
1615 if (Tok->isNot(tok::r_brace))
1616 IsTrivialPropertyAccessor = false;
1617 break;
1618 }
1619
1620 if (!HasGetOrSet) {
1621 Tokens->setPosition(StoredPosition);
1622 return false;
1623 }
1624
1625 // Try to parse the property accessor:
1626 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1627 Tokens->setPosition(StoredPosition);
1628 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1629 addUnwrappedLine();
1630 nextToken();
1631 do {
1632 switch (FormatTok->Tok.getKind()) {
1633 case tok::r_brace:
1634 nextToken();
1635 if (FormatTok->is(tok::equal)) {
1636 while (!eof() && FormatTok->isNot(tok::semi))
1637 nextToken();
1638 nextToken();
1639 }
1640 addUnwrappedLine();
1641 return true;
1642 case tok::l_brace:
1643 ++Line->Level;
1644 parseBlock(/*MustBeDeclaration=*/true);
1645 addUnwrappedLine();
1646 --Line->Level;
1647 break;
1648 case tok::equal:
1649 if (FormatTok->is(TT_FatArrow)) {
1650 ++Line->Level;
1651 do {
1652 nextToken();
1653 } while (!eof() && FormatTok->isNot(tok::semi));
1654 nextToken();
1655 addUnwrappedLine();
1656 --Line->Level;
1657 break;
1658 }
1659 nextToken();
1660 break;
1661 default:
1662 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1663 !IsTrivialPropertyAccessor) {
1664 // Non-trivial get/set needs to be on its own line.
1665 addUnwrappedLine();
1666 }
1667 nextToken();
1668 }
1669 } while (!eof());
1670
1671 // Unreachable for well-formed code (paired '{' and '}').
1672 return true;
1673 }
1674
tryToParseLambda()1675 bool UnwrappedLineParser::tryToParseLambda() {
1676 if (!Style.isCpp()) {
1677 nextToken();
1678 return false;
1679 }
1680 assert(FormatTok->is(tok::l_square));
1681 FormatToken &LSquare = *FormatTok;
1682 if (!tryToParseLambdaIntroducer())
1683 return false;
1684
1685 bool SeenArrow = false;
1686
1687 while (FormatTok->isNot(tok::l_brace)) {
1688 if (FormatTok->isSimpleTypeSpecifier()) {
1689 nextToken();
1690 continue;
1691 }
1692 switch (FormatTok->Tok.getKind()) {
1693 case tok::l_brace:
1694 break;
1695 case tok::l_paren:
1696 parseParens();
1697 break;
1698 case tok::amp:
1699 case tok::star:
1700 case tok::kw_const:
1701 case tok::comma:
1702 case tok::less:
1703 case tok::greater:
1704 case tok::identifier:
1705 case tok::numeric_constant:
1706 case tok::coloncolon:
1707 case tok::kw_class:
1708 case tok::kw_mutable:
1709 case tok::kw_noexcept:
1710 case tok::kw_template:
1711 case tok::kw_typename:
1712 nextToken();
1713 break;
1714 // Specialization of a template with an integer parameter can contain
1715 // arithmetic, logical, comparison and ternary operators.
1716 //
1717 // FIXME: This also accepts sequences of operators that are not in the scope
1718 // of a template argument list.
1719 //
1720 // In a C++ lambda a template type can only occur after an arrow. We use
1721 // this as an heuristic to distinguish between Objective-C expressions
1722 // followed by an `a->b` expression, such as:
1723 // ([obj func:arg] + a->b)
1724 // Otherwise the code below would parse as a lambda.
1725 //
1726 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1727 // explicit template lists: []<bool b = true && false>(U &&u){}
1728 case tok::plus:
1729 case tok::minus:
1730 case tok::exclaim:
1731 case tok::tilde:
1732 case tok::slash:
1733 case tok::percent:
1734 case tok::lessless:
1735 case tok::pipe:
1736 case tok::pipepipe:
1737 case tok::ampamp:
1738 case tok::caret:
1739 case tok::equalequal:
1740 case tok::exclaimequal:
1741 case tok::greaterequal:
1742 case tok::lessequal:
1743 case tok::question:
1744 case tok::colon:
1745 case tok::ellipsis:
1746 case tok::kw_true:
1747 case tok::kw_false:
1748 if (SeenArrow) {
1749 nextToken();
1750 break;
1751 }
1752 return true;
1753 case tok::arrow:
1754 // This might or might not actually be a lambda arrow (this could be an
1755 // ObjC method invocation followed by a dereferencing arrow). We might
1756 // reset this back to TT_Unknown in TokenAnnotator.
1757 FormatTok->setType(TT_LambdaArrow);
1758 SeenArrow = true;
1759 nextToken();
1760 break;
1761 default:
1762 return true;
1763 }
1764 }
1765 FormatTok->setType(TT_LambdaLBrace);
1766 LSquare.setType(TT_LambdaLSquare);
1767 parseChildBlock();
1768 return true;
1769 }
1770
tryToParseLambdaIntroducer()1771 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1772 const FormatToken *Previous = FormatTok->Previous;
1773 if (Previous &&
1774 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1775 tok::kw_delete, tok::l_square) ||
1776 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1777 Previous->isSimpleTypeSpecifier())) {
1778 nextToken();
1779 return false;
1780 }
1781 nextToken();
1782 if (FormatTok->is(tok::l_square)) {
1783 return false;
1784 }
1785 parseSquare(/*LambdaIntroducer=*/true);
1786 return true;
1787 }
1788
tryToParseJSFunction()1789 void UnwrappedLineParser::tryToParseJSFunction() {
1790 assert(FormatTok->is(Keywords.kw_function) ||
1791 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1792 if (FormatTok->is(Keywords.kw_async))
1793 nextToken();
1794 // Consume "function".
1795 nextToken();
1796
1797 // Consume * (generator function). Treat it like C++'s overloaded operators.
1798 if (FormatTok->is(tok::star)) {
1799 FormatTok->setType(TT_OverloadedOperator);
1800 nextToken();
1801 }
1802
1803 // Consume function name.
1804 if (FormatTok->is(tok::identifier))
1805 nextToken();
1806
1807 if (FormatTok->isNot(tok::l_paren))
1808 return;
1809
1810 // Parse formal parameter list.
1811 parseParens();
1812
1813 if (FormatTok->is(tok::colon)) {
1814 // Parse a type definition.
1815 nextToken();
1816
1817 // Eat the type declaration. For braced inline object types, balance braces,
1818 // otherwise just parse until finding an l_brace for the function body.
1819 if (FormatTok->is(tok::l_brace))
1820 tryToParseBracedList();
1821 else
1822 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1823 nextToken();
1824 }
1825
1826 if (FormatTok->is(tok::semi))
1827 return;
1828
1829 parseChildBlock();
1830 }
1831
tryToParseBracedList()1832 bool UnwrappedLineParser::tryToParseBracedList() {
1833 if (FormatTok->is(BK_Unknown))
1834 calculateBraceTypes();
1835 assert(FormatTok->isNot(BK_Unknown));
1836 if (FormatTok->is(BK_Block))
1837 return false;
1838 nextToken();
1839 parseBracedList();
1840 return true;
1841 }
1842
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1843 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1844 bool IsEnum,
1845 tok::TokenKind ClosingBraceKind) {
1846 bool HasError = false;
1847
1848 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1849 // replace this by using parseAssignmentExpression() inside.
1850 do {
1851 if (Style.isCSharp()) {
1852 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1853 // TT_FatArrow. They always start an expression or a child block if
1854 // followed by a curly brace.
1855 if (FormatTok->is(TT_FatArrow)) {
1856 nextToken();
1857 if (FormatTok->is(tok::l_brace)) {
1858 // C# may break after => if the next character is a newline.
1859 if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1860 // calling `addUnwrappedLine()` here causes odd parsing errors.
1861 FormatTok->MustBreakBefore = true;
1862 }
1863 parseChildBlock();
1864 continue;
1865 }
1866 }
1867 }
1868 if (Style.Language == FormatStyle::LK_JavaScript) {
1869 if (FormatTok->is(Keywords.kw_function) ||
1870 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1871 tryToParseJSFunction();
1872 continue;
1873 }
1874 if (FormatTok->is(TT_FatArrow)) {
1875 nextToken();
1876 // Fat arrows can be followed by simple expressions or by child blocks
1877 // in curly braces.
1878 if (FormatTok->is(tok::l_brace)) {
1879 parseChildBlock();
1880 continue;
1881 }
1882 }
1883 if (FormatTok->is(tok::l_brace)) {
1884 // Could be a method inside of a braced list `{a() { return 1; }}`.
1885 if (tryToParseBracedList())
1886 continue;
1887 parseChildBlock();
1888 }
1889 }
1890 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1891 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1892 addUnwrappedLine();
1893 nextToken();
1894 return !HasError;
1895 }
1896 switch (FormatTok->Tok.getKind()) {
1897 case tok::caret:
1898 nextToken();
1899 if (FormatTok->is(tok::l_brace)) {
1900 parseChildBlock();
1901 }
1902 break;
1903 case tok::l_square:
1904 if (Style.isCSharp())
1905 parseSquare();
1906 else
1907 tryToParseLambda();
1908 break;
1909 case tok::l_paren:
1910 parseParens();
1911 // JavaScript can just have free standing methods and getters/setters in
1912 // object literals. Detect them by a "{" following ")".
1913 if (Style.Language == FormatStyle::LK_JavaScript) {
1914 if (FormatTok->is(tok::l_brace))
1915 parseChildBlock();
1916 break;
1917 }
1918 break;
1919 case tok::l_brace:
1920 // Assume there are no blocks inside a braced init list apart
1921 // from the ones we explicitly parse out (like lambdas).
1922 FormatTok->setBlockKind(BK_BracedInit);
1923 nextToken();
1924 parseBracedList();
1925 break;
1926 case tok::less:
1927 if (Style.Language == FormatStyle::LK_Proto) {
1928 nextToken();
1929 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1930 /*ClosingBraceKind=*/tok::greater);
1931 } else {
1932 nextToken();
1933 }
1934 break;
1935 case tok::semi:
1936 // JavaScript (or more precisely TypeScript) can have semicolons in braced
1937 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1938 // used for error recovery if we have otherwise determined that this is
1939 // a braced list.
1940 if (Style.Language == FormatStyle::LK_JavaScript) {
1941 nextToken();
1942 break;
1943 }
1944 HasError = true;
1945 if (!ContinueOnSemicolons)
1946 return !HasError;
1947 nextToken();
1948 break;
1949 case tok::comma:
1950 nextToken();
1951 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1952 addUnwrappedLine();
1953 break;
1954 default:
1955 nextToken();
1956 break;
1957 }
1958 } while (!eof());
1959 return false;
1960 }
1961
parseParens()1962 void UnwrappedLineParser::parseParens() {
1963 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1964 nextToken();
1965 do {
1966 switch (FormatTok->Tok.getKind()) {
1967 case tok::l_paren:
1968 parseParens();
1969 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1970 parseChildBlock();
1971 break;
1972 case tok::r_paren:
1973 nextToken();
1974 return;
1975 case tok::r_brace:
1976 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1977 return;
1978 case tok::l_square:
1979 tryToParseLambda();
1980 break;
1981 case tok::l_brace:
1982 if (!tryToParseBracedList())
1983 parseChildBlock();
1984 break;
1985 case tok::at:
1986 nextToken();
1987 if (FormatTok->Tok.is(tok::l_brace)) {
1988 nextToken();
1989 parseBracedList();
1990 }
1991 break;
1992 case tok::equal:
1993 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
1994 parseStructuralElement();
1995 else
1996 nextToken();
1997 break;
1998 case tok::kw_class:
1999 if (Style.Language == FormatStyle::LK_JavaScript)
2000 parseRecord(/*ParseAsExpr=*/true);
2001 else
2002 nextToken();
2003 break;
2004 case tok::identifier:
2005 if (Style.Language == FormatStyle::LK_JavaScript &&
2006 (FormatTok->is(Keywords.kw_function) ||
2007 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2008 tryToParseJSFunction();
2009 else
2010 nextToken();
2011 break;
2012 default:
2013 nextToken();
2014 break;
2015 }
2016 } while (!eof());
2017 }
2018
parseSquare(bool LambdaIntroducer)2019 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2020 if (!LambdaIntroducer) {
2021 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2022 if (tryToParseLambda())
2023 return;
2024 }
2025 do {
2026 switch (FormatTok->Tok.getKind()) {
2027 case tok::l_paren:
2028 parseParens();
2029 break;
2030 case tok::r_square:
2031 nextToken();
2032 return;
2033 case tok::r_brace:
2034 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2035 return;
2036 case tok::l_square:
2037 parseSquare();
2038 break;
2039 case tok::l_brace: {
2040 if (!tryToParseBracedList())
2041 parseChildBlock();
2042 break;
2043 }
2044 case tok::at:
2045 nextToken();
2046 if (FormatTok->Tok.is(tok::l_brace)) {
2047 nextToken();
2048 parseBracedList();
2049 }
2050 break;
2051 default:
2052 nextToken();
2053 break;
2054 }
2055 } while (!eof());
2056 }
2057
parseIfThenElse()2058 void UnwrappedLineParser::parseIfThenElse() {
2059 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2060 nextToken();
2061 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2062 nextToken();
2063 if (FormatTok->Tok.is(tok::l_paren))
2064 parseParens();
2065 // handle [[likely]] / [[unlikely]]
2066 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2067 parseSquare();
2068 bool NeedsUnwrappedLine = false;
2069 if (FormatTok->Tok.is(tok::l_brace)) {
2070 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2071 parseBlock(/*MustBeDeclaration=*/false);
2072 if (Style.BraceWrapping.BeforeElse)
2073 addUnwrappedLine();
2074 else
2075 NeedsUnwrappedLine = true;
2076 } else {
2077 addUnwrappedLine();
2078 ++Line->Level;
2079 parseStructuralElement();
2080 --Line->Level;
2081 }
2082 if (FormatTok->Tok.is(tok::kw_else)) {
2083 nextToken();
2084 // handle [[likely]] / [[unlikely]]
2085 if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2086 parseSquare();
2087 if (FormatTok->Tok.is(tok::l_brace)) {
2088 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2089 parseBlock(/*MustBeDeclaration=*/false);
2090 addUnwrappedLine();
2091 } else if (FormatTok->Tok.is(tok::kw_if)) {
2092 FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
2093 bool PrecededByComment = Previous->is(tok::comment);
2094 if (PrecededByComment) {
2095 addUnwrappedLine();
2096 ++Line->Level;
2097 }
2098 parseIfThenElse();
2099 if (PrecededByComment)
2100 --Line->Level;
2101 } else {
2102 addUnwrappedLine();
2103 ++Line->Level;
2104 parseStructuralElement();
2105 if (FormatTok->is(tok::eof))
2106 addUnwrappedLine();
2107 --Line->Level;
2108 }
2109 } else if (NeedsUnwrappedLine) {
2110 addUnwrappedLine();
2111 }
2112 }
2113
parseTryCatch()2114 void UnwrappedLineParser::parseTryCatch() {
2115 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2116 nextToken();
2117 bool NeedsUnwrappedLine = false;
2118 if (FormatTok->is(tok::colon)) {
2119 // We are in a function try block, what comes is an initializer list.
2120 nextToken();
2121
2122 // In case identifiers were removed by clang-tidy, what might follow is
2123 // multiple commas in sequence - before the first identifier.
2124 while (FormatTok->is(tok::comma))
2125 nextToken();
2126
2127 while (FormatTok->is(tok::identifier)) {
2128 nextToken();
2129 if (FormatTok->is(tok::l_paren))
2130 parseParens();
2131 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2132 FormatTok->is(tok::l_brace)) {
2133 do {
2134 nextToken();
2135 } while (!FormatTok->is(tok::r_brace));
2136 nextToken();
2137 }
2138
2139 // In case identifiers were removed by clang-tidy, what might follow is
2140 // multiple commas in sequence - after the first identifier.
2141 while (FormatTok->is(tok::comma))
2142 nextToken();
2143 }
2144 }
2145 // Parse try with resource.
2146 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2147 parseParens();
2148 }
2149 if (FormatTok->is(tok::l_brace)) {
2150 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2151 parseBlock(/*MustBeDeclaration=*/false);
2152 if (Style.BraceWrapping.BeforeCatch) {
2153 addUnwrappedLine();
2154 } else {
2155 NeedsUnwrappedLine = true;
2156 }
2157 } else if (!FormatTok->is(tok::kw_catch)) {
2158 // The C++ standard requires a compound-statement after a try.
2159 // If there's none, we try to assume there's a structuralElement
2160 // and try to continue.
2161 addUnwrappedLine();
2162 ++Line->Level;
2163 parseStructuralElement();
2164 --Line->Level;
2165 }
2166 while (1) {
2167 if (FormatTok->is(tok::at))
2168 nextToken();
2169 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2170 tok::kw___finally) ||
2171 ((Style.Language == FormatStyle::LK_Java ||
2172 Style.Language == FormatStyle::LK_JavaScript) &&
2173 FormatTok->is(Keywords.kw_finally)) ||
2174 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2175 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2176 break;
2177 nextToken();
2178 while (FormatTok->isNot(tok::l_brace)) {
2179 if (FormatTok->is(tok::l_paren)) {
2180 parseParens();
2181 continue;
2182 }
2183 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2184 return;
2185 nextToken();
2186 }
2187 NeedsUnwrappedLine = false;
2188 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2189 parseBlock(/*MustBeDeclaration=*/false);
2190 if (Style.BraceWrapping.BeforeCatch)
2191 addUnwrappedLine();
2192 else
2193 NeedsUnwrappedLine = true;
2194 }
2195 if (NeedsUnwrappedLine)
2196 addUnwrappedLine();
2197 }
2198
parseNamespace()2199 void UnwrappedLineParser::parseNamespace() {
2200 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2201 "'namespace' expected");
2202
2203 const FormatToken &InitialToken = *FormatTok;
2204 nextToken();
2205 if (InitialToken.is(TT_NamespaceMacro)) {
2206 parseParens();
2207 } else {
2208 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2209 tok::l_square)) {
2210 if (FormatTok->is(tok::l_square))
2211 parseSquare();
2212 else
2213 nextToken();
2214 }
2215 }
2216 if (FormatTok->Tok.is(tok::l_brace)) {
2217 if (ShouldBreakBeforeBrace(Style, InitialToken))
2218 addUnwrappedLine();
2219
2220 unsigned AddLevels =
2221 Style.NamespaceIndentation == FormatStyle::NI_All ||
2222 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2223 DeclarationScopeStack.size() > 1)
2224 ? 1u
2225 : 0u;
2226 bool ManageWhitesmithsBraces =
2227 AddLevels == 0u &&
2228 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2229
2230 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2231 // the whole block.
2232 if (ManageWhitesmithsBraces)
2233 ++Line->Level;
2234
2235 parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2236 /*MunchSemi=*/true,
2237 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2238
2239 // Munch the semicolon after a namespace. This is more common than one would
2240 // think. Putting the semicolon into its own line is very ugly.
2241 if (FormatTok->Tok.is(tok::semi))
2242 nextToken();
2243
2244 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2245
2246 if (ManageWhitesmithsBraces)
2247 --Line->Level;
2248 }
2249 // FIXME: Add error handling.
2250 }
2251
parseNew()2252 void UnwrappedLineParser::parseNew() {
2253 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2254 nextToken();
2255
2256 if (Style.isCSharp()) {
2257 do {
2258 if (FormatTok->is(tok::l_brace))
2259 parseBracedList();
2260
2261 if (FormatTok->isOneOf(tok::semi, tok::comma))
2262 return;
2263
2264 nextToken();
2265 } while (!eof());
2266 }
2267
2268 if (Style.Language != FormatStyle::LK_Java)
2269 return;
2270
2271 // In Java, we can parse everything up to the parens, which aren't optional.
2272 do {
2273 // There should not be a ;, { or } before the new's open paren.
2274 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2275 return;
2276
2277 // Consume the parens.
2278 if (FormatTok->is(tok::l_paren)) {
2279 parseParens();
2280
2281 // If there is a class body of an anonymous class, consume that as child.
2282 if (FormatTok->is(tok::l_brace))
2283 parseChildBlock();
2284 return;
2285 }
2286 nextToken();
2287 } while (!eof());
2288 }
2289
parseForOrWhileLoop()2290 void UnwrappedLineParser::parseForOrWhileLoop() {
2291 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2292 "'for', 'while' or foreach macro expected");
2293 nextToken();
2294 // JS' for await ( ...
2295 if (Style.Language == FormatStyle::LK_JavaScript &&
2296 FormatTok->is(Keywords.kw_await))
2297 nextToken();
2298 if (FormatTok->Tok.is(tok::l_paren))
2299 parseParens();
2300 if (FormatTok->Tok.is(tok::l_brace)) {
2301 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2302 parseBlock(/*MustBeDeclaration=*/false);
2303 addUnwrappedLine();
2304 } else {
2305 addUnwrappedLine();
2306 ++Line->Level;
2307 parseStructuralElement();
2308 --Line->Level;
2309 }
2310 }
2311
parseDoWhile()2312 void UnwrappedLineParser::parseDoWhile() {
2313 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2314 nextToken();
2315 if (FormatTok->Tok.is(tok::l_brace)) {
2316 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2317 parseBlock(/*MustBeDeclaration=*/false);
2318 if (Style.BraceWrapping.BeforeWhile)
2319 addUnwrappedLine();
2320 } else {
2321 addUnwrappedLine();
2322 ++Line->Level;
2323 parseStructuralElement();
2324 --Line->Level;
2325 }
2326
2327 // FIXME: Add error handling.
2328 if (!FormatTok->Tok.is(tok::kw_while)) {
2329 addUnwrappedLine();
2330 return;
2331 }
2332
2333 // If in Whitesmiths mode, the line with the while() needs to be indented
2334 // to the same level as the block.
2335 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2336 ++Line->Level;
2337
2338 nextToken();
2339 parseStructuralElement();
2340 }
2341
parseLabel(bool LeftAlignLabel)2342 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2343 nextToken();
2344 unsigned OldLineLevel = Line->Level;
2345 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2346 --Line->Level;
2347 if (LeftAlignLabel)
2348 Line->Level = 0;
2349
2350 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2351 FormatTok->Tok.is(tok::l_brace)) {
2352
2353 CompoundStatementIndenter Indenter(this, Line->Level,
2354 Style.BraceWrapping.AfterCaseLabel,
2355 Style.BraceWrapping.IndentBraces);
2356 parseBlock(/*MustBeDeclaration=*/false);
2357 if (FormatTok->Tok.is(tok::kw_break)) {
2358 if (Style.BraceWrapping.AfterControlStatement ==
2359 FormatStyle::BWACS_Always) {
2360 addUnwrappedLine();
2361 if (!Style.IndentCaseBlocks &&
2362 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2363 Line->Level++;
2364 }
2365 }
2366 parseStructuralElement();
2367 }
2368 addUnwrappedLine();
2369 } else {
2370 if (FormatTok->is(tok::semi))
2371 nextToken();
2372 addUnwrappedLine();
2373 }
2374 Line->Level = OldLineLevel;
2375 if (FormatTok->isNot(tok::l_brace)) {
2376 parseStructuralElement();
2377 addUnwrappedLine();
2378 }
2379 }
2380
parseCaseLabel()2381 void UnwrappedLineParser::parseCaseLabel() {
2382 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2383
2384 // FIXME: fix handling of complex expressions here.
2385 do {
2386 nextToken();
2387 } while (!eof() && !FormatTok->Tok.is(tok::colon));
2388 parseLabel();
2389 }
2390
parseSwitch()2391 void UnwrappedLineParser::parseSwitch() {
2392 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2393 nextToken();
2394 if (FormatTok->Tok.is(tok::l_paren))
2395 parseParens();
2396 if (FormatTok->Tok.is(tok::l_brace)) {
2397 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2398 parseBlock(/*MustBeDeclaration=*/false);
2399 addUnwrappedLine();
2400 } else {
2401 addUnwrappedLine();
2402 ++Line->Level;
2403 parseStructuralElement();
2404 --Line->Level;
2405 }
2406 }
2407
parseAccessSpecifier()2408 void UnwrappedLineParser::parseAccessSpecifier() {
2409 nextToken();
2410 // Understand Qt's slots.
2411 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2412 nextToken();
2413 // Otherwise, we don't know what it is, and we'd better keep the next token.
2414 if (FormatTok->Tok.is(tok::colon))
2415 nextToken();
2416 addUnwrappedLine();
2417 }
2418
parseConcept()2419 void UnwrappedLineParser::parseConcept() {
2420 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2421 nextToken();
2422 if (!FormatTok->Tok.is(tok::identifier))
2423 return;
2424 nextToken();
2425 if (!FormatTok->Tok.is(tok::equal))
2426 return;
2427 nextToken();
2428 if (FormatTok->Tok.is(tok::kw_requires)) {
2429 nextToken();
2430 parseRequiresExpression(Line->Level);
2431 } else {
2432 parseConstraintExpression(Line->Level);
2433 }
2434 }
2435
parseRequiresExpression(unsigned int OriginalLevel)2436 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2437 // requires (R range)
2438 if (FormatTok->Tok.is(tok::l_paren)) {
2439 parseParens();
2440 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2441 addUnwrappedLine();
2442 --Line->Level;
2443 }
2444 }
2445
2446 if (FormatTok->Tok.is(tok::l_brace)) {
2447 if (Style.BraceWrapping.AfterFunction)
2448 addUnwrappedLine();
2449 FormatTok->setType(TT_FunctionLBrace);
2450 parseBlock(/*MustBeDeclaration=*/false);
2451 addUnwrappedLine();
2452 } else {
2453 parseConstraintExpression(OriginalLevel);
2454 }
2455 }
2456
parseConstraintExpression(unsigned int OriginalLevel)2457 void UnwrappedLineParser::parseConstraintExpression(
2458 unsigned int OriginalLevel) {
2459 // requires Id<T> && Id<T> || Id<T>
2460 while (
2461 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2462 nextToken();
2463 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2464 tok::greater, tok::comma, tok::ellipsis)) {
2465 if (FormatTok->Tok.is(tok::less)) {
2466 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2467 /*ClosingBraceKind=*/tok::greater);
2468 continue;
2469 }
2470 nextToken();
2471 }
2472 if (FormatTok->Tok.is(tok::kw_requires)) {
2473 parseRequiresExpression(OriginalLevel);
2474 }
2475 if (FormatTok->Tok.is(tok::less)) {
2476 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2477 /*ClosingBraceKind=*/tok::greater);
2478 }
2479
2480 if (FormatTok->Tok.is(tok::l_paren)) {
2481 parseParens();
2482 }
2483 if (FormatTok->Tok.is(tok::l_brace)) {
2484 if (Style.BraceWrapping.AfterFunction)
2485 addUnwrappedLine();
2486 FormatTok->setType(TT_FunctionLBrace);
2487 parseBlock(/*MustBeDeclaration=*/false);
2488 }
2489 if (FormatTok->Tok.is(tok::semi)) {
2490 // Eat any trailing semi.
2491 nextToken();
2492 addUnwrappedLine();
2493 }
2494 if (FormatTok->Tok.is(tok::colon)) {
2495 return;
2496 }
2497 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2498 if (FormatTok->Previous &&
2499 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2500 tok::coloncolon)) {
2501 addUnwrappedLine();
2502 }
2503 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2504 --Line->Level;
2505 }
2506 break;
2507 } else {
2508 FormatTok->setType(TT_ConstraintJunctions);
2509 }
2510
2511 nextToken();
2512 }
2513 }
2514
parseRequires()2515 void UnwrappedLineParser::parseRequires() {
2516 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2517
2518 unsigned OriginalLevel = Line->Level;
2519 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2520 addUnwrappedLine();
2521 if (Style.IndentRequires) {
2522 Line->Level++;
2523 }
2524 }
2525 nextToken();
2526
2527 parseRequiresExpression(OriginalLevel);
2528 }
2529
parseEnum()2530 bool UnwrappedLineParser::parseEnum() {
2531 // Won't be 'enum' for NS_ENUMs.
2532 if (FormatTok->Tok.is(tok::kw_enum))
2533 nextToken();
2534
2535 const FormatToken &InitialToken = *FormatTok;
2536
2537 // In TypeScript, "enum" can also be used as property name, e.g. in interface
2538 // declarations. An "enum" keyword followed by a colon would be a syntax
2539 // error and thus assume it is just an identifier.
2540 if (Style.Language == FormatStyle::LK_JavaScript &&
2541 FormatTok->isOneOf(tok::colon, tok::question))
2542 return false;
2543
2544 // In protobuf, "enum" can be used as a field name.
2545 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2546 return false;
2547
2548 // Eat up enum class ...
2549 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2550 nextToken();
2551
2552 while (FormatTok->Tok.getIdentifierInfo() ||
2553 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2554 tok::greater, tok::comma, tok::question)) {
2555 nextToken();
2556 // We can have macros or attributes in between 'enum' and the enum name.
2557 if (FormatTok->is(tok::l_paren))
2558 parseParens();
2559 if (FormatTok->is(tok::identifier)) {
2560 nextToken();
2561 // If there are two identifiers in a row, this is likely an elaborate
2562 // return type. In Java, this can be "implements", etc.
2563 if (Style.isCpp() && FormatTok->is(tok::identifier))
2564 return false;
2565 }
2566 }
2567
2568 // Just a declaration or something is wrong.
2569 if (FormatTok->isNot(tok::l_brace))
2570 return true;
2571 FormatTok->setBlockKind(BK_Block);
2572
2573 if (Style.Language == FormatStyle::LK_Java) {
2574 // Java enums are different.
2575 parseJavaEnumBody();
2576 return true;
2577 }
2578 if (Style.Language == FormatStyle::LK_Proto) {
2579 parseBlock(/*MustBeDeclaration=*/true);
2580 return true;
2581 }
2582
2583 if (!Style.AllowShortEnumsOnASingleLine &&
2584 ShouldBreakBeforeBrace(Style, InitialToken))
2585 addUnwrappedLine();
2586 // Parse enum body.
2587 nextToken();
2588 if (!Style.AllowShortEnumsOnASingleLine) {
2589 addUnwrappedLine();
2590 Line->Level += 1;
2591 }
2592 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2593 /*IsEnum=*/true);
2594 if (!Style.AllowShortEnumsOnASingleLine)
2595 Line->Level -= 1;
2596 if (HasError) {
2597 if (FormatTok->is(tok::semi))
2598 nextToken();
2599 addUnwrappedLine();
2600 }
2601 return true;
2602
2603 // There is no addUnwrappedLine() here so that we fall through to parsing a
2604 // structural element afterwards. Thus, in "enum A {} n, m;",
2605 // "} n, m;" will end up in one unwrapped line.
2606 }
2607
parseStructLike()2608 bool UnwrappedLineParser::parseStructLike() {
2609 // parseRecord falls through and does not yet add an unwrapped line as a
2610 // record declaration or definition can start a structural element.
2611 parseRecord();
2612 // This does not apply to Java, JavaScript and C#.
2613 if (Style.Language == FormatStyle::LK_Java ||
2614 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2615 if (FormatTok->is(tok::semi))
2616 nextToken();
2617 addUnwrappedLine();
2618 return true;
2619 }
2620 return false;
2621 }
2622
2623 namespace {
2624 // A class used to set and restore the Token position when peeking
2625 // ahead in the token source.
2626 class ScopedTokenPosition {
2627 unsigned StoredPosition;
2628 FormatTokenSource *Tokens;
2629
2630 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2631 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2632 assert(Tokens && "Tokens expected to not be null");
2633 StoredPosition = Tokens->getPosition();
2634 }
2635
~ScopedTokenPosition()2636 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2637 };
2638 } // namespace
2639
2640 // Look to see if we have [[ by looking ahead, if
2641 // its not then rewind to the original position.
tryToParseSimpleAttribute()2642 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2643 ScopedTokenPosition AutoPosition(Tokens);
2644 FormatToken *Tok = Tokens->getNextToken();
2645 // We already read the first [ check for the second.
2646 if (Tok && !Tok->is(tok::l_square)) {
2647 return false;
2648 }
2649 // Double check that the attribute is just something
2650 // fairly simple.
2651 while (Tok) {
2652 if (Tok->is(tok::r_square)) {
2653 break;
2654 }
2655 Tok = Tokens->getNextToken();
2656 }
2657 Tok = Tokens->getNextToken();
2658 if (Tok && !Tok->is(tok::r_square)) {
2659 return false;
2660 }
2661 Tok = Tokens->getNextToken();
2662 if (Tok && Tok->is(tok::semi)) {
2663 return false;
2664 }
2665 return true;
2666 }
2667
parseJavaEnumBody()2668 void UnwrappedLineParser::parseJavaEnumBody() {
2669 // Determine whether the enum is simple, i.e. does not have a semicolon or
2670 // constants with class bodies. Simple enums can be formatted like braced
2671 // lists, contracted to a single line, etc.
2672 unsigned StoredPosition = Tokens->getPosition();
2673 bool IsSimple = true;
2674 FormatToken *Tok = Tokens->getNextToken();
2675 while (Tok) {
2676 if (Tok->is(tok::r_brace))
2677 break;
2678 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2679 IsSimple = false;
2680 break;
2681 }
2682 // FIXME: This will also mark enums with braces in the arguments to enum
2683 // constants as "not simple". This is probably fine in practice, though.
2684 Tok = Tokens->getNextToken();
2685 }
2686 FormatTok = Tokens->setPosition(StoredPosition);
2687
2688 if (IsSimple) {
2689 nextToken();
2690 parseBracedList();
2691 addUnwrappedLine();
2692 return;
2693 }
2694
2695 // Parse the body of a more complex enum.
2696 // First add a line for everything up to the "{".
2697 nextToken();
2698 addUnwrappedLine();
2699 ++Line->Level;
2700
2701 // Parse the enum constants.
2702 while (FormatTok) {
2703 if (FormatTok->is(tok::l_brace)) {
2704 // Parse the constant's class body.
2705 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2706 /*MunchSemi=*/false);
2707 } else if (FormatTok->is(tok::l_paren)) {
2708 parseParens();
2709 } else if (FormatTok->is(tok::comma)) {
2710 nextToken();
2711 addUnwrappedLine();
2712 } else if (FormatTok->is(tok::semi)) {
2713 nextToken();
2714 addUnwrappedLine();
2715 break;
2716 } else if (FormatTok->is(tok::r_brace)) {
2717 addUnwrappedLine();
2718 break;
2719 } else {
2720 nextToken();
2721 }
2722 }
2723
2724 // Parse the class body after the enum's ";" if any.
2725 parseLevel(/*HasOpeningBrace=*/true);
2726 nextToken();
2727 --Line->Level;
2728 addUnwrappedLine();
2729 }
2730
parseRecord(bool ParseAsExpr)2731 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2732 const FormatToken &InitialToken = *FormatTok;
2733 nextToken();
2734
2735 // The actual identifier can be a nested name specifier, and in macros
2736 // it is often token-pasted.
2737 // An [[attribute]] can be before the identifier.
2738 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2739 tok::kw___attribute, tok::kw___declspec,
2740 tok::kw_alignas, tok::l_square, tok::r_square) ||
2741 ((Style.Language == FormatStyle::LK_Java ||
2742 Style.Language == FormatStyle::LK_JavaScript) &&
2743 FormatTok->isOneOf(tok::period, tok::comma))) {
2744 if (Style.Language == FormatStyle::LK_JavaScript &&
2745 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2746 // JavaScript/TypeScript supports inline object types in
2747 // extends/implements positions:
2748 // class Foo implements {bar: number} { }
2749 nextToken();
2750 if (FormatTok->is(tok::l_brace)) {
2751 tryToParseBracedList();
2752 continue;
2753 }
2754 }
2755 bool IsNonMacroIdentifier =
2756 FormatTok->is(tok::identifier) &&
2757 FormatTok->TokenText != FormatTok->TokenText.upper();
2758 nextToken();
2759 // We can have macros or attributes in between 'class' and the class name.
2760 if (!IsNonMacroIdentifier) {
2761 if (FormatTok->Tok.is(tok::l_paren)) {
2762 parseParens();
2763 } else if (FormatTok->is(TT_AttributeSquare)) {
2764 parseSquare();
2765 // Consume the closing TT_AttributeSquare.
2766 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2767 nextToken();
2768 }
2769 }
2770 }
2771
2772 // Note that parsing away template declarations here leads to incorrectly
2773 // accepting function declarations as record declarations.
2774 // In general, we cannot solve this problem. Consider:
2775 // class A<int> B() {}
2776 // which can be a function definition or a class definition when B() is a
2777 // macro. If we find enough real-world cases where this is a problem, we
2778 // can parse for the 'template' keyword in the beginning of the statement,
2779 // and thus rule out the record production in case there is no template
2780 // (this would still leave us with an ambiguity between template function
2781 // and class declarations).
2782 if (FormatTok->isOneOf(tok::colon, tok::less)) {
2783 while (!eof()) {
2784 if (FormatTok->is(tok::l_brace)) {
2785 calculateBraceTypes(/*ExpectClassBody=*/true);
2786 if (!tryToParseBracedList())
2787 break;
2788 }
2789 if (FormatTok->Tok.is(tok::semi))
2790 return;
2791 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2792 addUnwrappedLine();
2793 nextToken();
2794 parseCSharpGenericTypeConstraint();
2795 break;
2796 }
2797 nextToken();
2798 }
2799 }
2800 if (FormatTok->Tok.is(tok::l_brace)) {
2801 if (ParseAsExpr) {
2802 parseChildBlock();
2803 } else {
2804 if (ShouldBreakBeforeBrace(Style, InitialToken))
2805 addUnwrappedLine();
2806
2807 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2808 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2809 }
2810 }
2811 // There is no addUnwrappedLine() here so that we fall through to parsing a
2812 // structural element afterwards. Thus, in "class A {} n, m;",
2813 // "} n, m;" will end up in one unwrapped line.
2814 }
2815
parseObjCMethod()2816 void UnwrappedLineParser::parseObjCMethod() {
2817 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2818 "'(' or identifier expected.");
2819 do {
2820 if (FormatTok->Tok.is(tok::semi)) {
2821 nextToken();
2822 addUnwrappedLine();
2823 return;
2824 } else if (FormatTok->Tok.is(tok::l_brace)) {
2825 if (Style.BraceWrapping.AfterFunction)
2826 addUnwrappedLine();
2827 parseBlock(/*MustBeDeclaration=*/false);
2828 addUnwrappedLine();
2829 return;
2830 } else {
2831 nextToken();
2832 }
2833 } while (!eof());
2834 }
2835
parseObjCProtocolList()2836 void UnwrappedLineParser::parseObjCProtocolList() {
2837 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2838 do {
2839 nextToken();
2840 // Early exit in case someone forgot a close angle.
2841 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2842 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2843 return;
2844 } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2845 nextToken(); // Skip '>'.
2846 }
2847
parseObjCUntilAtEnd()2848 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2849 do {
2850 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2851 nextToken();
2852 addUnwrappedLine();
2853 break;
2854 }
2855 if (FormatTok->is(tok::l_brace)) {
2856 parseBlock(/*MustBeDeclaration=*/false);
2857 // In ObjC interfaces, nothing should be following the "}".
2858 addUnwrappedLine();
2859 } else if (FormatTok->is(tok::r_brace)) {
2860 // Ignore stray "}". parseStructuralElement doesn't consume them.
2861 nextToken();
2862 addUnwrappedLine();
2863 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2864 nextToken();
2865 parseObjCMethod();
2866 } else {
2867 parseStructuralElement();
2868 }
2869 } while (!eof());
2870 }
2871
parseObjCInterfaceOrImplementation()2872 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2873 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2874 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2875 nextToken();
2876 nextToken(); // interface name
2877
2878 // @interface can be followed by a lightweight generic
2879 // specialization list, then either a base class or a category.
2880 if (FormatTok->Tok.is(tok::less)) {
2881 parseObjCLightweightGenerics();
2882 }
2883 if (FormatTok->Tok.is(tok::colon)) {
2884 nextToken();
2885 nextToken(); // base class name
2886 // The base class can also have lightweight generics applied to it.
2887 if (FormatTok->Tok.is(tok::less)) {
2888 parseObjCLightweightGenerics();
2889 }
2890 } else if (FormatTok->Tok.is(tok::l_paren))
2891 // Skip category, if present.
2892 parseParens();
2893
2894 if (FormatTok->Tok.is(tok::less))
2895 parseObjCProtocolList();
2896
2897 if (FormatTok->Tok.is(tok::l_brace)) {
2898 if (Style.BraceWrapping.AfterObjCDeclaration)
2899 addUnwrappedLine();
2900 parseBlock(/*MustBeDeclaration=*/true);
2901 }
2902
2903 // With instance variables, this puts '}' on its own line. Without instance
2904 // variables, this ends the @interface line.
2905 addUnwrappedLine();
2906
2907 parseObjCUntilAtEnd();
2908 }
2909
parseObjCLightweightGenerics()2910 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2911 assert(FormatTok->Tok.is(tok::less));
2912 // Unlike protocol lists, generic parameterizations support
2913 // nested angles:
2914 //
2915 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2916 // NSObject <NSCopying, NSSecureCoding>
2917 //
2918 // so we need to count how many open angles we have left.
2919 unsigned NumOpenAngles = 1;
2920 do {
2921 nextToken();
2922 // Early exit in case someone forgot a close angle.
2923 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2924 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2925 break;
2926 if (FormatTok->Tok.is(tok::less))
2927 ++NumOpenAngles;
2928 else if (FormatTok->Tok.is(tok::greater)) {
2929 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2930 --NumOpenAngles;
2931 }
2932 } while (!eof() && NumOpenAngles != 0);
2933 nextToken(); // Skip '>'.
2934 }
2935
2936 // Returns true for the declaration/definition form of @protocol,
2937 // false for the expression form.
parseObjCProtocol()2938 bool UnwrappedLineParser::parseObjCProtocol() {
2939 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2940 nextToken();
2941
2942 if (FormatTok->is(tok::l_paren))
2943 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2944 return false;
2945
2946 // The definition/declaration form,
2947 // @protocol Foo
2948 // - (int)someMethod;
2949 // @end
2950
2951 nextToken(); // protocol name
2952
2953 if (FormatTok->Tok.is(tok::less))
2954 parseObjCProtocolList();
2955
2956 // Check for protocol declaration.
2957 if (FormatTok->Tok.is(tok::semi)) {
2958 nextToken();
2959 addUnwrappedLine();
2960 return true;
2961 }
2962
2963 addUnwrappedLine();
2964 parseObjCUntilAtEnd();
2965 return true;
2966 }
2967
parseJavaScriptEs6ImportExport()2968 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2969 bool IsImport = FormatTok->is(Keywords.kw_import);
2970 assert(IsImport || FormatTok->is(tok::kw_export));
2971 nextToken();
2972
2973 // Consume the "default" in "export default class/function".
2974 if (FormatTok->is(tok::kw_default))
2975 nextToken();
2976
2977 // Consume "async function", "function" and "default function", so that these
2978 // get parsed as free-standing JS functions, i.e. do not require a trailing
2979 // semicolon.
2980 if (FormatTok->is(Keywords.kw_async))
2981 nextToken();
2982 if (FormatTok->is(Keywords.kw_function)) {
2983 nextToken();
2984 return;
2985 }
2986
2987 // For imports, `export *`, `export {...}`, consume the rest of the line up
2988 // to the terminating `;`. For everything else, just return and continue
2989 // parsing the structural element, i.e. the declaration or expression for
2990 // `export default`.
2991 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2992 !FormatTok->isStringLiteral())
2993 return;
2994
2995 while (!eof()) {
2996 if (FormatTok->is(tok::semi))
2997 return;
2998 if (Line->Tokens.empty()) {
2999 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3000 // import statement should terminate.
3001 return;
3002 }
3003 if (FormatTok->is(tok::l_brace)) {
3004 FormatTok->setBlockKind(BK_Block);
3005 nextToken();
3006 parseBracedList();
3007 } else {
3008 nextToken();
3009 }
3010 }
3011 }
3012
parseStatementMacro()3013 void UnwrappedLineParser::parseStatementMacro() {
3014 nextToken();
3015 if (FormatTok->is(tok::l_paren))
3016 parseParens();
3017 if (FormatTok->is(tok::semi))
3018 nextToken();
3019 addUnwrappedLine();
3020 }
3021
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")3022 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3023 StringRef Prefix = "") {
3024 llvm::dbgs() << Prefix << "Line(" << Line.Level
3025 << ", FSC=" << Line.FirstStartColumn << ")"
3026 << (Line.InPPDirective ? " MACRO" : "") << ": ";
3027 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3028 E = Line.Tokens.end();
3029 I != E; ++I) {
3030 llvm::dbgs() << I->Tok->Tok.getName() << "["
3031 << "T=" << (unsigned)I->Tok->getType()
3032 << ", OC=" << I->Tok->OriginalColumn << "] ";
3033 }
3034 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3035 E = Line.Tokens.end();
3036 I != E; ++I) {
3037 const UnwrappedLineNode &Node = *I;
3038 for (SmallVectorImpl<UnwrappedLine>::const_iterator
3039 I = Node.Children.begin(),
3040 E = Node.Children.end();
3041 I != E; ++I) {
3042 printDebugInfo(*I, "\nChild: ");
3043 }
3044 }
3045 llvm::dbgs() << "\n";
3046 }
3047
addUnwrappedLine(LineLevel AdjustLevel)3048 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3049 if (Line->Tokens.empty())
3050 return;
3051 LLVM_DEBUG({
3052 if (CurrentLines == &Lines)
3053 printDebugInfo(*Line);
3054 });
3055
3056 // If this line closes a block when in Whitesmiths mode, remember that
3057 // information so that the level can be decreased after the line is added.
3058 // This has to happen after the addition of the line since the line itself
3059 // needs to be indented.
3060 bool ClosesWhitesmithsBlock =
3061 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3062 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3063
3064 CurrentLines->push_back(std::move(*Line));
3065 Line->Tokens.clear();
3066 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3067 Line->FirstStartColumn = 0;
3068
3069 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3070 --Line->Level;
3071 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3072 CurrentLines->append(
3073 std::make_move_iterator(PreprocessorDirectives.begin()),
3074 std::make_move_iterator(PreprocessorDirectives.end()));
3075 PreprocessorDirectives.clear();
3076 }
3077 // Disconnect the current token from the last token on the previous line.
3078 FormatTok->Previous = nullptr;
3079 }
3080
eof() const3081 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3082
isOnNewLine(const FormatToken & FormatTok)3083 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3084 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3085 FormatTok.NewlinesBefore > 0;
3086 }
3087
3088 // Checks if \p FormatTok is a line comment that continues the line comment
3089 // section on \p Line.
3090 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)3091 continuesLineCommentSection(const FormatToken &FormatTok,
3092 const UnwrappedLine &Line,
3093 const llvm::Regex &CommentPragmasRegex) {
3094 if (Line.Tokens.empty())
3095 return false;
3096
3097 StringRef IndentContent = FormatTok.TokenText;
3098 if (FormatTok.TokenText.startswith("//") ||
3099 FormatTok.TokenText.startswith("/*"))
3100 IndentContent = FormatTok.TokenText.substr(2);
3101 if (CommentPragmasRegex.match(IndentContent))
3102 return false;
3103
3104 // If Line starts with a line comment, then FormatTok continues the comment
3105 // section if its original column is greater or equal to the original start
3106 // column of the line.
3107 //
3108 // Define the min column token of a line as follows: if a line ends in '{' or
3109 // contains a '{' followed by a line comment, then the min column token is
3110 // that '{'. Otherwise, the min column token of the line is the first token of
3111 // the line.
3112 //
3113 // If Line starts with a token other than a line comment, then FormatTok
3114 // continues the comment section if its original column is greater than the
3115 // original start column of the min column token of the line.
3116 //
3117 // For example, the second line comment continues the first in these cases:
3118 //
3119 // // first line
3120 // // second line
3121 //
3122 // and:
3123 //
3124 // // first line
3125 // // second line
3126 //
3127 // and:
3128 //
3129 // int i; // first line
3130 // // second line
3131 //
3132 // and:
3133 //
3134 // do { // first line
3135 // // second line
3136 // int i;
3137 // } while (true);
3138 //
3139 // and:
3140 //
3141 // enum {
3142 // a, // first line
3143 // // second line
3144 // b
3145 // };
3146 //
3147 // The second line comment doesn't continue the first in these cases:
3148 //
3149 // // first line
3150 // // second line
3151 //
3152 // and:
3153 //
3154 // int i; // first line
3155 // // second line
3156 //
3157 // and:
3158 //
3159 // do { // first line
3160 // // second line
3161 // int i;
3162 // } while (true);
3163 //
3164 // and:
3165 //
3166 // enum {
3167 // a, // first line
3168 // // second line
3169 // };
3170 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3171
3172 // Scan for '{//'. If found, use the column of '{' as a min column for line
3173 // comment section continuation.
3174 const FormatToken *PreviousToken = nullptr;
3175 for (const UnwrappedLineNode &Node : Line.Tokens) {
3176 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3177 isLineComment(*Node.Tok)) {
3178 MinColumnToken = PreviousToken;
3179 break;
3180 }
3181 PreviousToken = Node.Tok;
3182
3183 // Grab the last newline preceding a token in this unwrapped line.
3184 if (Node.Tok->NewlinesBefore > 0) {
3185 MinColumnToken = Node.Tok;
3186 }
3187 }
3188 if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3189 MinColumnToken = PreviousToken;
3190 }
3191
3192 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3193 MinColumnToken);
3194 }
3195
flushComments(bool NewlineBeforeNext)3196 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3197 bool JustComments = Line->Tokens.empty();
3198 for (SmallVectorImpl<FormatToken *>::const_iterator
3199 I = CommentsBeforeNextToken.begin(),
3200 E = CommentsBeforeNextToken.end();
3201 I != E; ++I) {
3202 // Line comments that belong to the same line comment section are put on the
3203 // same line since later we might want to reflow content between them.
3204 // Additional fine-grained breaking of line comment sections is controlled
3205 // by the class BreakableLineCommentSection in case it is desirable to keep
3206 // several line comment sections in the same unwrapped line.
3207 //
3208 // FIXME: Consider putting separate line comment sections as children to the
3209 // unwrapped line instead.
3210 (*I)->ContinuesLineCommentSection =
3211 continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3212 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3213 addUnwrappedLine();
3214 pushToken(*I);
3215 }
3216 if (NewlineBeforeNext && JustComments)
3217 addUnwrappedLine();
3218 CommentsBeforeNextToken.clear();
3219 }
3220
nextToken(int LevelDifference)3221 void UnwrappedLineParser::nextToken(int LevelDifference) {
3222 if (eof())
3223 return;
3224 flushComments(isOnNewLine(*FormatTok));
3225 pushToken(FormatTok);
3226 FormatToken *Previous = FormatTok;
3227 if (Style.Language != FormatStyle::LK_JavaScript)
3228 readToken(LevelDifference);
3229 else
3230 readTokenWithJavaScriptASI();
3231 FormatTok->Previous = Previous;
3232 }
3233
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3234 void UnwrappedLineParser::distributeComments(
3235 const SmallVectorImpl<FormatToken *> &Comments,
3236 const FormatToken *NextTok) {
3237 // Whether or not a line comment token continues a line is controlled by
3238 // the method continuesLineCommentSection, with the following caveat:
3239 //
3240 // Define a trail of Comments to be a nonempty proper postfix of Comments such
3241 // that each comment line from the trail is aligned with the next token, if
3242 // the next token exists. If a trail exists, the beginning of the maximal
3243 // trail is marked as a start of a new comment section.
3244 //
3245 // For example in this code:
3246 //
3247 // int a; // line about a
3248 // // line 1 about b
3249 // // line 2 about b
3250 // int b;
3251 //
3252 // the two lines about b form a maximal trail, so there are two sections, the
3253 // first one consisting of the single comment "// line about a" and the
3254 // second one consisting of the next two comments.
3255 if (Comments.empty())
3256 return;
3257 bool ShouldPushCommentsInCurrentLine = true;
3258 bool HasTrailAlignedWithNextToken = false;
3259 unsigned StartOfTrailAlignedWithNextToken = 0;
3260 if (NextTok) {
3261 // We are skipping the first element intentionally.
3262 for (unsigned i = Comments.size() - 1; i > 0; --i) {
3263 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3264 HasTrailAlignedWithNextToken = true;
3265 StartOfTrailAlignedWithNextToken = i;
3266 }
3267 }
3268 }
3269 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3270 FormatToken *FormatTok = Comments[i];
3271 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3272 FormatTok->ContinuesLineCommentSection = false;
3273 } else {
3274 FormatTok->ContinuesLineCommentSection =
3275 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3276 }
3277 if (!FormatTok->ContinuesLineCommentSection &&
3278 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3279 ShouldPushCommentsInCurrentLine = false;
3280 }
3281 if (ShouldPushCommentsInCurrentLine) {
3282 pushToken(FormatTok);
3283 } else {
3284 CommentsBeforeNextToken.push_back(FormatTok);
3285 }
3286 }
3287 }
3288
// Reads tokens from the token source into FormatTok until a non-comment token
// is found, then returns with FormatTok pointing at it. Comment tokens seen on
// the way are collected and handed to distributeComments() together with the
// token that follows them (or with nullptr when the loop ends at end of file).
//
// Preprocessor directives and tokens typed as conflict markers are handled
// inline before the token is examined. \p LevelDifference is added to
// Line->Level while a preprocessor directive is being parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments read since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that is the first token or follows an unescaped newline, seen
    // while not already inside a directive, starts a preprocessor directive.
    // This is a loop so that a directive directly following another one is
    // handled as well (presumably parsePPDirective() leaves FormatTok on the
    // first token after the directive -- confirm against its definition).
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // Scoped helper that lives for the rest of this iteration; presumably it
      // saves the line state and restores it on destruction -- confirm against
      // ScopedLineState.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Tokens typed as conflict start/alternative/end are routed to the
    // conditional-compilation handlers and then skipped; the token read after
    // each of them is forced to start on a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // While the innermost preprocessor branch is marked unreachable, tokens
    // outside of directives are dropped: neither returned nor collected. Note
    // that 'continue' in a do/while jumps to the '!eof()' check below.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // First non-comment token: place the collected comments relative to it
    // and stop reading.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended without returning a token; there is no next token to align
  // the remaining comments with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
3346
pushToken(FormatToken * Tok)3347 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3348 Line->Tokens.push_back(UnwrappedLineNode(Tok));
3349 if (MustBreakBeforeNextToken) {
3350 Line->Tokens.back().Tok->MustBreakBefore = true;
3351 MustBreakBeforeNextToken = false;
3352 }
3353 }
3354
3355 } // end namespace format
3356 } // end namespace clang
3357