1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
22
23 #include <algorithm>
24 #include <utility>
25
26 #define DEBUG_TYPE "format-parser"
27
28 namespace clang {
29 namespace format {
30
31 class FormatTokenSource {
32 public:
~FormatTokenSource()33 virtual ~FormatTokenSource() {}
34
35 // Returns the next token in the token stream.
36 virtual FormatToken *getNextToken() = 0;
37
38 // Returns the token preceding the token returned by the last call to
39 // getNextToken() in the token stream, or nullptr if no such token exists.
40 virtual FormatToken *getPreviousToken() = 0;
41
42 // Returns the token that would be returned by the next call to
43 // getNextToken().
44 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
45
46 // Returns whether we are at the end of the file.
47 // This can be different from whether getNextToken() returned an eof token
48 // when the FormatTokenSource is a view on a part of the token stream.
49 virtual bool isEOF() = 0;
50
51 // Gets the current position in the token stream, to be used by setPosition().
52 virtual unsigned getPosition() = 0;
53
54 // Resets the token stream to the state it was in when getPosition() returned
55 // Position, and return the token at that position in the stream.
56 virtual FormatToken *setPosition(unsigned Position) = 0;
57 };
58
59 namespace {
60
printLine(llvm::raw_ostream & OS,const UnwrappedLine & Line,StringRef Prefix="",bool PrintText=false)61 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
62 StringRef Prefix = "", bool PrintText = false) {
63 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
64 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
65 bool NewLine = false;
66 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
67 E = Line.Tokens.end();
68 I != E; ++I) {
69 if (NewLine) {
70 OS << Prefix;
71 NewLine = false;
72 }
73 OS << I->Tok->Tok.getName() << "["
74 << "T=" << (unsigned)I->Tok->getType()
75 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
76 << "\"] ";
77 for (SmallVectorImpl<UnwrappedLine>::const_iterator
78 CI = I->Children.begin(),
79 CE = I->Children.end();
80 CI != CE; ++CI) {
81 OS << "\n";
82 printLine(OS, *CI, (Prefix + " ").str());
83 NewLine = true;
84 }
85 }
86 if (!NewLine)
87 OS << "\n";
88 }
89
printDebugInfo(const UnwrappedLine & Line)90 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
91 printLine(llvm::dbgs(), Line);
92 }
93
94 class ScopedDeclarationState {
95 public:
ScopedDeclarationState(UnwrappedLine & Line,llvm::BitVector & Stack,bool MustBeDeclaration)96 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
97 bool MustBeDeclaration)
98 : Line(Line), Stack(Stack) {
99 Line.MustBeDeclaration = MustBeDeclaration;
100 Stack.push_back(MustBeDeclaration);
101 }
~ScopedDeclarationState()102 ~ScopedDeclarationState() {
103 Stack.pop_back();
104 if (!Stack.empty())
105 Line.MustBeDeclaration = Stack.back();
106 else
107 Line.MustBeDeclaration = true;
108 }
109
110 private:
111 UnwrappedLine &Line;
112 llvm::BitVector &Stack;
113 };
114
isLineComment(const FormatToken & FormatTok)115 static bool isLineComment(const FormatToken &FormatTok) {
116 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
117 }
118
119 // Checks if \p FormatTok is a line comment that continues the line comment
120 // \p Previous. The original column of \p MinColumnToken is used to determine
121 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)122 static bool continuesLineComment(const FormatToken &FormatTok,
123 const FormatToken *Previous,
124 const FormatToken *MinColumnToken) {
125 if (!Previous || !MinColumnToken)
126 return false;
127 unsigned MinContinueColumn =
128 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
129 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
130 isLineComment(*Previous) &&
131 FormatTok.OriginalColumn >= MinContinueColumn;
132 }
133
134 class ScopedMacroState : public FormatTokenSource {
135 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)136 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
137 FormatToken *&ResetToken)
138 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
139 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
140 Token(nullptr), PreviousToken(nullptr) {
141 FakeEOF.Tok.startToken();
142 FakeEOF.Tok.setKind(tok::eof);
143 TokenSource = this;
144 Line.Level = 0;
145 Line.InPPDirective = true;
146 // InMacroBody gets set after the `#define x` part.
147 }
148
~ScopedMacroState()149 ~ScopedMacroState() override {
150 TokenSource = PreviousTokenSource;
151 ResetToken = Token;
152 Line.InPPDirective = false;
153 Line.InMacroBody = false;
154 Line.Level = PreviousLineLevel;
155 }
156
getNextToken()157 FormatToken *getNextToken() override {
158 // The \c UnwrappedLineParser guards against this by never calling
159 // \c getNextToken() after it has encountered the first eof token.
160 assert(!eof());
161 PreviousToken = Token;
162 Token = PreviousTokenSource->getNextToken();
163 if (eof())
164 return &FakeEOF;
165 return Token;
166 }
167
getPreviousToken()168 FormatToken *getPreviousToken() override {
169 return PreviousTokenSource->getPreviousToken();
170 }
171
peekNextToken(bool SkipComment)172 FormatToken *peekNextToken(bool SkipComment) override {
173 if (eof())
174 return &FakeEOF;
175 return PreviousTokenSource->peekNextToken(SkipComment);
176 }
177
isEOF()178 bool isEOF() override { return PreviousTokenSource->isEOF(); }
179
getPosition()180 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
181
setPosition(unsigned Position)182 FormatToken *setPosition(unsigned Position) override {
183 PreviousToken = nullptr;
184 Token = PreviousTokenSource->setPosition(Position);
185 return Token;
186 }
187
188 private:
eof()189 bool eof() {
190 return Token && Token->HasUnescapedNewline &&
191 !continuesLineComment(*Token, PreviousToken,
192 /*MinColumnToken=*/PreviousToken);
193 }
194
195 FormatToken FakeEOF;
196 UnwrappedLine &Line;
197 FormatTokenSource *&TokenSource;
198 FormatToken *&ResetToken;
199 unsigned PreviousLineLevel;
200 FormatTokenSource *PreviousTokenSource;
201
202 FormatToken *Token;
203 FormatToken *PreviousToken;
204 };
205
206 } // end anonymous namespace
207
208 class ScopedLineState {
209 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)210 ScopedLineState(UnwrappedLineParser &Parser,
211 bool SwitchToPreprocessorLines = false)
212 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
213 if (SwitchToPreprocessorLines)
214 Parser.CurrentLines = &Parser.PreprocessorDirectives;
215 else if (!Parser.Line->Tokens.empty())
216 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
217 PreBlockLine = std::move(Parser.Line);
218 Parser.Line = std::make_unique<UnwrappedLine>();
219 Parser.Line->Level = PreBlockLine->Level;
220 Parser.Line->PPLevel = PreBlockLine->PPLevel;
221 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
222 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
223 }
224
~ScopedLineState()225 ~ScopedLineState() {
226 if (!Parser.Line->Tokens.empty())
227 Parser.addUnwrappedLine();
228 assert(Parser.Line->Tokens.empty());
229 Parser.Line = std::move(PreBlockLine);
230 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
231 Parser.MustBreakBeforeNextToken = true;
232 Parser.CurrentLines = OriginalLines;
233 }
234
235 private:
236 UnwrappedLineParser &Parser;
237
238 std::unique_ptr<UnwrappedLine> PreBlockLine;
239 SmallVectorImpl<UnwrappedLine> *OriginalLines;
240 };
241
242 class CompoundStatementIndenter {
243 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)244 CompoundStatementIndenter(UnwrappedLineParser *Parser,
245 const FormatStyle &Style, unsigned &LineLevel)
246 : CompoundStatementIndenter(Parser, LineLevel,
247 Style.BraceWrapping.AfterControlStatement,
248 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)249 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
250 bool WrapBrace, bool IndentBrace)
251 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
252 if (WrapBrace)
253 Parser->addUnwrappedLine();
254 if (IndentBrace)
255 ++LineLevel;
256 }
~CompoundStatementIndenter()257 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
258
259 private:
260 unsigned &LineLevel;
261 unsigned OldLineLevel;
262 };
263
264 namespace {
265
266 class IndexedTokenSource : public FormatTokenSource {
267 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)268 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
269 : Tokens(Tokens), Position(-1) {}
270
getNextToken()271 FormatToken *getNextToken() override {
272 if (Position >= 0 && isEOF()) {
273 LLVM_DEBUG({
274 llvm::dbgs() << "Next ";
275 dbgToken(Position);
276 });
277 return Tokens[Position];
278 }
279 ++Position;
280 LLVM_DEBUG({
281 llvm::dbgs() << "Next ";
282 dbgToken(Position);
283 });
284 return Tokens[Position];
285 }
286
getPreviousToken()287 FormatToken *getPreviousToken() override {
288 return Position > 0 ? Tokens[Position - 1] : nullptr;
289 }
290
peekNextToken(bool SkipComment)291 FormatToken *peekNextToken(bool SkipComment) override {
292 int Next = Position + 1;
293 if (SkipComment)
294 while (Tokens[Next]->is(tok::comment))
295 ++Next;
296 LLVM_DEBUG({
297 llvm::dbgs() << "Peeking ";
298 dbgToken(Next);
299 });
300 return Tokens[Next];
301 }
302
isEOF()303 bool isEOF() override { return Tokens[Position]->is(tok::eof); }
304
getPosition()305 unsigned getPosition() override {
306 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
307 assert(Position >= 0);
308 return Position;
309 }
310
setPosition(unsigned P)311 FormatToken *setPosition(unsigned P) override {
312 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
313 Position = P;
314 return Tokens[Position];
315 }
316
reset()317 void reset() { Position = -1; }
318
319 private:
dbgToken(int Position,llvm::StringRef Indent="")320 void dbgToken(int Position, llvm::StringRef Indent = "") {
321 FormatToken *Tok = Tokens[Position];
322 llvm::dbgs() << Indent << "[" << Position
323 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
324 << ", Macro: " << !!Tok->MacroCtx << "\n";
325 }
326
327 ArrayRef<FormatToken *> Tokens;
328 int Position;
329 };
330
331 } // end anonymous namespace
332
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)333 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
334 const AdditionalKeywords &Keywords,
335 unsigned FirstStartColumn,
336 ArrayRef<FormatToken *> Tokens,
337 UnwrappedLineConsumer &Callback)
338 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
339 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
340 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
341 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
342 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
343 ? IG_Rejected
344 : IG_Inited),
345 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
346
reset()347 void UnwrappedLineParser::reset() {
348 PPBranchLevel = -1;
349 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
350 ? IG_Rejected
351 : IG_Inited;
352 IncludeGuardToken = nullptr;
353 Line.reset(new UnwrappedLine);
354 CommentsBeforeNextToken.clear();
355 FormatTok = nullptr;
356 MustBreakBeforeNextToken = false;
357 PreprocessorDirectives.clear();
358 CurrentLines = &Lines;
359 DeclarationScopeStack.clear();
360 NestedTooDeep.clear();
361 PPStack.clear();
362 Line->FirstStartColumn = FirstStartColumn;
363 }
364
parse()365 void UnwrappedLineParser::parse() {
366 IndexedTokenSource TokenSource(AllTokens);
367 Line->FirstStartColumn = FirstStartColumn;
368 do {
369 LLVM_DEBUG(llvm::dbgs() << "----\n");
370 reset();
371 Tokens = &TokenSource;
372 TokenSource.reset();
373
374 readToken();
375 parseFile();
376
377 // If we found an include guard then all preprocessor directives (other than
378 // the guard) are over-indented by one.
379 if (IncludeGuard == IG_Found) {
380 for (auto &Line : Lines)
381 if (Line.InPPDirective && Line.Level > 0)
382 --Line.Level;
383 }
384
385 // Create line with eof token.
386 pushToken(FormatTok);
387 addUnwrappedLine();
388
389 for (const UnwrappedLine &Line : Lines)
390 Callback.consumeUnwrappedLine(Line);
391
392 Callback.finishRun();
393 Lines.clear();
394 while (!PPLevelBranchIndex.empty() &&
395 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
396 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
397 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
398 }
399 if (!PPLevelBranchIndex.empty()) {
400 ++PPLevelBranchIndex.back();
401 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
402 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
403 }
404 } while (!PPLevelBranchIndex.empty());
405 }
406
parseFile()407 void UnwrappedLineParser::parseFile() {
408 // The top-level context in a file always has declarations, except for pre-
409 // processor directives and JavaScript files.
410 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
411 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
412 MustBeDeclaration);
413 if (Style.Language == FormatStyle::LK_TextProto)
414 parseBracedList();
415 else
416 parseLevel();
417 // Make sure to format the remaining tokens.
418 //
419 // LK_TextProto is special since its top-level is parsed as the body of a
420 // braced list, which does not necessarily have natural line separators such
421 // as a semicolon. Comments after the last entry that have been determined to
422 // not belong to that line, as in:
423 // key: value
424 // // endfile comment
425 // do not have a chance to be put on a line of their own until this point.
426 // Here we add this newline before end-of-file comments.
427 if (Style.Language == FormatStyle::LK_TextProto &&
428 !CommentsBeforeNextToken.empty()) {
429 addUnwrappedLine();
430 }
431 flushComments(true);
432 addUnwrappedLine();
433 }
434
parseCSharpGenericTypeConstraint()435 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
436 do {
437 switch (FormatTok->Tok.getKind()) {
438 case tok::l_brace:
439 return;
440 default:
441 if (FormatTok->is(Keywords.kw_where)) {
442 addUnwrappedLine();
443 nextToken();
444 parseCSharpGenericTypeConstraint();
445 break;
446 }
447 nextToken();
448 break;
449 }
450 } while (!eof());
451 }
452
parseCSharpAttribute()453 void UnwrappedLineParser::parseCSharpAttribute() {
454 int UnpairedSquareBrackets = 1;
455 do {
456 switch (FormatTok->Tok.getKind()) {
457 case tok::r_square:
458 nextToken();
459 --UnpairedSquareBrackets;
460 if (UnpairedSquareBrackets == 0) {
461 addUnwrappedLine();
462 return;
463 }
464 break;
465 case tok::l_square:
466 ++UnpairedSquareBrackets;
467 nextToken();
468 break;
469 default:
470 nextToken();
471 break;
472 }
473 } while (!eof());
474 }
475
precededByCommentOrPPDirective() const476 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
477 if (!Lines.empty() && Lines.back().InPPDirective)
478 return true;
479
480 const FormatToken *Previous = Tokens->getPreviousToken();
481 return Previous && Previous->is(tok::comment) &&
482 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
483 }
484
485 /// \brief Parses a level, that is ???.
486 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
487 /// \param CanContainBracedList If the content can contain (at any level) a
488 /// braced list.
489 /// \param NextLBracesType The type for left brace found in this level.
490 /// \param IfKind The \p if statement kind in the level.
491 /// \param IfLeftBrace The left brace of the \p if block in the level.
492 /// \returns true if a simple block of if/else/for/while, or false otherwise.
493 /// (A simple block has a single statement.)
parseLevel(const FormatToken * OpeningBrace,bool CanContainBracedList,TokenType NextLBracesType,IfStmtKind * IfKind,FormatToken ** IfLeftBrace)494 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
495 bool CanContainBracedList,
496 TokenType NextLBracesType,
497 IfStmtKind *IfKind,
498 FormatToken **IfLeftBrace) {
499 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
500 ? TT_BracedListLBrace
501 : TT_Unknown;
502 const bool IsPrecededByCommentOrPPDirective =
503 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
504 FormatToken *IfLBrace = nullptr;
505 bool HasDoWhile = false;
506 bool HasLabel = false;
507 unsigned StatementCount = 0;
508 bool SwitchLabelEncountered = false;
509
510 do {
511 if (FormatTok->getType() == TT_AttributeMacro) {
512 nextToken();
513 continue;
514 }
515 tok::TokenKind kind = FormatTok->Tok.getKind();
516 if (FormatTok->getType() == TT_MacroBlockBegin)
517 kind = tok::l_brace;
518 else if (FormatTok->getType() == TT_MacroBlockEnd)
519 kind = tok::r_brace;
520
521 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
522 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
523 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
524 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
525 HasLabel ? nullptr : &HasLabel);
526 ++StatementCount;
527 assert(StatementCount > 0 && "StatementCount overflow!");
528 };
529
530 switch (kind) {
531 case tok::comment:
532 nextToken();
533 addUnwrappedLine();
534 break;
535 case tok::l_brace:
536 if (NextLBracesType != TT_Unknown) {
537 FormatTok->setFinalizedType(NextLBracesType);
538 } else if (FormatTok->Previous &&
539 FormatTok->Previous->ClosesRequiresClause) {
540 // We need the 'default' case here to correctly parse a function
541 // l_brace.
542 ParseDefault();
543 continue;
544 }
545 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
546 tryToParseBracedList()) {
547 continue;
548 }
549 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
550 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
551 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
552 NextLBracesType);
553 ++StatementCount;
554 assert(StatementCount > 0 && "StatementCount overflow!");
555 addUnwrappedLine();
556 break;
557 case tok::r_brace:
558 if (OpeningBrace) {
559 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
560 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
561 return false;
562 }
563 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
564 HasDoWhile || IsPrecededByCommentOrPPDirective ||
565 precededByCommentOrPPDirective()) {
566 return false;
567 }
568 const FormatToken *Next = Tokens->peekNextToken();
569 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
570 return false;
571 if (IfLeftBrace)
572 *IfLeftBrace = IfLBrace;
573 return true;
574 }
575 nextToken();
576 addUnwrappedLine();
577 break;
578 case tok::kw_default: {
579 unsigned StoredPosition = Tokens->getPosition();
580 FormatToken *Next;
581 do {
582 Next = Tokens->getNextToken();
583 assert(Next);
584 } while (Next->is(tok::comment));
585 FormatTok = Tokens->setPosition(StoredPosition);
586 if (Next->isNot(tok::colon)) {
587 // default not followed by ':' is not a case label; treat it like
588 // an identifier.
589 parseStructuralElement();
590 break;
591 }
592 // Else, if it is 'default:', fall through to the case handling.
593 [[fallthrough]];
594 }
595 case tok::kw_case:
596 if (Style.isProto() || Style.isVerilog() ||
597 (Style.isJavaScript() && Line->MustBeDeclaration)) {
598 // Proto: there are no switch/case statements
599 // Verilog: Case labels don't have this word. We handle case
600 // labels including default in TokenAnnotator.
601 // JavaScript: A 'case: string' style field declaration.
602 ParseDefault();
603 break;
604 }
605 if (!SwitchLabelEncountered &&
606 (Style.IndentCaseLabels ||
607 (Line->InPPDirective && Line->Level == 1))) {
608 ++Line->Level;
609 }
610 SwitchLabelEncountered = true;
611 parseStructuralElement();
612 break;
613 case tok::l_square:
614 if (Style.isCSharp()) {
615 nextToken();
616 parseCSharpAttribute();
617 break;
618 }
619 if (handleCppAttributes())
620 break;
621 [[fallthrough]];
622 default:
623 ParseDefault();
624 break;
625 }
626 } while (!eof());
627
628 return false;
629 }
630
calculateBraceTypes(bool ExpectClassBody)631 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
632 // We'll parse forward through the tokens until we hit
633 // a closing brace or eof - note that getNextToken() will
634 // parse macros, so this will magically work inside macro
635 // definitions, too.
636 unsigned StoredPosition = Tokens->getPosition();
637 FormatToken *Tok = FormatTok;
638 const FormatToken *PrevTok = Tok->Previous;
639 // Keep a stack of positions of lbrace tokens. We will
640 // update information about whether an lbrace starts a
641 // braced init list or a different block during the loop.
642 SmallVector<FormatToken *, 8> LBraceStack;
643 assert(Tok->is(tok::l_brace));
644 do {
645 // Get next non-comment token.
646 FormatToken *NextTok;
647 do {
648 NextTok = Tokens->getNextToken();
649 } while (NextTok->is(tok::comment));
650
651 switch (Tok->Tok.getKind()) {
652 case tok::l_brace:
653 if (Style.isJavaScript() && PrevTok) {
654 if (PrevTok->isOneOf(tok::colon, tok::less)) {
655 // A ':' indicates this code is in a type, or a braced list
656 // following a label in an object literal ({a: {b: 1}}).
657 // A '<' could be an object used in a comparison, but that is nonsense
658 // code (can never return true), so more likely it is a generic type
659 // argument (`X<{a: string; b: number}>`).
660 // The code below could be confused by semicolons between the
661 // individual members in a type member list, which would normally
662 // trigger BK_Block. In both cases, this must be parsed as an inline
663 // braced init.
664 Tok->setBlockKind(BK_BracedInit);
665 } else if (PrevTok->is(tok::r_paren)) {
666 // `) { }` can only occur in function or method declarations in JS.
667 Tok->setBlockKind(BK_Block);
668 }
669 } else {
670 Tok->setBlockKind(BK_Unknown);
671 }
672 LBraceStack.push_back(Tok);
673 break;
674 case tok::r_brace:
675 if (LBraceStack.empty())
676 break;
677 if (LBraceStack.back()->is(BK_Unknown)) {
678 bool ProbablyBracedList = false;
679 if (Style.Language == FormatStyle::LK_Proto) {
680 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
681 } else {
682 // Skip NextTok over preprocessor lines, otherwise we may not
683 // properly diagnose the block as a braced intializer
684 // if the comma separator appears after the pp directive.
685 while (NextTok->is(tok::hash)) {
686 ScopedMacroState MacroState(*Line, Tokens, NextTok);
687 do {
688 NextTok = Tokens->getNextToken();
689 } while (NextTok->isNot(tok::eof));
690 }
691
692 // Using OriginalColumn to distinguish between ObjC methods and
693 // binary operators is a bit hacky.
694 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
695 NextTok->OriginalColumn == 0;
696
697 // Try to detect a braced list. Note that regardless how we mark inner
698 // braces here, we will overwrite the BlockKind later if we parse a
699 // braced list (where all blocks inside are by default braced lists),
700 // or when we explicitly detect blocks (for example while parsing
701 // lambdas).
702
703 // If we already marked the opening brace as braced list, the closing
704 // must also be part of it.
705 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
706
707 ProbablyBracedList = ProbablyBracedList ||
708 (Style.isJavaScript() &&
709 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
710 Keywords.kw_as));
711 ProbablyBracedList = ProbablyBracedList ||
712 (Style.isCpp() && NextTok->is(tok::l_paren));
713
714 // If there is a comma, semicolon or right paren after the closing
715 // brace, we assume this is a braced initializer list.
716 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
717 // braced list in JS.
718 ProbablyBracedList =
719 ProbablyBracedList ||
720 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
721 tok::r_paren, tok::r_square, tok::l_brace,
722 tok::ellipsis);
723
724 ProbablyBracedList =
725 ProbablyBracedList ||
726 (NextTok->is(tok::identifier) &&
727 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
728
729 ProbablyBracedList = ProbablyBracedList ||
730 (NextTok->is(tok::semi) &&
731 (!ExpectClassBody || LBraceStack.size() != 1));
732
733 ProbablyBracedList =
734 ProbablyBracedList ||
735 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
736
737 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
738 // We can have an array subscript after a braced init
739 // list, but C++11 attributes are expected after blocks.
740 NextTok = Tokens->getNextToken();
741 ProbablyBracedList = NextTok->isNot(tok::l_square);
742 }
743 }
744 if (ProbablyBracedList) {
745 Tok->setBlockKind(BK_BracedInit);
746 LBraceStack.back()->setBlockKind(BK_BracedInit);
747 } else {
748 Tok->setBlockKind(BK_Block);
749 LBraceStack.back()->setBlockKind(BK_Block);
750 }
751 }
752 LBraceStack.pop_back();
753 break;
754 case tok::identifier:
755 if (!Tok->is(TT_StatementMacro))
756 break;
757 [[fallthrough]];
758 case tok::at:
759 case tok::semi:
760 case tok::kw_if:
761 case tok::kw_while:
762 case tok::kw_for:
763 case tok::kw_switch:
764 case tok::kw_try:
765 case tok::kw___try:
766 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
767 LBraceStack.back()->setBlockKind(BK_Block);
768 break;
769 default:
770 break;
771 }
772 PrevTok = Tok;
773 Tok = NextTok;
774 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
775
776 // Assume other blocks for all unclosed opening braces.
777 for (FormatToken *LBrace : LBraceStack)
778 if (LBrace->is(BK_Unknown))
779 LBrace->setBlockKind(BK_Block);
780
781 FormatTok = Tokens->setPosition(StoredPosition);
782 }
783
// Folds the std::hash of \p v into \p seed, mixing in the constant 0x9e3779b9
// and shifted copies of the current seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
789
computePPHash() const790 size_t UnwrappedLineParser::computePPHash() const {
791 size_t h = 0;
792 for (const auto &i : PPStack) {
793 hash_combine(h, size_t(i.Kind));
794 hash_combine(h, i.Line);
795 }
796 return h;
797 }
798
799 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
800 // is not null, subtracts its length (plus the preceding space) when computing
801 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
802 // running the token annotator on it so that we can restore them afterward.
mightFitOnOneLine(UnwrappedLine & ParsedLine,const FormatToken * OpeningBrace) const803 bool UnwrappedLineParser::mightFitOnOneLine(
804 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
805 const auto ColumnLimit = Style.ColumnLimit;
806 if (ColumnLimit == 0)
807 return true;
808
809 auto &Tokens = ParsedLine.Tokens;
810 assert(!Tokens.empty());
811
812 const auto *LastToken = Tokens.back().Tok;
813 assert(LastToken);
814
815 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
816
817 int Index = 0;
818 for (const auto &Token : Tokens) {
819 assert(Token.Tok);
820 auto &SavedToken = SavedTokens[Index++];
821 SavedToken.Tok = new FormatToken;
822 SavedToken.Tok->copyFrom(*Token.Tok);
823 SavedToken.Children = std::move(Token.Children);
824 }
825
826 AnnotatedLine Line(ParsedLine);
827 assert(Line.Last == LastToken);
828
829 TokenAnnotator Annotator(Style, Keywords);
830 Annotator.annotate(Line);
831 Annotator.calculateFormattingInformation(Line);
832
833 auto Length = LastToken->TotalLength;
834 if (OpeningBrace) {
835 assert(OpeningBrace != Tokens.front().Tok);
836 if (auto Prev = OpeningBrace->Previous;
837 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
838 Length -= ColumnLimit;
839 }
840 Length -= OpeningBrace->TokenText.size() + 1;
841 }
842
843 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
844 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
845 Length -= FirstToken->TokenText.size() + 1;
846 }
847
848 Index = 0;
849 for (auto &Token : Tokens) {
850 const auto &SavedToken = SavedTokens[Index++];
851 Token.Tok->copyFrom(*SavedToken.Tok);
852 Token.Children = std::move(SavedToken.Children);
853 delete SavedToken.Tok;
854 }
855
856 // If these change PPLevel needs to be used for get correct indentation.
857 assert(!Line.InMacroBody);
858 assert(!Line.InPPDirective);
859 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
860 }
861
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool KeepBraces,IfStmtKind * IfKind,bool UnindentWhitesmithsBraces,bool CanContainBracedList,TokenType NextLBracesType)862 FormatToken *UnwrappedLineParser::parseBlock(
863 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
864 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
865 bool CanContainBracedList, TokenType NextLBracesType) {
866 auto HandleVerilogBlockLabel = [this]() {
867 // ":" name
868 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
869 nextToken();
870 if (Keywords.isVerilogIdentifier(*FormatTok))
871 nextToken();
872 }
873 };
874
875 // Whether this is a Verilog-specific block that has a special header like a
876 // module.
877 const bool VerilogHierarchy =
878 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
879 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
880 (Style.isVerilog() &&
881 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
882 "'{' or macro block token expected");
883 FormatToken *Tok = FormatTok;
884 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
885 auto Index = CurrentLines->size();
886 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
887 FormatTok->setBlockKind(BK_Block);
888
889 // For Whitesmiths mode, jump to the next level prior to skipping over the
890 // braces.
891 if (!VerilogHierarchy && AddLevels > 0 &&
892 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
893 ++Line->Level;
894 }
895
896 size_t PPStartHash = computePPHash();
897
898 const unsigned InitialLevel = Line->Level;
899 if (VerilogHierarchy) {
900 AddLevels += parseVerilogHierarchyHeader();
901 } else {
902 nextToken(/*LevelDifference=*/AddLevels);
903 HandleVerilogBlockLabel();
904 }
905
906 // Bail out if there are too many levels. Otherwise, the stack might overflow.
907 if (Line->Level > 300)
908 return nullptr;
909
910 if (MacroBlock && FormatTok->is(tok::l_paren))
911 parseParens();
912
913 size_t NbPreprocessorDirectives =
914 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
915 addUnwrappedLine();
916 size_t OpeningLineIndex =
917 CurrentLines->empty()
918 ? (UnwrappedLine::kInvalidIndex)
919 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
920
921 // Whitesmiths is weird here. The brace needs to be indented for the namespace
922 // block, but the block itself may not be indented depending on the style
923 // settings. This allows the format to back up one level in those cases.
924 if (UnindentWhitesmithsBraces)
925 --Line->Level;
926
927 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
928 MustBeDeclaration);
929 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
930 Line->Level += AddLevels;
931
932 FormatToken *IfLBrace = nullptr;
933 const bool SimpleBlock =
934 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
935
936 if (eof())
937 return IfLBrace;
938
939 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
940 : !FormatTok->is(tok::r_brace)) {
941 Line->Level = InitialLevel;
942 FormatTok->setBlockKind(BK_Block);
943 return IfLBrace;
944 }
945
946 const bool IsFunctionRBrace =
947 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
948
949 auto RemoveBraces = [=]() mutable {
950 if (!SimpleBlock)
951 return false;
952 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
953 assert(FormatTok->is(tok::r_brace));
954 const bool WrappedOpeningBrace = !Tok->Previous;
955 if (WrappedOpeningBrace && FollowedByComment)
956 return false;
957 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
958 if (KeepBraces && !HasRequiredIfBraces)
959 return false;
960 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
961 const FormatToken *Previous = Tokens->getPreviousToken();
962 assert(Previous);
963 if (Previous->is(tok::r_brace) && !Previous->Optional)
964 return false;
965 }
966 assert(!CurrentLines->empty());
967 auto &LastLine = CurrentLines->back();
968 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
969 return false;
970 if (Tok->is(TT_ElseLBrace))
971 return true;
972 if (WrappedOpeningBrace) {
973 assert(Index > 0);
974 --Index; // The line above the wrapped l_brace.
975 Tok = nullptr;
976 }
977 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
978 };
979 if (RemoveBraces()) {
980 Tok->MatchingParen = FormatTok;
981 FormatTok->MatchingParen = Tok;
982 }
983
984 size_t PPEndHash = computePPHash();
985
986 // Munch the closing brace.
987 nextToken(/*LevelDifference=*/-AddLevels);
988
989 // When this is a function block and there is an unnecessary semicolon
990 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
991 // it later).
992 if (Style.RemoveSemicolon && IsFunctionRBrace) {
993 while (FormatTok->is(tok::semi)) {
994 FormatTok->Optional = true;
995 nextToken();
996 }
997 }
998
999 HandleVerilogBlockLabel();
1000
1001 if (MacroBlock && FormatTok->is(tok::l_paren))
1002 parseParens();
1003
1004 Line->Level = InitialLevel;
1005
1006 if (FormatTok->is(tok::kw_noexcept)) {
1007 // A noexcept in a requires expression.
1008 nextToken();
1009 }
1010
1011 if (FormatTok->is(tok::arrow)) {
1012 // Following the } or noexcept we can find a trailing return type arrow
1013 // as part of an implicit conversion constraint.
1014 nextToken();
1015 parseStructuralElement();
1016 }
1017
1018 if (MunchSemi && FormatTok->is(tok::semi))
1019 nextToken();
1020
1021 if (PPStartHash == PPEndHash) {
1022 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
1023 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
1024 // Update the opening line to add the forward reference as well
1025 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
1026 CurrentLines->size() - 1;
1027 }
1028 }
1029
1030 return IfLBrace;
1031 }
1032
isGoogScope(const UnwrappedLine & Line)1033 static bool isGoogScope(const UnwrappedLine &Line) {
1034 // FIXME: Closure-library specific stuff should not be hard-coded but be
1035 // configurable.
1036 if (Line.Tokens.size() < 4)
1037 return false;
1038 auto I = Line.Tokens.begin();
1039 if (I->Tok->TokenText != "goog")
1040 return false;
1041 ++I;
1042 if (I->Tok->isNot(tok::period))
1043 return false;
1044 ++I;
1045 if (I->Tok->TokenText != "scope")
1046 return false;
1047 ++I;
1048 return I->Tok->is(tok::l_paren);
1049 }
1050
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)1051 static bool isIIFE(const UnwrappedLine &Line,
1052 const AdditionalKeywords &Keywords) {
1053 // Look for the start of an immediately invoked anonymous function.
1054 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
1055 // This is commonly done in JavaScript to create a new, anonymous scope.
1056 // Example: (function() { ... })()
1057 if (Line.Tokens.size() < 3)
1058 return false;
1059 auto I = Line.Tokens.begin();
1060 if (I->Tok->isNot(tok::l_paren))
1061 return false;
1062 ++I;
1063 if (I->Tok->isNot(Keywords.kw_function))
1064 return false;
1065 ++I;
1066 return I->Tok->is(tok::l_paren);
1067 }
1068
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)1069 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
1070 const FormatToken &InitialToken) {
1071 tok::TokenKind Kind = InitialToken.Tok.getKind();
1072 if (InitialToken.is(TT_NamespaceMacro))
1073 Kind = tok::kw_namespace;
1074
1075 switch (Kind) {
1076 case tok::kw_namespace:
1077 return Style.BraceWrapping.AfterNamespace;
1078 case tok::kw_class:
1079 return Style.BraceWrapping.AfterClass;
1080 case tok::kw_union:
1081 return Style.BraceWrapping.AfterUnion;
1082 case tok::kw_struct:
1083 return Style.BraceWrapping.AfterStruct;
1084 case tok::kw_enum:
1085 return Style.BraceWrapping.AfterEnum;
1086 default:
1087 return false;
1088 }
1089 }
1090
parseChildBlock(bool CanContainBracedList,clang::format::TokenType NextLBracesType)1091 void UnwrappedLineParser::parseChildBlock(
1092 bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1093 assert(FormatTok->is(tok::l_brace));
1094 FormatTok->setBlockKind(BK_Block);
1095 const FormatToken *OpeningBrace = FormatTok;
1096 nextToken();
1097 {
1098 bool SkipIndent = (Style.isJavaScript() &&
1099 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1100 ScopedLineState LineState(*this);
1101 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1102 /*MustBeDeclaration=*/false);
1103 Line->Level += SkipIndent ? 0 : 1;
1104 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1105 flushComments(isOnNewLine(*FormatTok));
1106 Line->Level -= SkipIndent ? 0 : 1;
1107 }
1108 nextToken();
1109 }
1110
parsePPDirective()1111 void UnwrappedLineParser::parsePPDirective() {
1112 assert(FormatTok->is(tok::hash) && "'#' expected");
1113 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1114
1115 nextToken();
1116
1117 if (!FormatTok->Tok.getIdentifierInfo()) {
1118 parsePPUnknown();
1119 return;
1120 }
1121
1122 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1123 case tok::pp_define:
1124 parsePPDefine();
1125 return;
1126 case tok::pp_if:
1127 parsePPIf(/*IfDef=*/false);
1128 break;
1129 case tok::pp_ifdef:
1130 case tok::pp_ifndef:
1131 parsePPIf(/*IfDef=*/true);
1132 break;
1133 case tok::pp_else:
1134 case tok::pp_elifdef:
1135 case tok::pp_elifndef:
1136 case tok::pp_elif:
1137 parsePPElse();
1138 break;
1139 case tok::pp_endif:
1140 parsePPEndIf();
1141 break;
1142 case tok::pp_pragma:
1143 parsePPPragma();
1144 break;
1145 default:
1146 parsePPUnknown();
1147 break;
1148 }
1149 }
1150
conditionalCompilationCondition(bool Unreachable)1151 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1152 size_t Line = CurrentLines->size();
1153 if (CurrentLines == &PreprocessorDirectives)
1154 Line += Lines.size();
1155
1156 if (Unreachable ||
1157 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1158 PPStack.push_back({PP_Unreachable, Line});
1159 } else {
1160 PPStack.push_back({PP_Conditional, Line});
1161 }
1162 }
1163
conditionalCompilationStart(bool Unreachable)1164 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1165 ++PPBranchLevel;
1166 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1167 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1168 PPLevelBranchIndex.push_back(0);
1169 PPLevelBranchCount.push_back(0);
1170 }
1171 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1172 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1173 conditionalCompilationCondition(Unreachable || Skip);
1174 }
1175
conditionalCompilationAlternative()1176 void UnwrappedLineParser::conditionalCompilationAlternative() {
1177 if (!PPStack.empty())
1178 PPStack.pop_back();
1179 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1180 if (!PPChainBranchIndex.empty())
1181 ++PPChainBranchIndex.top();
1182 conditionalCompilationCondition(
1183 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1184 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1185 }
1186
conditionalCompilationEnd()1187 void UnwrappedLineParser::conditionalCompilationEnd() {
1188 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1189 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1190 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1191 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1192 }
1193 // Guard against #endif's without #if.
1194 if (PPBranchLevel > -1)
1195 --PPBranchLevel;
1196 if (!PPChainBranchIndex.empty())
1197 PPChainBranchIndex.pop();
1198 if (!PPStack.empty())
1199 PPStack.pop_back();
1200 }
1201
parsePPIf(bool IfDef)1202 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1203 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1204 nextToken();
1205 bool Unreachable = false;
1206 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1207 Unreachable = true;
1208 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1209 Unreachable = true;
1210 conditionalCompilationStart(Unreachable);
1211 FormatToken *IfCondition = FormatTok;
1212 // If there's a #ifndef on the first line, and the only lines before it are
1213 // comments, it could be an include guard.
1214 bool MaybeIncludeGuard = IfNDef;
1215 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1216 for (auto &Line : Lines) {
1217 if (!Line.Tokens.front().Tok->is(tok::comment)) {
1218 MaybeIncludeGuard = false;
1219 IncludeGuard = IG_Rejected;
1220 break;
1221 }
1222 }
1223 }
1224 --PPBranchLevel;
1225 parsePPUnknown();
1226 ++PPBranchLevel;
1227 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1228 IncludeGuard = IG_IfNdefed;
1229 IncludeGuardToken = IfCondition;
1230 }
1231 }
1232
parsePPElse()1233 void UnwrappedLineParser::parsePPElse() {
1234 // If a potential include guard has an #else, it's not an include guard.
1235 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1236 IncludeGuard = IG_Rejected;
1237 // Don't crash when there is an #else without an #if.
1238 assert(PPBranchLevel >= -1);
1239 if (PPBranchLevel == -1)
1240 conditionalCompilationStart(/*Unreachable=*/true);
1241 conditionalCompilationAlternative();
1242 --PPBranchLevel;
1243 parsePPUnknown();
1244 ++PPBranchLevel;
1245 }
1246
parsePPEndIf()1247 void UnwrappedLineParser::parsePPEndIf() {
1248 conditionalCompilationEnd();
1249 parsePPUnknown();
1250 // If the #endif of a potential include guard is the last thing in the file,
1251 // then we found an include guard.
1252 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1253 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1254 IncludeGuard = IG_Found;
1255 }
1256 }
1257
parsePPDefine()1258 void UnwrappedLineParser::parsePPDefine() {
1259 nextToken();
1260
1261 if (!FormatTok->Tok.getIdentifierInfo()) {
1262 IncludeGuard = IG_Rejected;
1263 IncludeGuardToken = nullptr;
1264 parsePPUnknown();
1265 return;
1266 }
1267
1268 if (IncludeGuard == IG_IfNdefed &&
1269 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1270 IncludeGuard = IG_Defined;
1271 IncludeGuardToken = nullptr;
1272 for (auto &Line : Lines) {
1273 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1274 IncludeGuard = IG_Rejected;
1275 break;
1276 }
1277 }
1278 }
1279
1280 // In the context of a define, even keywords should be treated as normal
1281 // identifiers. Setting the kind to identifier is not enough, because we need
1282 // to treat additional keywords like __except as well, which are already
1283 // identifiers. Setting the identifier info to null interferes with include
1284 // guard processing above, and changes preprocessing nesting.
1285 FormatTok->Tok.setKind(tok::identifier);
1286 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1287 nextToken();
1288 if (FormatTok->Tok.getKind() == tok::l_paren &&
1289 !FormatTok->hasWhitespaceBefore()) {
1290 parseParens();
1291 }
1292 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1293 Line->Level += PPBranchLevel + 1;
1294 addUnwrappedLine();
1295 ++Line->Level;
1296
1297 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1298 assert((int)Line->PPLevel >= 0);
1299 Line->InMacroBody = true;
1300
1301 // Errors during a preprocessor directive can only affect the layout of the
1302 // preprocessor directive, and thus we ignore them. An alternative approach
1303 // would be to use the same approach we use on the file level (no
1304 // re-indentation if there was a structural error) within the macro
1305 // definition.
1306 parseFile();
1307 }
1308
parsePPPragma()1309 void UnwrappedLineParser::parsePPPragma() {
1310 Line->InPragmaDirective = true;
1311 parsePPUnknown();
1312 }
1313
parsePPUnknown()1314 void UnwrappedLineParser::parsePPUnknown() {
1315 do {
1316 nextToken();
1317 } while (!eof());
1318 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1319 Line->Level += PPBranchLevel + 1;
1320 addUnwrappedLine();
1321 }
1322
1323 // Here we exclude certain tokens that are not usually the first token in an
1324 // unwrapped line. This is used in attempt to distinguish macro calls without
1325 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)1326 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1327 // Semicolon can be a null-statement, l_square can be a start of a macro or
1328 // a C++11 attribute, but this doesn't seem to be common.
1329 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1330 Tok.isNot(TT_AttributeSquare) &&
1331 // Tokens that can only be used as binary operators and a part of
1332 // overloaded operator names.
1333 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1334 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1335 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1336 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1337 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1338 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1339 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1340 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1341 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1342 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1343 Tok.isNot(tok::lesslessequal) &&
1344 // Colon is used in labels, base class lists, initializer lists,
1345 // range-based for loops, ternary operator, but should never be the
1346 // first token in an unwrapped line.
1347 Tok.isNot(tok::colon) &&
1348 // 'noexcept' is a trailing annotation.
1349 Tok.isNot(tok::kw_noexcept);
1350 }
1351
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1352 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1353 const FormatToken *FormatTok) {
1354 // FIXME: This returns true for C/C++ keywords like 'struct'.
1355 return FormatTok->is(tok::identifier) &&
1356 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1357 !FormatTok->isOneOf(
1358 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1359 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1360 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1361 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1362 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1363 Keywords.kw_instanceof, Keywords.kw_interface,
1364 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1365 }
1366
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1367 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1368 const FormatToken *FormatTok) {
1369 return FormatTok->Tok.isLiteral() ||
1370 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1371 mustBeJSIdent(Keywords, FormatTok);
1372 }
1373
1374 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1375 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1376 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1377 const FormatToken *FormatTok) {
1378 return FormatTok->isOneOf(
1379 tok::kw_return, Keywords.kw_yield,
1380 // conditionals
1381 tok::kw_if, tok::kw_else,
1382 // loops
1383 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1384 // switch/case
1385 tok::kw_switch, tok::kw_case,
1386 // exceptions
1387 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1388 // declaration
1389 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1390 Keywords.kw_async, Keywords.kw_function,
1391 // import/export
1392 Keywords.kw_import, tok::kw_export);
1393 }
1394
1395 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)1396 static bool isC78Type(const FormatToken &Tok) {
1397 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1398 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1399 tok::identifier);
1400 }
1401
1402 // This function checks whether a token starts the first parameter declaration
1403 // in a K&R C (aka C78) function definition, e.g.:
1404 // int f(a, b)
1405 // short a, b;
1406 // {
1407 // return a + b;
1408 // }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1409 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1410 const FormatToken *FuncName) {
1411 assert(Tok);
1412 assert(Next);
1413 assert(FuncName);
1414
1415 if (FuncName->isNot(tok::identifier))
1416 return false;
1417
1418 const FormatToken *Prev = FuncName->Previous;
1419 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1420 return false;
1421
1422 if (!isC78Type(*Tok) &&
1423 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1424 return false;
1425 }
1426
1427 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1428 return false;
1429
1430 Tok = Tok->Previous;
1431 if (!Tok || Tok->isNot(tok::r_paren))
1432 return false;
1433
1434 Tok = Tok->Previous;
1435 if (!Tok || Tok->isNot(tok::identifier))
1436 return false;
1437
1438 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1439 }
1440
parseModuleImport()1441 bool UnwrappedLineParser::parseModuleImport() {
1442 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1443
1444 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1445 !Token->Tok.getIdentifierInfo() &&
1446 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1447 return false;
1448 }
1449
1450 nextToken();
1451 while (!eof()) {
1452 if (FormatTok->is(tok::colon)) {
1453 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1454 }
1455 // Handle import <foo/bar.h> as we would an include statement.
1456 else if (FormatTok->is(tok::less)) {
1457 nextToken();
1458 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1459 // Mark tokens up to the trailing line comments as implicit string
1460 // literals.
1461 if (FormatTok->isNot(tok::comment) &&
1462 !FormatTok->TokenText.startswith("//")) {
1463 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1464 }
1465 nextToken();
1466 }
1467 }
1468 if (FormatTok->is(tok::semi)) {
1469 nextToken();
1470 break;
1471 }
1472 nextToken();
1473 }
1474
1475 addUnwrappedLine();
1476 return true;
1477 }
1478
1479 // readTokenWithJavaScriptASI reads the next token and terminates the current
1480 // line if JavaScript Automatic Semicolon Insertion must
1481 // happen between the current token and the next token.
1482 //
1483 // This method is conservative - it cannot cover all edge cases of JavaScript,
1484 // but only aims to correctly handle certain well known cases. It *must not*
1485 // return true in speculative cases.
readTokenWithJavaScriptASI()1486 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1487 FormatToken *Previous = FormatTok;
1488 readToken();
1489 FormatToken *Next = FormatTok;
1490
1491 bool IsOnSameLine =
1492 CommentsBeforeNextToken.empty()
1493 ? Next->NewlinesBefore == 0
1494 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1495 if (IsOnSameLine)
1496 return;
1497
1498 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1499 bool PreviousStartsTemplateExpr =
1500 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1501 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1502 // If the line contains an '@' sign, the previous token might be an
1503 // annotation, which can precede another identifier/value.
1504 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1505 return LineNode.Tok->is(tok::at);
1506 });
1507 if (HasAt)
1508 return;
1509 }
1510 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1511 return addUnwrappedLine();
1512 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1513 bool NextEndsTemplateExpr =
1514 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1515 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1516 (PreviousMustBeValue ||
1517 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1518 tok::minusminus))) {
1519 return addUnwrappedLine();
1520 }
1521 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1522 isJSDeclOrStmt(Keywords, Next)) {
1523 return addUnwrappedLine();
1524 }
1525 }
1526
parseStructuralElement(bool IsTopLevel,TokenType NextLBracesType,IfStmtKind * IfKind,FormatToken ** IfLeftBrace,bool * HasDoWhile,bool * HasLabel)1527 void UnwrappedLineParser::parseStructuralElement(
1528 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1529 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1530 if (Style.Language == FormatStyle::LK_TableGen &&
1531 FormatTok->is(tok::pp_include)) {
1532 nextToken();
1533 if (FormatTok->is(tok::string_literal))
1534 nextToken();
1535 addUnwrappedLine();
1536 return;
1537 }
1538
1539 if (Style.isVerilog()) {
1540 // Skip things that can exist before keywords like 'if' and 'case'.
1541 while (true) {
1542 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1543 Keywords.kw_unique0)) {
1544 nextToken();
1545 } else if (FormatTok->is(tok::l_paren) &&
1546 Tokens->peekNextToken()->is(tok::star)) {
1547 parseParens();
1548 } else {
1549 break;
1550 }
1551 }
1552 }
1553
1554 // Tokens that only make sense at the beginning of a line.
1555 switch (FormatTok->Tok.getKind()) {
1556 case tok::kw_asm:
1557 nextToken();
1558 if (FormatTok->is(tok::l_brace)) {
1559 FormatTok->setFinalizedType(TT_InlineASMBrace);
1560 nextToken();
1561 while (FormatTok && !eof()) {
1562 if (FormatTok->is(tok::r_brace)) {
1563 FormatTok->setFinalizedType(TT_InlineASMBrace);
1564 nextToken();
1565 addUnwrappedLine();
1566 break;
1567 }
1568 FormatTok->Finalized = true;
1569 nextToken();
1570 }
1571 }
1572 break;
1573 case tok::kw_namespace:
1574 parseNamespace();
1575 return;
1576 case tok::kw_public:
1577 case tok::kw_protected:
1578 case tok::kw_private:
1579 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1580 Style.isCSharp()) {
1581 nextToken();
1582 } else {
1583 parseAccessSpecifier();
1584 }
1585 return;
1586 case tok::kw_if: {
1587 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1588 // field/method declaration.
1589 break;
1590 }
1591 FormatToken *Tok = parseIfThenElse(IfKind);
1592 if (IfLeftBrace)
1593 *IfLeftBrace = Tok;
1594 return;
1595 }
1596 case tok::kw_for:
1597 case tok::kw_while:
1598 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1599 // field/method declaration.
1600 break;
1601 }
1602 parseForOrWhileLoop();
1603 return;
1604 case tok::kw_do:
1605 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1606 // field/method declaration.
1607 break;
1608 }
1609 parseDoWhile();
1610 if (HasDoWhile)
1611 *HasDoWhile = true;
1612 return;
1613 case tok::kw_switch:
1614 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1615 // 'switch: string' field declaration.
1616 break;
1617 }
1618 parseSwitch();
1619 return;
1620 case tok::kw_default:
1621 // In Verilog default along with other labels are handled in the next loop.
1622 if (Style.isVerilog())
1623 break;
1624 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1625 // 'default: string' field declaration.
1626 break;
1627 }
1628 nextToken();
1629 if (FormatTok->is(tok::colon)) {
1630 parseLabel();
1631 return;
1632 }
1633 // e.g. "default void f() {}" in a Java interface.
1634 break;
1635 case tok::kw_case:
1636 // Proto: there are no switch/case statements.
1637 if (Style.isProto()) {
1638 nextToken();
1639 return;
1640 }
1641 if (Style.isVerilog()) {
1642 parseBlock();
1643 addUnwrappedLine();
1644 return;
1645 }
1646 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1647 // 'case: string' field declaration.
1648 nextToken();
1649 break;
1650 }
1651 parseCaseLabel();
1652 return;
1653 case tok::kw_try:
1654 case tok::kw___try:
1655 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1656 // field/method declaration.
1657 break;
1658 }
1659 parseTryCatch();
1660 return;
1661 case tok::kw_extern:
1662 nextToken();
1663 if (Style.isVerilog()) {
1664 // In Verilog and extern module declaration looks like a start of module.
1665 // But there is no body and endmodule. So we handle it separately.
1666 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1667 parseVerilogHierarchyHeader();
1668 return;
1669 }
1670 } else if (FormatTok->is(tok::string_literal)) {
1671 nextToken();
1672 if (FormatTok->is(tok::l_brace)) {
1673 if (Style.BraceWrapping.AfterExternBlock)
1674 addUnwrappedLine();
1675 // Either we indent or for backwards compatibility we follow the
1676 // AfterExternBlock style.
1677 unsigned AddLevels =
1678 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1679 (Style.BraceWrapping.AfterExternBlock &&
1680 Style.IndentExternBlock ==
1681 FormatStyle::IEBS_AfterExternBlock)
1682 ? 1u
1683 : 0u;
1684 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1685 addUnwrappedLine();
1686 return;
1687 }
1688 }
1689 break;
1690 case tok::kw_export:
1691 if (Style.isJavaScript()) {
1692 parseJavaScriptEs6ImportExport();
1693 return;
1694 }
1695 if (Style.isCpp()) {
1696 nextToken();
1697 if (FormatTok->is(tok::kw_namespace)) {
1698 parseNamespace();
1699 return;
1700 }
1701 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1702 return;
1703 }
1704 break;
1705 case tok::kw_inline:
1706 nextToken();
1707 if (FormatTok->is(tok::kw_namespace)) {
1708 parseNamespace();
1709 return;
1710 }
1711 break;
1712 case tok::identifier:
1713 if (FormatTok->is(TT_ForEachMacro)) {
1714 parseForOrWhileLoop();
1715 return;
1716 }
1717 if (FormatTok->is(TT_MacroBlockBegin)) {
1718 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1719 /*MunchSemi=*/false);
1720 return;
1721 }
1722 if (FormatTok->is(Keywords.kw_import)) {
1723 if (Style.isJavaScript()) {
1724 parseJavaScriptEs6ImportExport();
1725 return;
1726 }
1727 if (Style.Language == FormatStyle::LK_Proto) {
1728 nextToken();
1729 if (FormatTok->is(tok::kw_public))
1730 nextToken();
1731 if (!FormatTok->is(tok::string_literal))
1732 return;
1733 nextToken();
1734 if (FormatTok->is(tok::semi))
1735 nextToken();
1736 addUnwrappedLine();
1737 return;
1738 }
1739 if (Style.isCpp() && parseModuleImport())
1740 return;
1741 }
1742 if (Style.isCpp() &&
1743 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1744 Keywords.kw_slots, Keywords.kw_qslots)) {
1745 nextToken();
1746 if (FormatTok->is(tok::colon)) {
1747 nextToken();
1748 addUnwrappedLine();
1749 return;
1750 }
1751 }
1752 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1753 parseStatementMacro();
1754 return;
1755 }
1756 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1757 parseNamespace();
1758 return;
1759 }
1760 // In all other cases, parse the declaration.
1761 break;
1762 default:
1763 break;
1764 }
1765 do {
1766 const FormatToken *Previous = FormatTok->Previous;
1767 switch (FormatTok->Tok.getKind()) {
1768 case tok::at:
1769 nextToken();
1770 if (FormatTok->is(tok::l_brace)) {
1771 nextToken();
1772 parseBracedList();
1773 break;
1774 } else if (Style.Language == FormatStyle::LK_Java &&
1775 FormatTok->is(Keywords.kw_interface)) {
1776 nextToken();
1777 break;
1778 }
1779 switch (FormatTok->Tok.getObjCKeywordID()) {
1780 case tok::objc_public:
1781 case tok::objc_protected:
1782 case tok::objc_package:
1783 case tok::objc_private:
1784 return parseAccessSpecifier();
1785 case tok::objc_interface:
1786 case tok::objc_implementation:
1787 return parseObjCInterfaceOrImplementation();
1788 case tok::objc_protocol:
1789 if (parseObjCProtocol())
1790 return;
1791 break;
1792 case tok::objc_end:
1793 return; // Handled by the caller.
1794 case tok::objc_optional:
1795 case tok::objc_required:
1796 nextToken();
1797 addUnwrappedLine();
1798 return;
1799 case tok::objc_autoreleasepool:
1800 nextToken();
1801 if (FormatTok->is(tok::l_brace)) {
1802 if (Style.BraceWrapping.AfterControlStatement ==
1803 FormatStyle::BWACS_Always) {
1804 addUnwrappedLine();
1805 }
1806 parseBlock();
1807 }
1808 addUnwrappedLine();
1809 return;
1810 case tok::objc_synchronized:
1811 nextToken();
1812 if (FormatTok->is(tok::l_paren)) {
1813 // Skip synchronization object
1814 parseParens();
1815 }
1816 if (FormatTok->is(tok::l_brace)) {
1817 if (Style.BraceWrapping.AfterControlStatement ==
1818 FormatStyle::BWACS_Always) {
1819 addUnwrappedLine();
1820 }
1821 parseBlock();
1822 }
1823 addUnwrappedLine();
1824 return;
1825 case tok::objc_try:
1826 // This branch isn't strictly necessary (the kw_try case below would
1827 // do this too after the tok::at is parsed above). But be explicit.
1828 parseTryCatch();
1829 return;
1830 default:
1831 break;
1832 }
1833 break;
1834 case tok::kw_requires: {
1835 if (Style.isCpp()) {
1836 bool ParsedClause = parseRequires();
1837 if (ParsedClause)
1838 return;
1839 } else {
1840 nextToken();
1841 }
1842 break;
1843 }
1844 case tok::kw_enum:
1845 // Ignore if this is part of "template <enum ...".
1846 if (Previous && Previous->is(tok::less)) {
1847 nextToken();
1848 break;
1849 }
1850
1851 // parseEnum falls through and does not yet add an unwrapped line as an
1852 // enum definition can start a structural element.
1853 if (!parseEnum())
1854 break;
1855 // This only applies for C++.
1856 if (!Style.isCpp()) {
1857 addUnwrappedLine();
1858 return;
1859 }
1860 break;
1861 case tok::kw_typedef:
1862 nextToken();
1863 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1864 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1865 Keywords.kw_CF_CLOSED_ENUM,
1866 Keywords.kw_NS_CLOSED_ENUM)) {
1867 parseEnum();
1868 }
1869 break;
1870 case tok::kw_class:
1871 if (Style.isVerilog()) {
1872 parseBlock();
1873 addUnwrappedLine();
1874 return;
1875 }
1876 [[fallthrough]];
1877 case tok::kw_struct:
1878 case tok::kw_union:
1879 if (parseStructLike())
1880 return;
1881 break;
1882 case tok::period:
1883 nextToken();
1884 // In Java, classes have an implicit static member "class".
1885 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1886 FormatTok->is(tok::kw_class)) {
1887 nextToken();
1888 }
1889 if (Style.isJavaScript() && FormatTok &&
1890 FormatTok->Tok.getIdentifierInfo()) {
1891 // JavaScript only has pseudo keywords, all keywords are allowed to
1892 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1893 nextToken();
1894 }
1895 break;
1896 case tok::semi:
1897 nextToken();
1898 addUnwrappedLine();
1899 return;
1900 case tok::r_brace:
1901 addUnwrappedLine();
1902 return;
1903 case tok::l_paren: {
1904 parseParens();
1905 // Break the unwrapped line if a K&R C function definition has a parameter
1906 // declaration.
1907 if (!IsTopLevel || !Style.isCpp() || !Previous || eof())
1908 break;
1909 if (isC78ParameterDecl(FormatTok,
1910 Tokens->peekNextToken(/*SkipComment=*/true),
1911 Previous)) {
1912 addUnwrappedLine();
1913 return;
1914 }
1915 break;
1916 }
1917 case tok::kw_operator:
1918 nextToken();
1919 if (FormatTok->isBinaryOperator())
1920 nextToken();
1921 break;
1922 case tok::caret:
1923 nextToken();
1924 if (FormatTok->Tok.isAnyIdentifier() ||
1925 FormatTok->isSimpleTypeSpecifier()) {
1926 nextToken();
1927 }
1928 if (FormatTok->is(tok::l_paren))
1929 parseParens();
1930 if (FormatTok->is(tok::l_brace))
1931 parseChildBlock();
1932 break;
1933 case tok::l_brace:
1934 if (NextLBracesType != TT_Unknown)
1935 FormatTok->setFinalizedType(NextLBracesType);
1936 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1937 // A block outside of parentheses must be the last part of a
1938 // structural element.
1939 // FIXME: Figure out cases where this is not true, and add projections
1940 // for them (the one we know is missing are lambdas).
1941 if (Style.Language == FormatStyle::LK_Java &&
1942 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1943 // If necessary, we could set the type to something different than
1944 // TT_FunctionLBrace.
1945 if (Style.BraceWrapping.AfterControlStatement ==
1946 FormatStyle::BWACS_Always) {
1947 addUnwrappedLine();
1948 }
1949 } else if (Style.BraceWrapping.AfterFunction) {
1950 addUnwrappedLine();
1951 }
1952 FormatTok->setFinalizedType(TT_FunctionLBrace);
1953 parseBlock();
1954 addUnwrappedLine();
1955 return;
1956 }
1957 // Otherwise this was a braced init list, and the structural
1958 // element continues.
1959 break;
1960 case tok::kw_try:
1961 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1962 // field/method declaration.
1963 nextToken();
1964 break;
1965 }
1966 // We arrive here when parsing function-try blocks.
1967 if (Style.BraceWrapping.AfterFunction)
1968 addUnwrappedLine();
1969 parseTryCatch();
1970 return;
1971 case tok::identifier: {
1972 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1973 Line->MustBeDeclaration) {
1974 addUnwrappedLine();
1975 parseCSharpGenericTypeConstraint();
1976 break;
1977 }
1978 if (FormatTok->is(TT_MacroBlockEnd)) {
1979 addUnwrappedLine();
1980 return;
1981 }
1982
1983 // Function declarations (as opposed to function expressions) are parsed
1984 // on their own unwrapped line by continuing this loop. Function
1985 // expressions (functions that are not on their own line) must not create
1986 // a new unwrapped line, so they are special cased below.
1987 size_t TokenCount = Line->Tokens.size();
1988 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1989 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1990 Keywords.kw_async)))) {
1991 tryToParseJSFunction();
1992 break;
1993 }
1994 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1995 FormatTok->is(Keywords.kw_interface)) {
1996 if (Style.isJavaScript()) {
1997 // In JavaScript/TypeScript, "interface" can be used as a standalone
1998 // identifier, e.g. in `var interface = 1;`. If "interface" is
1999 // followed by another identifier, it is very like to be an actual
2000 // interface declaration.
2001 unsigned StoredPosition = Tokens->getPosition();
2002 FormatToken *Next = Tokens->getNextToken();
2003 FormatTok = Tokens->setPosition(StoredPosition);
2004 if (!mustBeJSIdent(Keywords, Next)) {
2005 nextToken();
2006 break;
2007 }
2008 }
2009 parseRecord();
2010 addUnwrappedLine();
2011 return;
2012 }
2013
2014 if (Style.isVerilog()) {
2015 if (FormatTok->is(Keywords.kw_table)) {
2016 parseVerilogTable();
2017 return;
2018 }
2019 if (Keywords.isVerilogBegin(*FormatTok) ||
2020 Keywords.isVerilogHierarchy(*FormatTok)) {
2021 parseBlock();
2022 addUnwrappedLine();
2023 return;
2024 }
2025 }
2026
2027 if (FormatTok->is(Keywords.kw_interface)) {
2028 if (parseStructLike())
2029 return;
2030 break;
2031 }
2032
2033 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
2034 parseStatementMacro();
2035 return;
2036 }
2037
2038 // See if the following token should start a new unwrapped line.
2039 StringRef Text = FormatTok->TokenText;
2040
2041 FormatToken *PreviousToken = FormatTok;
2042 nextToken();
2043
2044 // JS doesn't have macros, and within classes colons indicate fields, not
2045 // labels.
2046 if (Style.isJavaScript())
2047 break;
2048
2049 auto OneTokenSoFar = [&]() {
2050 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2051 while (I != E && I->Tok->is(tok::comment))
2052 ++I;
2053 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
2054 ++I;
2055 return I != E && (++I == E);
2056 };
2057 if (OneTokenSoFar()) {
2058 // In Verilog labels can be any expression, so we don't do them here.
2059 if (!Style.isVerilog() && FormatTok->is(tok::colon) &&
2060 !Line->MustBeDeclaration) {
2061 Line->Tokens.begin()->Tok->MustBreakBefore = true;
2062 parseLabel(!Style.IndentGotoLabels);
2063 if (HasLabel)
2064 *HasLabel = true;
2065 return;
2066 }
2067 // Recognize function-like macro usages without trailing semicolon as
2068 // well as free-standing macros like Q_OBJECT.
2069 bool FunctionLike = FormatTok->is(tok::l_paren);
2070 if (FunctionLike)
2071 parseParens();
2072
2073 bool FollowedByNewline =
2074 CommentsBeforeNextToken.empty()
2075 ? FormatTok->NewlinesBefore > 0
2076 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2077
2078 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2079 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2080 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2081 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2082 addUnwrappedLine();
2083 return;
2084 }
2085 }
2086 break;
2087 }
2088 case tok::equal:
2089 if ((Style.isJavaScript() || Style.isCSharp()) &&
2090 FormatTok->is(TT_FatArrow)) {
2091 tryToParseChildBlock();
2092 break;
2093 }
2094
2095 nextToken();
2096 if (FormatTok->is(tok::l_brace)) {
2097 // Block kind should probably be set to BK_BracedInit for any language.
2098 // C# needs this change to ensure that array initialisers and object
2099 // initialisers are indented the same way.
2100 if (Style.isCSharp())
2101 FormatTok->setBlockKind(BK_BracedInit);
2102 nextToken();
2103 parseBracedList();
2104 } else if (Style.Language == FormatStyle::LK_Proto &&
2105 FormatTok->is(tok::less)) {
2106 nextToken();
2107 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2108 /*ClosingBraceKind=*/tok::greater);
2109 }
2110 break;
2111 case tok::l_square:
2112 parseSquare();
2113 break;
2114 case tok::kw_new:
2115 parseNew();
2116 break;
2117 case tok::kw_case:
2118 // Proto: there are no switch/case statements.
2119 if (Style.isProto()) {
2120 nextToken();
2121 return;
2122 }
2123 // In Verilog switch is called case.
2124 if (Style.isVerilog()) {
2125 parseBlock();
2126 addUnwrappedLine();
2127 return;
2128 }
2129 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2130 // 'case: string' field declaration.
2131 nextToken();
2132 break;
2133 }
2134 parseCaseLabel();
2135 break;
2136 case tok::kw_default:
2137 nextToken();
2138 if (Style.isVerilog()) {
2139 if (FormatTok->is(tok::colon)) {
2140 // The label will be handled in the next iteration.
2141 break;
2142 }
2143 if (FormatTok->is(Keywords.kw_clocking)) {
2144 // A default clocking block.
2145 parseBlock();
2146 addUnwrappedLine();
2147 return;
2148 }
2149 parseVerilogCaseLabel();
2150 return;
2151 }
2152 break;
2153 case tok::colon:
2154 nextToken();
2155 if (Style.isVerilog()) {
2156 parseVerilogCaseLabel();
2157 return;
2158 }
2159 break;
2160 default:
2161 nextToken();
2162 break;
2163 }
2164 } while (!eof());
2165 }
2166
tryToParsePropertyAccessor()2167 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2168 assert(FormatTok->is(tok::l_brace));
2169 if (!Style.isCSharp())
2170 return false;
2171 // See if it's a property accessor.
2172 if (FormatTok->Previous->isNot(tok::identifier))
2173 return false;
2174
2175 // See if we are inside a property accessor.
2176 //
2177 // Record the current tokenPosition so that we can advance and
2178 // reset the current token. `Next` is not set yet so we need
2179 // another way to advance along the token stream.
2180 unsigned int StoredPosition = Tokens->getPosition();
2181 FormatToken *Tok = Tokens->getNextToken();
2182
2183 // A trivial property accessor is of the form:
2184 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2185 // Track these as they do not require line breaks to be introduced.
2186 bool HasSpecialAccessor = false;
2187 bool IsTrivialPropertyAccessor = true;
2188 while (!eof()) {
2189 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2190 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2191 Keywords.kw_init, Keywords.kw_set)) {
2192 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2193 HasSpecialAccessor = true;
2194 Tok = Tokens->getNextToken();
2195 continue;
2196 }
2197 if (Tok->isNot(tok::r_brace))
2198 IsTrivialPropertyAccessor = false;
2199 break;
2200 }
2201
2202 if (!HasSpecialAccessor) {
2203 Tokens->setPosition(StoredPosition);
2204 return false;
2205 }
2206
2207 // Try to parse the property accessor:
2208 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2209 Tokens->setPosition(StoredPosition);
2210 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2211 addUnwrappedLine();
2212 nextToken();
2213 do {
2214 switch (FormatTok->Tok.getKind()) {
2215 case tok::r_brace:
2216 nextToken();
2217 if (FormatTok->is(tok::equal)) {
2218 while (!eof() && FormatTok->isNot(tok::semi))
2219 nextToken();
2220 nextToken();
2221 }
2222 addUnwrappedLine();
2223 return true;
2224 case tok::l_brace:
2225 ++Line->Level;
2226 parseBlock(/*MustBeDeclaration=*/true);
2227 addUnwrappedLine();
2228 --Line->Level;
2229 break;
2230 case tok::equal:
2231 if (FormatTok->is(TT_FatArrow)) {
2232 ++Line->Level;
2233 do {
2234 nextToken();
2235 } while (!eof() && FormatTok->isNot(tok::semi));
2236 nextToken();
2237 addUnwrappedLine();
2238 --Line->Level;
2239 break;
2240 }
2241 nextToken();
2242 break;
2243 default:
2244 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2245 Keywords.kw_set) &&
2246 !IsTrivialPropertyAccessor) {
2247 // Non-trivial get/set needs to be on its own line.
2248 addUnwrappedLine();
2249 }
2250 nextToken();
2251 }
2252 } while (!eof());
2253
2254 // Unreachable for well-formed code (paired '{' and '}').
2255 return true;
2256 }
2257
tryToParseLambda()2258 bool UnwrappedLineParser::tryToParseLambda() {
2259 assert(FormatTok->is(tok::l_square));
2260 if (!Style.isCpp()) {
2261 nextToken();
2262 return false;
2263 }
2264 FormatToken &LSquare = *FormatTok;
2265 if (!tryToParseLambdaIntroducer())
2266 return false;
2267
2268 bool SeenArrow = false;
2269 bool InTemplateParameterList = false;
2270
2271 while (FormatTok->isNot(tok::l_brace)) {
2272 if (FormatTok->isSimpleTypeSpecifier()) {
2273 nextToken();
2274 continue;
2275 }
2276 switch (FormatTok->Tok.getKind()) {
2277 case tok::l_brace:
2278 break;
2279 case tok::l_paren:
2280 parseParens();
2281 break;
2282 case tok::l_square:
2283 parseSquare();
2284 break;
2285 case tok::less:
2286 assert(FormatTok->Previous);
2287 if (FormatTok->Previous->is(tok::r_square))
2288 InTemplateParameterList = true;
2289 nextToken();
2290 break;
2291 case tok::kw_auto:
2292 case tok::kw_class:
2293 case tok::kw_template:
2294 case tok::kw_typename:
2295 case tok::amp:
2296 case tok::star:
2297 case tok::kw_const:
2298 case tok::kw_constexpr:
2299 case tok::kw_consteval:
2300 case tok::comma:
2301 case tok::greater:
2302 case tok::identifier:
2303 case tok::numeric_constant:
2304 case tok::coloncolon:
2305 case tok::kw_mutable:
2306 case tok::kw_noexcept:
2307 case tok::kw_static:
2308 nextToken();
2309 break;
2310 // Specialization of a template with an integer parameter can contain
2311 // arithmetic, logical, comparison and ternary operators.
2312 //
2313 // FIXME: This also accepts sequences of operators that are not in the scope
2314 // of a template argument list.
2315 //
2316 // In a C++ lambda a template type can only occur after an arrow. We use
2317 // this as an heuristic to distinguish between Objective-C expressions
2318 // followed by an `a->b` expression, such as:
2319 // ([obj func:arg] + a->b)
2320 // Otherwise the code below would parse as a lambda.
2321 //
2322 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2323 // explicit template lists: []<bool b = true && false>(U &&u){}
2324 case tok::plus:
2325 case tok::minus:
2326 case tok::exclaim:
2327 case tok::tilde:
2328 case tok::slash:
2329 case tok::percent:
2330 case tok::lessless:
2331 case tok::pipe:
2332 case tok::pipepipe:
2333 case tok::ampamp:
2334 case tok::caret:
2335 case tok::equalequal:
2336 case tok::exclaimequal:
2337 case tok::greaterequal:
2338 case tok::lessequal:
2339 case tok::question:
2340 case tok::colon:
2341 case tok::ellipsis:
2342 case tok::kw_true:
2343 case tok::kw_false:
2344 if (SeenArrow || InTemplateParameterList) {
2345 nextToken();
2346 break;
2347 }
2348 return true;
2349 case tok::arrow:
2350 // This might or might not actually be a lambda arrow (this could be an
2351 // ObjC method invocation followed by a dereferencing arrow). We might
2352 // reset this back to TT_Unknown in TokenAnnotator.
2353 FormatTok->setFinalizedType(TT_LambdaArrow);
2354 SeenArrow = true;
2355 nextToken();
2356 break;
2357 default:
2358 return true;
2359 }
2360 }
2361 FormatTok->setFinalizedType(TT_LambdaLBrace);
2362 LSquare.setFinalizedType(TT_LambdaLSquare);
2363 parseChildBlock();
2364 return true;
2365 }
2366
tryToParseLambdaIntroducer()2367 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2368 const FormatToken *Previous = FormatTok->Previous;
2369 const FormatToken *LeftSquare = FormatTok;
2370 nextToken();
2371 if (Previous &&
2372 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2373 tok::kw_delete, tok::l_square) ||
2374 LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2375 Previous->isSimpleTypeSpecifier())) {
2376 return false;
2377 }
2378 if (FormatTok->is(tok::l_square))
2379 return false;
2380 if (FormatTok->is(tok::r_square)) {
2381 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2382 if (Next->is(tok::greater))
2383 return false;
2384 }
2385 parseSquare(/*LambdaIntroducer=*/true);
2386 return true;
2387 }
2388
tryToParseJSFunction()2389 void UnwrappedLineParser::tryToParseJSFunction() {
2390 assert(FormatTok->is(Keywords.kw_function) ||
2391 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2392 if (FormatTok->is(Keywords.kw_async))
2393 nextToken();
2394 // Consume "function".
2395 nextToken();
2396
2397 // Consume * (generator function). Treat it like C++'s overloaded operators.
2398 if (FormatTok->is(tok::star)) {
2399 FormatTok->setFinalizedType(TT_OverloadedOperator);
2400 nextToken();
2401 }
2402
2403 // Consume function name.
2404 if (FormatTok->is(tok::identifier))
2405 nextToken();
2406
2407 if (FormatTok->isNot(tok::l_paren))
2408 return;
2409
2410 // Parse formal parameter list.
2411 parseParens();
2412
2413 if (FormatTok->is(tok::colon)) {
2414 // Parse a type definition.
2415 nextToken();
2416
2417 // Eat the type declaration. For braced inline object types, balance braces,
2418 // otherwise just parse until finding an l_brace for the function body.
2419 if (FormatTok->is(tok::l_brace))
2420 tryToParseBracedList();
2421 else
2422 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2423 nextToken();
2424 }
2425
2426 if (FormatTok->is(tok::semi))
2427 return;
2428
2429 parseChildBlock();
2430 }
2431
tryToParseBracedList()2432 bool UnwrappedLineParser::tryToParseBracedList() {
2433 if (FormatTok->is(BK_Unknown))
2434 calculateBraceTypes();
2435 assert(FormatTok->isNot(BK_Unknown));
2436 if (FormatTok->is(BK_Block))
2437 return false;
2438 nextToken();
2439 parseBracedList();
2440 return true;
2441 }
2442
tryToParseChildBlock()2443 bool UnwrappedLineParser::tryToParseChildBlock() {
2444 assert(Style.isJavaScript() || Style.isCSharp());
2445 assert(FormatTok->is(TT_FatArrow));
2446 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2447 // They always start an expression or a child block if followed by a curly
2448 // brace.
2449 nextToken();
2450 if (FormatTok->isNot(tok::l_brace))
2451 return false;
2452 parseChildBlock();
2453 return true;
2454 }
2455
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)2456 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2457 bool IsEnum,
2458 tok::TokenKind ClosingBraceKind) {
2459 bool HasError = false;
2460
2461 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2462 // replace this by using parseAssignmentExpression() inside.
2463 do {
2464 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2465 tryToParseChildBlock()) {
2466 continue;
2467 }
2468 if (Style.isJavaScript()) {
2469 if (FormatTok->is(Keywords.kw_function) ||
2470 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2471 tryToParseJSFunction();
2472 continue;
2473 }
2474 if (FormatTok->is(tok::l_brace)) {
2475 // Could be a method inside of a braced list `{a() { return 1; }}`.
2476 if (tryToParseBracedList())
2477 continue;
2478 parseChildBlock();
2479 }
2480 }
2481 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2482 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2483 addUnwrappedLine();
2484 nextToken();
2485 return !HasError;
2486 }
2487 switch (FormatTok->Tok.getKind()) {
2488 case tok::l_square:
2489 if (Style.isCSharp())
2490 parseSquare();
2491 else
2492 tryToParseLambda();
2493 break;
2494 case tok::l_paren:
2495 parseParens();
2496 // JavaScript can just have free standing methods and getters/setters in
2497 // object literals. Detect them by a "{" following ")".
2498 if (Style.isJavaScript()) {
2499 if (FormatTok->is(tok::l_brace))
2500 parseChildBlock();
2501 break;
2502 }
2503 break;
2504 case tok::l_brace:
2505 // Assume there are no blocks inside a braced init list apart
2506 // from the ones we explicitly parse out (like lambdas).
2507 FormatTok->setBlockKind(BK_BracedInit);
2508 nextToken();
2509 parseBracedList();
2510 break;
2511 case tok::less:
2512 if (Style.Language == FormatStyle::LK_Proto ||
2513 ClosingBraceKind == tok::greater) {
2514 nextToken();
2515 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2516 /*ClosingBraceKind=*/tok::greater);
2517 } else {
2518 nextToken();
2519 }
2520 break;
2521 case tok::semi:
2522 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2523 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2524 // used for error recovery if we have otherwise determined that this is
2525 // a braced list.
2526 if (Style.isJavaScript()) {
2527 nextToken();
2528 break;
2529 }
2530 HasError = true;
2531 if (!ContinueOnSemicolons)
2532 return !HasError;
2533 nextToken();
2534 break;
2535 case tok::comma:
2536 nextToken();
2537 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2538 addUnwrappedLine();
2539 break;
2540 default:
2541 nextToken();
2542 break;
2543 }
2544 } while (!eof());
2545 return false;
2546 }
2547
2548 /// \brief Parses a pair of parentheses (and everything between them).
2549 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2550 /// double ampersands. This only counts for the current parens scope.
parseParens(TokenType AmpAmpTokenType)2551 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2552 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2553 nextToken();
2554 do {
2555 switch (FormatTok->Tok.getKind()) {
2556 case tok::l_paren:
2557 parseParens();
2558 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2559 parseChildBlock();
2560 break;
2561 case tok::r_paren:
2562 nextToken();
2563 return;
2564 case tok::r_brace:
2565 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2566 return;
2567 case tok::l_square:
2568 tryToParseLambda();
2569 break;
2570 case tok::l_brace:
2571 if (!tryToParseBracedList())
2572 parseChildBlock();
2573 break;
2574 case tok::at:
2575 nextToken();
2576 if (FormatTok->is(tok::l_brace)) {
2577 nextToken();
2578 parseBracedList();
2579 }
2580 break;
2581 case tok::equal:
2582 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2583 tryToParseChildBlock();
2584 else
2585 nextToken();
2586 break;
2587 case tok::kw_class:
2588 if (Style.isJavaScript())
2589 parseRecord(/*ParseAsExpr=*/true);
2590 else
2591 nextToken();
2592 break;
2593 case tok::identifier:
2594 if (Style.isJavaScript() &&
2595 (FormatTok->is(Keywords.kw_function) ||
2596 FormatTok->startsSequence(Keywords.kw_async,
2597 Keywords.kw_function))) {
2598 tryToParseJSFunction();
2599 } else {
2600 nextToken();
2601 }
2602 break;
2603 case tok::kw_requires: {
2604 auto RequiresToken = FormatTok;
2605 nextToken();
2606 parseRequiresExpression(RequiresToken);
2607 break;
2608 }
2609 case tok::ampamp:
2610 if (AmpAmpTokenType != TT_Unknown)
2611 FormatTok->setFinalizedType(AmpAmpTokenType);
2612 [[fallthrough]];
2613 default:
2614 nextToken();
2615 break;
2616 }
2617 } while (!eof());
2618 }
2619
parseSquare(bool LambdaIntroducer)2620 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2621 if (!LambdaIntroducer) {
2622 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2623 if (tryToParseLambda())
2624 return;
2625 }
2626 do {
2627 switch (FormatTok->Tok.getKind()) {
2628 case tok::l_paren:
2629 parseParens();
2630 break;
2631 case tok::r_square:
2632 nextToken();
2633 return;
2634 case tok::r_brace:
2635 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2636 return;
2637 case tok::l_square:
2638 parseSquare();
2639 break;
2640 case tok::l_brace: {
2641 if (!tryToParseBracedList())
2642 parseChildBlock();
2643 break;
2644 }
2645 case tok::at:
2646 nextToken();
2647 if (FormatTok->is(tok::l_brace)) {
2648 nextToken();
2649 parseBracedList();
2650 }
2651 break;
2652 default:
2653 nextToken();
2654 break;
2655 }
2656 } while (!eof());
2657 }
2658
keepAncestorBraces()2659 void UnwrappedLineParser::keepAncestorBraces() {
2660 if (!Style.RemoveBracesLLVM)
2661 return;
2662
2663 const int MaxNestingLevels = 2;
2664 const int Size = NestedTooDeep.size();
2665 if (Size >= MaxNestingLevels)
2666 NestedTooDeep[Size - MaxNestingLevels] = true;
2667 NestedTooDeep.push_back(false);
2668 }
2669
getLastNonComment(const UnwrappedLine & Line)2670 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2671 for (const auto &Token : llvm::reverse(Line.Tokens))
2672 if (Token.Tok->isNot(tok::comment))
2673 return Token.Tok;
2674
2675 return nullptr;
2676 }
2677
parseUnbracedBody(bool CheckEOF)2678 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2679 FormatToken *Tok = nullptr;
2680
2681 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2682 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2683 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2684 ? getLastNonComment(*Line)
2685 : Line->Tokens.back().Tok;
2686 assert(Tok);
2687 if (Tok->BraceCount < 0) {
2688 assert(Tok->BraceCount == -1);
2689 Tok = nullptr;
2690 } else {
2691 Tok->BraceCount = -1;
2692 }
2693 }
2694
2695 addUnwrappedLine();
2696 ++Line->Level;
2697 parseStructuralElement();
2698
2699 if (Tok) {
2700 assert(!Line->InPPDirective);
2701 Tok = nullptr;
2702 for (const auto &L : llvm::reverse(*CurrentLines)) {
2703 if (!L.InPPDirective && getLastNonComment(L)) {
2704 Tok = L.Tokens.back().Tok;
2705 break;
2706 }
2707 }
2708 assert(Tok);
2709 ++Tok->BraceCount;
2710 }
2711
2712 if (CheckEOF && eof())
2713 addUnwrappedLine();
2714
2715 --Line->Level;
2716 }
2717
markOptionalBraces(FormatToken * LeftBrace)2718 static void markOptionalBraces(FormatToken *LeftBrace) {
2719 if (!LeftBrace)
2720 return;
2721
2722 assert(LeftBrace->is(tok::l_brace));
2723
2724 FormatToken *RightBrace = LeftBrace->MatchingParen;
2725 if (!RightBrace) {
2726 assert(!LeftBrace->Optional);
2727 return;
2728 }
2729
2730 assert(RightBrace->is(tok::r_brace));
2731 assert(RightBrace->MatchingParen == LeftBrace);
2732 assert(LeftBrace->Optional == RightBrace->Optional);
2733
2734 LeftBrace->Optional = true;
2735 RightBrace->Optional = true;
2736 }
2737
handleAttributes()2738 void UnwrappedLineParser::handleAttributes() {
2739 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2740 if (FormatTok->is(TT_AttributeMacro))
2741 nextToken();
2742 if (FormatTok->is(tok::l_square))
2743 handleCppAttributes();
2744 }
2745
handleCppAttributes()2746 bool UnwrappedLineParser::handleCppAttributes() {
2747 // Handle [[likely]] / [[unlikely]] attributes.
2748 assert(FormatTok->is(tok::l_square));
2749 if (!tryToParseSimpleAttribute())
2750 return false;
2751 parseSquare();
2752 return true;
2753 }
2754
2755 /// Returns whether \c Tok begins a block.
isBlockBegin(const FormatToken & Tok) const2756 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2757 // FIXME: rename the function or make
2758 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2759 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2760 : Tok.is(tok::l_brace);
2761 }
2762
parseIfThenElse(IfStmtKind * IfKind,bool KeepBraces)2763 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2764 bool KeepBraces) {
2765 assert(FormatTok->is(tok::kw_if) && "'if' expected");
2766 nextToken();
2767 if (FormatTok->is(tok::exclaim))
2768 nextToken();
2769
2770 bool KeepIfBraces = true;
2771 if (FormatTok->is(tok::kw_consteval)) {
2772 nextToken();
2773 } else {
2774 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2775 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2776 nextToken();
2777 if (FormatTok->is(tok::l_paren))
2778 parseParens();
2779 }
2780 handleAttributes();
2781
2782 bool NeedsUnwrappedLine = false;
2783 keepAncestorBraces();
2784
2785 FormatToken *IfLeftBrace = nullptr;
2786 IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2787
2788 if (isBlockBegin(*FormatTok)) {
2789 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2790 IfLeftBrace = FormatTok;
2791 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2792 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2793 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2794 if (Style.BraceWrapping.BeforeElse)
2795 addUnwrappedLine();
2796 else
2797 NeedsUnwrappedLine = true;
2798 } else {
2799 parseUnbracedBody();
2800 }
2801
2802 if (Style.RemoveBracesLLVM) {
2803 assert(!NestedTooDeep.empty());
2804 KeepIfBraces = KeepIfBraces ||
2805 (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2806 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2807 IfBlockKind == IfStmtKind::IfElseIf;
2808 }
2809
2810 bool KeepElseBraces = KeepIfBraces;
2811 FormatToken *ElseLeftBrace = nullptr;
2812 IfStmtKind Kind = IfStmtKind::IfOnly;
2813
2814 if (FormatTok->is(tok::kw_else)) {
2815 if (Style.RemoveBracesLLVM) {
2816 NestedTooDeep.back() = false;
2817 Kind = IfStmtKind::IfElse;
2818 }
2819 nextToken();
2820 handleAttributes();
2821 if (isBlockBegin(*FormatTok)) {
2822 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2823 FormatTok->setFinalizedType(TT_ElseLBrace);
2824 ElseLeftBrace = FormatTok;
2825 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2826 IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2827 FormatToken *IfLBrace =
2828 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2829 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2830 if (FormatTok->is(tok::kw_else)) {
2831 KeepElseBraces = KeepElseBraces ||
2832 ElseBlockKind == IfStmtKind::IfOnly ||
2833 ElseBlockKind == IfStmtKind::IfElseIf;
2834 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2835 KeepElseBraces = true;
2836 assert(ElseLeftBrace->MatchingParen);
2837 markOptionalBraces(ElseLeftBrace);
2838 }
2839 addUnwrappedLine();
2840 } else if (FormatTok->is(tok::kw_if)) {
2841 const FormatToken *Previous = Tokens->getPreviousToken();
2842 assert(Previous);
2843 const bool IsPrecededByComment = Previous->is(tok::comment);
2844 if (IsPrecededByComment) {
2845 addUnwrappedLine();
2846 ++Line->Level;
2847 }
2848 bool TooDeep = true;
2849 if (Style.RemoveBracesLLVM) {
2850 Kind = IfStmtKind::IfElseIf;
2851 TooDeep = NestedTooDeep.pop_back_val();
2852 }
2853 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2854 if (Style.RemoveBracesLLVM)
2855 NestedTooDeep.push_back(TooDeep);
2856 if (IsPrecededByComment)
2857 --Line->Level;
2858 } else {
2859 parseUnbracedBody(/*CheckEOF=*/true);
2860 }
2861 } else {
2862 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2863 if (NeedsUnwrappedLine)
2864 addUnwrappedLine();
2865 }
2866
2867 if (!Style.RemoveBracesLLVM)
2868 return nullptr;
2869
2870 assert(!NestedTooDeep.empty());
2871 KeepElseBraces = KeepElseBraces ||
2872 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2873 NestedTooDeep.back();
2874
2875 NestedTooDeep.pop_back();
2876
2877 if (!KeepIfBraces && !KeepElseBraces) {
2878 markOptionalBraces(IfLeftBrace);
2879 markOptionalBraces(ElseLeftBrace);
2880 } else if (IfLeftBrace) {
2881 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2882 if (IfRightBrace) {
2883 assert(IfRightBrace->MatchingParen == IfLeftBrace);
2884 assert(!IfLeftBrace->Optional);
2885 assert(!IfRightBrace->Optional);
2886 IfLeftBrace->MatchingParen = nullptr;
2887 IfRightBrace->MatchingParen = nullptr;
2888 }
2889 }
2890
2891 if (IfKind)
2892 *IfKind = Kind;
2893
2894 return IfLeftBrace;
2895 }
2896
parseTryCatch()2897 void UnwrappedLineParser::parseTryCatch() {
2898 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2899 nextToken();
2900 bool NeedsUnwrappedLine = false;
2901 if (FormatTok->is(tok::colon)) {
2902 // We are in a function try block, what comes is an initializer list.
2903 nextToken();
2904
2905 // In case identifiers were removed by clang-tidy, what might follow is
2906 // multiple commas in sequence - before the first identifier.
2907 while (FormatTok->is(tok::comma))
2908 nextToken();
2909
2910 while (FormatTok->is(tok::identifier)) {
2911 nextToken();
2912 if (FormatTok->is(tok::l_paren))
2913 parseParens();
2914 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2915 FormatTok->is(tok::l_brace)) {
2916 do {
2917 nextToken();
2918 } while (!FormatTok->is(tok::r_brace));
2919 nextToken();
2920 }
2921
2922 // In case identifiers were removed by clang-tidy, what might follow is
2923 // multiple commas in sequence - after the first identifier.
2924 while (FormatTok->is(tok::comma))
2925 nextToken();
2926 }
2927 }
2928 // Parse try with resource.
2929 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2930 parseParens();
2931
2932 keepAncestorBraces();
2933
2934 if (FormatTok->is(tok::l_brace)) {
2935 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2936 parseBlock();
2937 if (Style.BraceWrapping.BeforeCatch)
2938 addUnwrappedLine();
2939 else
2940 NeedsUnwrappedLine = true;
2941 } else if (!FormatTok->is(tok::kw_catch)) {
2942 // The C++ standard requires a compound-statement after a try.
2943 // If there's none, we try to assume there's a structuralElement
2944 // and try to continue.
2945 addUnwrappedLine();
2946 ++Line->Level;
2947 parseStructuralElement();
2948 --Line->Level;
2949 }
2950 while (true) {
2951 if (FormatTok->is(tok::at))
2952 nextToken();
2953 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2954 tok::kw___finally) ||
2955 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2956 FormatTok->is(Keywords.kw_finally)) ||
2957 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2958 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2959 break;
2960 }
2961 nextToken();
2962 while (FormatTok->isNot(tok::l_brace)) {
2963 if (FormatTok->is(tok::l_paren)) {
2964 parseParens();
2965 continue;
2966 }
2967 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2968 if (Style.RemoveBracesLLVM)
2969 NestedTooDeep.pop_back();
2970 return;
2971 }
2972 nextToken();
2973 }
2974 NeedsUnwrappedLine = false;
2975 Line->MustBeDeclaration = false;
2976 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2977 parseBlock();
2978 if (Style.BraceWrapping.BeforeCatch)
2979 addUnwrappedLine();
2980 else
2981 NeedsUnwrappedLine = true;
2982 }
2983
2984 if (Style.RemoveBracesLLVM)
2985 NestedTooDeep.pop_back();
2986
2987 if (NeedsUnwrappedLine)
2988 addUnwrappedLine();
2989 }
2990
parseNamespace()2991 void UnwrappedLineParser::parseNamespace() {
2992 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2993 "'namespace' expected");
2994
2995 const FormatToken &InitialToken = *FormatTok;
2996 nextToken();
2997 if (InitialToken.is(TT_NamespaceMacro)) {
2998 parseParens();
2999 } else {
3000 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3001 tok::l_square, tok::period, tok::l_paren) ||
3002 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3003 if (FormatTok->is(tok::l_square))
3004 parseSquare();
3005 else if (FormatTok->is(tok::l_paren))
3006 parseParens();
3007 else
3008 nextToken();
3009 }
3010 }
3011 if (FormatTok->is(tok::l_brace)) {
3012 if (ShouldBreakBeforeBrace(Style, InitialToken))
3013 addUnwrappedLine();
3014
3015 unsigned AddLevels =
3016 Style.NamespaceIndentation == FormatStyle::NI_All ||
3017 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3018 DeclarationScopeStack.size() > 1)
3019 ? 1u
3020 : 0u;
3021 bool ManageWhitesmithsBraces =
3022 AddLevels == 0u &&
3023 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3024
3025 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3026 // the whole block.
3027 if (ManageWhitesmithsBraces)
3028 ++Line->Level;
3029
3030 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3031 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3032 ManageWhitesmithsBraces);
3033
3034 // Munch the semicolon after a namespace. This is more common than one would
3035 // think. Putting the semicolon into its own line is very ugly.
3036 if (FormatTok->is(tok::semi))
3037 nextToken();
3038
3039 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3040
3041 if (ManageWhitesmithsBraces)
3042 --Line->Level;
3043 }
3044 // FIXME: Add error handling.
3045 }
3046
parseNew()3047 void UnwrappedLineParser::parseNew() {
3048 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3049 nextToken();
3050
3051 if (Style.isCSharp()) {
3052 do {
3053 // Handle constructor invocation, e.g. `new(field: value)`.
3054 if (FormatTok->is(tok::l_paren))
3055 parseParens();
3056
3057 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3058 if (FormatTok->is(tok::l_brace))
3059 parseBracedList();
3060
3061 if (FormatTok->isOneOf(tok::semi, tok::comma))
3062 return;
3063
3064 nextToken();
3065 } while (!eof());
3066 }
3067
3068 if (Style.Language != FormatStyle::LK_Java)
3069 return;
3070
3071 // In Java, we can parse everything up to the parens, which aren't optional.
3072 do {
3073 // There should not be a ;, { or } before the new's open paren.
3074 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3075 return;
3076
3077 // Consume the parens.
3078 if (FormatTok->is(tok::l_paren)) {
3079 parseParens();
3080
3081 // If there is a class body of an anonymous class, consume that as child.
3082 if (FormatTok->is(tok::l_brace))
3083 parseChildBlock();
3084 return;
3085 }
3086 nextToken();
3087 } while (!eof());
3088 }
3089
parseLoopBody(bool KeepBraces,bool WrapRightBrace)3090 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3091 keepAncestorBraces();
3092
3093 if (isBlockBegin(*FormatTok)) {
3094 if (!KeepBraces)
3095 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3096 FormatToken *LeftBrace = FormatTok;
3097 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3098 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3099 /*MunchSemi=*/true, KeepBraces);
3100 if (!KeepBraces) {
3101 assert(!NestedTooDeep.empty());
3102 if (!NestedTooDeep.back())
3103 markOptionalBraces(LeftBrace);
3104 }
3105 if (WrapRightBrace)
3106 addUnwrappedLine();
3107 } else {
3108 parseUnbracedBody();
3109 }
3110
3111 if (!KeepBraces)
3112 NestedTooDeep.pop_back();
3113 }
3114
parseForOrWhileLoop()3115 void UnwrappedLineParser::parseForOrWhileLoop() {
3116 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
3117 "'for', 'while' or foreach macro expected");
3118 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3119 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3120
3121 nextToken();
3122 // JS' for await ( ...
3123 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3124 nextToken();
3125 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3126 nextToken();
3127 if (FormatTok->is(tok::l_paren))
3128 parseParens();
3129
3130 handleAttributes();
3131 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3132 }
3133
parseDoWhile()3134 void UnwrappedLineParser::parseDoWhile() {
3135 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3136 nextToken();
3137
3138 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3139
3140 // FIXME: Add error handling.
3141 if (!FormatTok->is(tok::kw_while)) {
3142 addUnwrappedLine();
3143 return;
3144 }
3145
3146 // If in Whitesmiths mode, the line with the while() needs to be indented
3147 // to the same level as the block.
3148 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3149 ++Line->Level;
3150
3151 nextToken();
3152 parseStructuralElement();
3153 }
3154
parseLabel(bool LeftAlignLabel)3155 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3156 nextToken();
3157 unsigned OldLineLevel = Line->Level;
3158 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3159 --Line->Level;
3160 if (LeftAlignLabel)
3161 Line->Level = 0;
3162
3163 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3164 FormatTok->is(tok::l_brace)) {
3165
3166 CompoundStatementIndenter Indenter(this, Line->Level,
3167 Style.BraceWrapping.AfterCaseLabel,
3168 Style.BraceWrapping.IndentBraces);
3169 parseBlock();
3170 if (FormatTok->is(tok::kw_break)) {
3171 if (Style.BraceWrapping.AfterControlStatement ==
3172 FormatStyle::BWACS_Always) {
3173 addUnwrappedLine();
3174 if (!Style.IndentCaseBlocks &&
3175 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3176 ++Line->Level;
3177 }
3178 }
3179 parseStructuralElement();
3180 }
3181 addUnwrappedLine();
3182 } else {
3183 if (FormatTok->is(tok::semi))
3184 nextToken();
3185 addUnwrappedLine();
3186 }
3187 Line->Level = OldLineLevel;
3188 if (FormatTok->isNot(tok::l_brace)) {
3189 parseStructuralElement();
3190 addUnwrappedLine();
3191 }
3192 }
3193
parseCaseLabel()3194 void UnwrappedLineParser::parseCaseLabel() {
3195 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3196
3197 // FIXME: fix handling of complex expressions here.
3198 do {
3199 nextToken();
3200 } while (!eof() && !FormatTok->is(tok::colon));
3201 parseLabel();
3202 }
3203
parseSwitch()3204 void UnwrappedLineParser::parseSwitch() {
3205 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3206 nextToken();
3207 if (FormatTok->is(tok::l_paren))
3208 parseParens();
3209
3210 keepAncestorBraces();
3211
3212 if (FormatTok->is(tok::l_brace)) {
3213 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3214 parseBlock();
3215 addUnwrappedLine();
3216 } else {
3217 addUnwrappedLine();
3218 ++Line->Level;
3219 parseStructuralElement();
3220 --Line->Level;
3221 }
3222
3223 if (Style.RemoveBracesLLVM)
3224 NestedTooDeep.pop_back();
3225 }
3226
3227 // Operators that can follow a C variable.
isCOperatorFollowingVar(tok::TokenKind kind)3228 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3229 switch (kind) {
3230 case tok::ampamp:
3231 case tok::ampequal:
3232 case tok::arrow:
3233 case tok::caret:
3234 case tok::caretequal:
3235 case tok::comma:
3236 case tok::ellipsis:
3237 case tok::equal:
3238 case tok::equalequal:
3239 case tok::exclaim:
3240 case tok::exclaimequal:
3241 case tok::greater:
3242 case tok::greaterequal:
3243 case tok::greatergreater:
3244 case tok::greatergreaterequal:
3245 case tok::l_paren:
3246 case tok::l_square:
3247 case tok::less:
3248 case tok::lessequal:
3249 case tok::lessless:
3250 case tok::lesslessequal:
3251 case tok::minus:
3252 case tok::minusequal:
3253 case tok::minusminus:
3254 case tok::percent:
3255 case tok::percentequal:
3256 case tok::period:
3257 case tok::pipe:
3258 case tok::pipeequal:
3259 case tok::pipepipe:
3260 case tok::plus:
3261 case tok::plusequal:
3262 case tok::plusplus:
3263 case tok::question:
3264 case tok::r_brace:
3265 case tok::r_paren:
3266 case tok::r_square:
3267 case tok::semi:
3268 case tok::slash:
3269 case tok::slashequal:
3270 case tok::star:
3271 case tok::starequal:
3272 return true;
3273 default:
3274 return false;
3275 }
3276 }
3277
parseAccessSpecifier()3278 void UnwrappedLineParser::parseAccessSpecifier() {
3279 FormatToken *AccessSpecifierCandidate = FormatTok;
3280 nextToken();
3281 // Understand Qt's slots.
3282 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3283 nextToken();
3284 // Otherwise, we don't know what it is, and we'd better keep the next token.
3285 if (FormatTok->is(tok::colon)) {
3286 nextToken();
3287 addUnwrappedLine();
3288 } else if (!FormatTok->is(tok::coloncolon) &&
3289 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3290 // Not a variable name nor namespace name.
3291 addUnwrappedLine();
3292 } else if (AccessSpecifierCandidate) {
3293 // Consider the access specifier to be a C identifier.
3294 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3295 }
3296 }
3297
/// \brief Parses a requires, decides if it is a clause or an expression.
///
/// The decision is made in up to three steps, each one only reached if the
/// previous one was inconclusive:
///  1. The token directly after `requires`: `{` means expression, anything
///     other than `(` means clause.
///  2. The non-comment token(s) in front of `requires`.
///  3. A bounded lookahead into the tokens after `requires`, checking whether
///     they look like a parameter list (expression) or like a constraint
///     (clause). The token stream is rewound before the chosen parser runs.
///
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided: fall out of the switch into the lookahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are so that every exit below can rewind the token
  // stream before the real parsing starts.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  // Advances the lookahead by one token and counts it against the budget.
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Inspect at most 50 tokens; beyond that we give up and assume a clause.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Treated as part of a parameter list, i.e. a requires expression.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresExpression(RequiresToken);
      return false;
    case tok::r_paren:
    case tok::pipepipe:
      // Treated as part of a constraint, i.e. a requires clause.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that is neither part of a qualified name nor
      // inside angle brackets matches the TYPE NAME pattern of a parameter.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3441
3442 /// \brief Parses a requires clause.
3443 /// \param RequiresToken The requires keyword token, which starts this clause.
3444 /// \pre We need to be on the next token after the requires keyword.
3445 /// \sa parseRequiresExpression
3446 ///
3447 /// Returns if it either has finished parsing the clause, or it detects, that
3448 /// the clause is incorrect.
parseRequiresClause(FormatToken * RequiresToken)3449 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3450 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3451 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3452
3453 // If there is no previous token, we are within a requires expression,
3454 // otherwise we will always have the template or function declaration in front
3455 // of it.
3456 bool InRequiresExpression =
3457 !RequiresToken->Previous ||
3458 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3459
3460 RequiresToken->setFinalizedType(InRequiresExpression
3461 ? TT_RequiresClauseInARequiresExpression
3462 : TT_RequiresClause);
3463
3464 // NOTE: parseConstraintExpression is only ever called from this function.
3465 // It could be inlined into here.
3466 parseConstraintExpression();
3467
3468 if (!InRequiresExpression)
3469 FormatTok->Previous->ClosesRequiresClause = true;
3470 }
3471
3472 /// \brief Parses a requires expression.
3473 /// \param RequiresToken The requires keyword token, which starts this clause.
3474 /// \pre We need to be on the next token after the requires keyword.
3475 /// \sa parseRequiresClause
3476 ///
3477 /// Returns if it either has finished parsing the expression, or it detects,
3478 /// that the expression is incorrect.
parseRequiresExpression(FormatToken * RequiresToken)3479 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3480 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3481 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3482
3483 RequiresToken->setFinalizedType(TT_RequiresExpression);
3484
3485 if (FormatTok->is(tok::l_paren)) {
3486 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3487 parseParens();
3488 }
3489
3490 if (FormatTok->is(tok::l_brace)) {
3491 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3492 parseChildBlock(/*CanContainBracedList=*/false,
3493 /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3494 }
3495 }
3496
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
///
/// The function consumes tokens one group at a time and returns, without
/// consuming it, at the first token that cannot continue the constraint.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Consume the permission for this iteration; it is reset to false and only
    // re-enabled by the cases below that can be followed by a lambda.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires inside a constraint is always an expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Either a lambda (if one is allowed here and it parses), or the end of
      // the constraint expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These do not change whether a lambda may follow.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      // Parse the template argument of the cast up to the closing `>`.
      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      if (Tokens->peekNextToken()->isNot(tok::l_paren))
        return;
      nextToken();
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    }
  } while (!eof());
}
3639
/// Parses an enum declaration or definition.
///
/// \returns false if the tokens turned out not to be an enum after all (a
/// JavaScript/TypeScript property named `enum`, a protobuf field named `enum`,
/// or two identifiers in a row in C++); true otherwise, including the case of
/// a declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Skip the enum name, base type and anything else in front of the body.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
  return true;
}
3725
parseStructLike()3726 bool UnwrappedLineParser::parseStructLike() {
3727 // parseRecord falls through and does not yet add an unwrapped line as a
3728 // record declaration or definition can start a structural element.
3729 parseRecord();
3730 // This does not apply to Java, JavaScript and C#.
3731 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3732 Style.isCSharp()) {
3733 if (FormatTok->is(tok::semi))
3734 nextToken();
3735 addUnwrappedLine();
3736 return true;
3737 }
3738 return false;
3739 }
3740
3741 namespace {
3742 // A class used to set and restore the Token position when peeking
3743 // ahead in the token source.
3744 class ScopedTokenPosition {
3745 unsigned StoredPosition;
3746 FormatTokenSource *Tokens;
3747
3748 public:
ScopedTokenPosition(FormatTokenSource * Tokens)3749 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3750 assert(Tokens && "Tokens expected to not be null");
3751 StoredPosition = Tokens->getPosition();
3752 }
3753
~ScopedTokenPosition()3754 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3755 };
3756 } // namespace
3757
3758 // Look to see if we have [[ by looking ahead, if
3759 // its not then rewind to the original position.
tryToParseSimpleAttribute()3760 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3761 ScopedTokenPosition AutoPosition(Tokens);
3762 FormatToken *Tok = Tokens->getNextToken();
3763 // We already read the first [ check for the second.
3764 if (!Tok->is(tok::l_square))
3765 return false;
3766 // Double check that the attribute is just something
3767 // fairly simple.
3768 while (Tok->isNot(tok::eof)) {
3769 if (Tok->is(tok::r_square))
3770 break;
3771 Tok = Tokens->getNextToken();
3772 }
3773 if (Tok->is(tok::eof))
3774 return false;
3775 Tok = Tokens->getNextToken();
3776 if (!Tok->is(tok::r_square))
3777 return false;
3778 Tok = Tokens->getNextToken();
3779 if (Tok->is(tok::semi))
3780 return false;
3781 return true;
3782 }
3783
parseJavaEnumBody()3784 void UnwrappedLineParser::parseJavaEnumBody() {
3785 assert(FormatTok->is(tok::l_brace));
3786 const FormatToken *OpeningBrace = FormatTok;
3787
3788 // Determine whether the enum is simple, i.e. does not have a semicolon or
3789 // constants with class bodies. Simple enums can be formatted like braced
3790 // lists, contracted to a single line, etc.
3791 unsigned StoredPosition = Tokens->getPosition();
3792 bool IsSimple = true;
3793 FormatToken *Tok = Tokens->getNextToken();
3794 while (!Tok->is(tok::eof)) {
3795 if (Tok->is(tok::r_brace))
3796 break;
3797 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3798 IsSimple = false;
3799 break;
3800 }
3801 // FIXME: This will also mark enums with braces in the arguments to enum
3802 // constants as "not simple". This is probably fine in practice, though.
3803 Tok = Tokens->getNextToken();
3804 }
3805 FormatTok = Tokens->setPosition(StoredPosition);
3806
3807 if (IsSimple) {
3808 nextToken();
3809 parseBracedList();
3810 addUnwrappedLine();
3811 return;
3812 }
3813
3814 // Parse the body of a more complex enum.
3815 // First add a line for everything up to the "{".
3816 nextToken();
3817 addUnwrappedLine();
3818 ++Line->Level;
3819
3820 // Parse the enum constants.
3821 while (!eof()) {
3822 if (FormatTok->is(tok::l_brace)) {
3823 // Parse the constant's class body.
3824 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3825 /*MunchSemi=*/false);
3826 } else if (FormatTok->is(tok::l_paren)) {
3827 parseParens();
3828 } else if (FormatTok->is(tok::comma)) {
3829 nextToken();
3830 addUnwrappedLine();
3831 } else if (FormatTok->is(tok::semi)) {
3832 nextToken();
3833 addUnwrappedLine();
3834 break;
3835 } else if (FormatTok->is(tok::r_brace)) {
3836 addUnwrappedLine();
3837 break;
3838 } else {
3839 nextToken();
3840 }
3841 }
3842
3843 // Parse the class body after the enum's ";" if any.
3844 parseLevel(OpeningBrace);
3845 nextToken();
3846 --Line->Level;
3847 addUnwrappedLine();
3848 }
3849
/// Parses a record (class, struct, union, or another record-like construct
/// such as an interface) declaration or definition.
///
/// \param ParseAsExpr If true, the record body is parsed as a child block;
///        otherwise it is parsed as a regular declaration block.
///
/// No unwrapped line is added at the end, so that declarators following the
/// body stay on the same unwrapped line.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    // An identifier that is not written entirely in upper case is not
    // treated as a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the base clause / template arguments up to the record body.
    do {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        } else {
          parseSquare();
          continue;
        }
      }
      // A semicolon here means this was only a declaration.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the keyword that introduced the record to the type of its left
  // brace.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3955
parseObjCMethod()3956 void UnwrappedLineParser::parseObjCMethod() {
3957 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3958 "'(' or identifier expected.");
3959 do {
3960 if (FormatTok->is(tok::semi)) {
3961 nextToken();
3962 addUnwrappedLine();
3963 return;
3964 } else if (FormatTok->is(tok::l_brace)) {
3965 if (Style.BraceWrapping.AfterFunction)
3966 addUnwrappedLine();
3967 parseBlock();
3968 addUnwrappedLine();
3969 return;
3970 } else {
3971 nextToken();
3972 }
3973 } while (!eof());
3974 }
3975
parseObjCProtocolList()3976 void UnwrappedLineParser::parseObjCProtocolList() {
3977 assert(FormatTok->is(tok::less) && "'<' expected.");
3978 do {
3979 nextToken();
3980 // Early exit in case someone forgot a close angle.
3981 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3982 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3983 return;
3984 }
3985 } while (!eof() && FormatTok->isNot(tok::greater));
3986 nextToken(); // Skip '>'.
3987 }
3988
parseObjCUntilAtEnd()3989 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3990 do {
3991 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3992 nextToken();
3993 addUnwrappedLine();
3994 break;
3995 }
3996 if (FormatTok->is(tok::l_brace)) {
3997 parseBlock();
3998 // In ObjC interfaces, nothing should be following the "}".
3999 addUnwrappedLine();
4000 } else if (FormatTok->is(tok::r_brace)) {
4001 // Ignore stray "}". parseStructuralElement doesn't consume them.
4002 nextToken();
4003 addUnwrappedLine();
4004 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4005 nextToken();
4006 parseObjCMethod();
4007 } else {
4008 parseStructuralElement();
4009 }
4010 } while (!eof());
4011 }
4012
parseObjCInterfaceOrImplementation()4013 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4014 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4015 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4016 nextToken();
4017 nextToken(); // interface name
4018
4019 // @interface can be followed by a lightweight generic
4020 // specialization list, then either a base class or a category.
4021 if (FormatTok->is(tok::less))
4022 parseObjCLightweightGenerics();
4023 if (FormatTok->is(tok::colon)) {
4024 nextToken();
4025 nextToken(); // base class name
4026 // The base class can also have lightweight generics applied to it.
4027 if (FormatTok->is(tok::less))
4028 parseObjCLightweightGenerics();
4029 } else if (FormatTok->is(tok::l_paren)) {
4030 // Skip category, if present.
4031 parseParens();
4032 }
4033
4034 if (FormatTok->is(tok::less))
4035 parseObjCProtocolList();
4036
4037 if (FormatTok->is(tok::l_brace)) {
4038 if (Style.BraceWrapping.AfterObjCDeclaration)
4039 addUnwrappedLine();
4040 parseBlock(/*MustBeDeclaration=*/true);
4041 }
4042
4043 // With instance variables, this puts '}' on its own line. Without instance
4044 // variables, this ends the @interface line.
4045 addUnwrappedLine();
4046
4047 parseObjCUntilAtEnd();
4048 }
4049
parseObjCLightweightGenerics()4050 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4051 assert(FormatTok->is(tok::less));
4052 // Unlike protocol lists, generic parameterizations support
4053 // nested angles:
4054 //
4055 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4056 // NSObject <NSCopying, NSSecureCoding>
4057 //
4058 // so we need to count how many open angles we have left.
4059 unsigned NumOpenAngles = 1;
4060 do {
4061 nextToken();
4062 // Early exit in case someone forgot a close angle.
4063 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4064 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4065 break;
4066 }
4067 if (FormatTok->is(tok::less)) {
4068 ++NumOpenAngles;
4069 } else if (FormatTok->is(tok::greater)) {
4070 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4071 --NumOpenAngles;
4072 }
4073 } while (!eof() && NumOpenAngles != 0);
4074 nextToken(); // Skip '>'.
4075 }
4076
4077 // Returns true for the declaration/definition form of @protocol,
4078 // false for the expression form.
parseObjCProtocol()4079 bool UnwrappedLineParser::parseObjCProtocol() {
4080 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4081 nextToken();
4082
4083 if (FormatTok->is(tok::l_paren)) {
4084 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4085 return false;
4086 }
4087
4088 // The definition/declaration form,
4089 // @protocol Foo
4090 // - (int)someMethod;
4091 // @end
4092
4093 nextToken(); // protocol name
4094
4095 if (FormatTok->is(tok::less))
4096 parseObjCProtocolList();
4097
4098 // Check for protocol declaration.
4099 if (FormatTok->is(tok::semi)) {
4100 nextToken();
4101 addUnwrappedLine();
4102 return true;
4103 }
4104
4105 addUnwrappedLine();
4106 parseObjCUntilAtEnd();
4107 return true;
4108 }
4109
parseJavaScriptEs6ImportExport()4110 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4111 bool IsImport = FormatTok->is(Keywords.kw_import);
4112 assert(IsImport || FormatTok->is(tok::kw_export));
4113 nextToken();
4114
4115 // Consume the "default" in "export default class/function".
4116 if (FormatTok->is(tok::kw_default))
4117 nextToken();
4118
4119 // Consume "async function", "function" and "default function", so that these
4120 // get parsed as free-standing JS functions, i.e. do not require a trailing
4121 // semicolon.
4122 if (FormatTok->is(Keywords.kw_async))
4123 nextToken();
4124 if (FormatTok->is(Keywords.kw_function)) {
4125 nextToken();
4126 return;
4127 }
4128
4129 // For imports, `export *`, `export {...}`, consume the rest of the line up
4130 // to the terminating `;`. For everything else, just return and continue
4131 // parsing the structural element, i.e. the declaration or expression for
4132 // `export default`.
4133 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4134 !FormatTok->isStringLiteral()) {
4135 return;
4136 }
4137
4138 while (!eof()) {
4139 if (FormatTok->is(tok::semi))
4140 return;
4141 if (Line->Tokens.empty()) {
4142 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4143 // import statement should terminate.
4144 return;
4145 }
4146 if (FormatTok->is(tok::l_brace)) {
4147 FormatTok->setBlockKind(BK_Block);
4148 nextToken();
4149 parseBracedList();
4150 } else {
4151 nextToken();
4152 }
4153 }
4154 }
4155
parseStatementMacro()4156 void UnwrappedLineParser::parseStatementMacro() {
4157 nextToken();
4158 if (FormatTok->is(tok::l_paren))
4159 parseParens();
4160 if (FormatTok->is(tok::semi))
4161 nextToken();
4162 addUnwrappedLine();
4163 }
4164
parseVerilogHierarchyIdentifier()4165 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4166 // consume things like a::`b.c[d:e] or a::*
4167 while (true) {
4168 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4169 tok::coloncolon, tok::hash) ||
4170 Keywords.isVerilogIdentifier(*FormatTok)) {
4171 nextToken();
4172 } else if (FormatTok->is(tok::l_square)) {
4173 parseSquare();
4174 } else {
4175 break;
4176 }
4177 }
4178 }
4179
parseVerilogSensitivityList()4180 void UnwrappedLineParser::parseVerilogSensitivityList() {
4181 if (!FormatTok->is(tok::at))
4182 return;
4183 nextToken();
4184 // A block event expression has 2 at signs.
4185 if (FormatTok->is(tok::at))
4186 nextToken();
4187 switch (FormatTok->Tok.getKind()) {
4188 case tok::star:
4189 nextToken();
4190 break;
4191 case tok::l_paren:
4192 parseParens();
4193 break;
4194 default:
4195 parseVerilogHierarchyIdentifier();
4196 break;
4197 }
4198 }
4199
parseVerilogHierarchyHeader()4200 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4201 unsigned AddLevels = 0;
4202
4203 if (FormatTok->is(Keywords.kw_clocking)) {
4204 nextToken();
4205 if (Keywords.isVerilogIdentifier(*FormatTok))
4206 nextToken();
4207 parseVerilogSensitivityList();
4208 if (FormatTok->is(tok::semi))
4209 nextToken();
4210 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4211 Keywords.kw_casez, Keywords.kw_randcase,
4212 Keywords.kw_randsequence)) {
4213 if (Style.IndentCaseLabels)
4214 AddLevels++;
4215 nextToken();
4216 if (FormatTok->is(tok::l_paren)) {
4217 FormatTok->setFinalizedType(TT_ConditionLParen);
4218 parseParens();
4219 }
4220 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4221 nextToken();
4222 // The case header has no semicolon.
4223 } else {
4224 // "module" etc.
4225 nextToken();
4226 // all the words like the name of the module and specifiers like
4227 // "automatic" and the width of function return type
4228 while (true) {
4229 if (FormatTok->is(tok::l_square)) {
4230 auto Prev = FormatTok->getPreviousNonComment();
4231 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4232 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4233 parseSquare();
4234 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4235 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4236 nextToken();
4237 } else {
4238 break;
4239 }
4240 }
4241
4242 auto NewLine = [this]() {
4243 addUnwrappedLine();
4244 Line->IsContinuation = true;
4245 };
4246
4247 // package imports
4248 while (FormatTok->is(Keywords.kw_import)) {
4249 NewLine();
4250 nextToken();
4251 parseVerilogHierarchyIdentifier();
4252 if (FormatTok->is(tok::semi))
4253 nextToken();
4254 }
4255
4256 // parameters and ports
4257 if (FormatTok->is(Keywords.kw_verilogHash)) {
4258 NewLine();
4259 nextToken();
4260 if (FormatTok->is(tok::l_paren))
4261 parseParens();
4262 }
4263 if (FormatTok->is(tok::l_paren)) {
4264 NewLine();
4265 parseParens();
4266 }
4267
4268 // extends and implements
4269 if (FormatTok->is(Keywords.kw_extends)) {
4270 NewLine();
4271 nextToken();
4272 parseVerilogHierarchyIdentifier();
4273 if (FormatTok->is(tok::l_paren))
4274 parseParens();
4275 }
4276 if (FormatTok->is(Keywords.kw_implements)) {
4277 NewLine();
4278 do {
4279 nextToken();
4280 parseVerilogHierarchyIdentifier();
4281 } while (FormatTok->is(tok::comma));
4282 }
4283
4284 // Coverage event for cover groups.
4285 if (FormatTok->is(tok::at)) {
4286 NewLine();
4287 parseVerilogSensitivityList();
4288 }
4289
4290 if (FormatTok->is(tok::semi))
4291 nextToken(/*LevelDifference=*/1);
4292 addUnwrappedLine();
4293 }
4294
4295 return AddLevels;
4296 }
4297
parseVerilogTable()4298 void UnwrappedLineParser::parseVerilogTable() {
4299 assert(FormatTok->is(Keywords.kw_table));
4300 nextToken(/*LevelDifference=*/1);
4301 addUnwrappedLine();
4302
4303 auto InitialLevel = Line->Level++;
4304 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4305 FormatToken *Tok = FormatTok;
4306 nextToken();
4307 if (Tok->is(tok::semi))
4308 addUnwrappedLine();
4309 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4310 Tok->setFinalizedType(TT_VerilogTableItem);
4311 }
4312 Line->Level = InitialLevel;
4313 nextToken(/*LevelDifference=*/-1);
4314 addUnwrappedLine();
4315 }
4316
parseVerilogCaseLabel()4317 void UnwrappedLineParser::parseVerilogCaseLabel() {
4318 // The label will get unindented in AnnotatingParser. If there are no leading
4319 // spaces, indent the rest here so that things inside the block will be
4320 // indented relative to things outside. We don't use parseLabel because we
4321 // don't know whether this colon is a label or a ternary expression at this
4322 // point.
4323 auto OrigLevel = Line->Level;
4324 auto FirstLine = CurrentLines->size();
4325 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4326 ++Line->Level;
4327 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4328 --Line->Level;
4329 parseStructuralElement();
4330 // Restore the indentation in both the new line and the line that has the
4331 // label.
4332 if (CurrentLines->size() > FirstLine)
4333 (*CurrentLines)[FirstLine].Level = OrigLevel;
4334 Line->Level = OrigLevel;
4335 }
4336
addUnwrappedLine(LineLevel AdjustLevel)4337 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4338 if (Line->Tokens.empty())
4339 return;
4340 LLVM_DEBUG({
4341 if (CurrentLines == &Lines)
4342 printDebugInfo(*Line);
4343 });
4344
4345 // If this line closes a block when in Whitesmiths mode, remember that
4346 // information so that the level can be decreased after the line is added.
4347 // This has to happen after the addition of the line since the line itself
4348 // needs to be indented.
4349 bool ClosesWhitesmithsBlock =
4350 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4351 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4352
4353 CurrentLines->push_back(std::move(*Line));
4354 Line->Tokens.clear();
4355 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4356 Line->FirstStartColumn = 0;
4357 Line->IsContinuation = false;
4358
4359 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4360 --Line->Level;
4361 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
4362 CurrentLines->append(
4363 std::make_move_iterator(PreprocessorDirectives.begin()),
4364 std::make_move_iterator(PreprocessorDirectives.end()));
4365 PreprocessorDirectives.clear();
4366 }
4367 // Disconnect the current token from the last token on the previous line.
4368 FormatTok->Previous = nullptr;
4369 }
4370
eof() const4371 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4372
isOnNewLine(const FormatToken & FormatTok)4373 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4374 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4375 FormatTok.NewlinesBefore > 0;
4376 }
4377
4378 // Checks if \p FormatTok is a line comment that continues the line comment
4379 // section on \p Line.
4380 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)4381 continuesLineCommentSection(const FormatToken &FormatTok,
4382 const UnwrappedLine &Line,
4383 const llvm::Regex &CommentPragmasRegex) {
4384 if (Line.Tokens.empty())
4385 return false;
4386
4387 StringRef IndentContent = FormatTok.TokenText;
4388 if (FormatTok.TokenText.startswith("//") ||
4389 FormatTok.TokenText.startswith("/*")) {
4390 IndentContent = FormatTok.TokenText.substr(2);
4391 }
4392 if (CommentPragmasRegex.match(IndentContent))
4393 return false;
4394
4395 // If Line starts with a line comment, then FormatTok continues the comment
4396 // section if its original column is greater or equal to the original start
4397 // column of the line.
4398 //
4399 // Define the min column token of a line as follows: if a line ends in '{' or
4400 // contains a '{' followed by a line comment, then the min column token is
4401 // that '{'. Otherwise, the min column token of the line is the first token of
4402 // the line.
4403 //
4404 // If Line starts with a token other than a line comment, then FormatTok
4405 // continues the comment section if its original column is greater than the
4406 // original start column of the min column token of the line.
4407 //
4408 // For example, the second line comment continues the first in these cases:
4409 //
4410 // // first line
4411 // // second line
4412 //
4413 // and:
4414 //
4415 // // first line
4416 // // second line
4417 //
4418 // and:
4419 //
4420 // int i; // first line
4421 // // second line
4422 //
4423 // and:
4424 //
4425 // do { // first line
4426 // // second line
4427 // int i;
4428 // } while (true);
4429 //
4430 // and:
4431 //
4432 // enum {
4433 // a, // first line
4434 // // second line
4435 // b
4436 // };
4437 //
4438 // The second line comment doesn't continue the first in these cases:
4439 //
4440 // // first line
4441 // // second line
4442 //
4443 // and:
4444 //
4445 // int i; // first line
4446 // // second line
4447 //
4448 // and:
4449 //
4450 // do { // first line
4451 // // second line
4452 // int i;
4453 // } while (true);
4454 //
4455 // and:
4456 //
4457 // enum {
4458 // a, // first line
4459 // // second line
4460 // };
4461 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4462
4463 // Scan for '{//'. If found, use the column of '{' as a min column for line
4464 // comment section continuation.
4465 const FormatToken *PreviousToken = nullptr;
4466 for (const UnwrappedLineNode &Node : Line.Tokens) {
4467 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4468 isLineComment(*Node.Tok)) {
4469 MinColumnToken = PreviousToken;
4470 break;
4471 }
4472 PreviousToken = Node.Tok;
4473
4474 // Grab the last newline preceding a token in this unwrapped line.
4475 if (Node.Tok->NewlinesBefore > 0)
4476 MinColumnToken = Node.Tok;
4477 }
4478 if (PreviousToken && PreviousToken->is(tok::l_brace))
4479 MinColumnToken = PreviousToken;
4480
4481 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4482 MinColumnToken);
4483 }
4484
flushComments(bool NewlineBeforeNext)4485 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4486 bool JustComments = Line->Tokens.empty();
4487 for (FormatToken *Tok : CommentsBeforeNextToken) {
4488 // Line comments that belong to the same line comment section are put on the
4489 // same line since later we might want to reflow content between them.
4490 // Additional fine-grained breaking of line comment sections is controlled
4491 // by the class BreakableLineCommentSection in case it is desirable to keep
4492 // several line comment sections in the same unwrapped line.
4493 //
4494 // FIXME: Consider putting separate line comment sections as children to the
4495 // unwrapped line instead.
4496 Tok->ContinuesLineCommentSection =
4497 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4498 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4499 addUnwrappedLine();
4500 pushToken(Tok);
4501 }
4502 if (NewlineBeforeNext && JustComments)
4503 addUnwrappedLine();
4504 CommentsBeforeNextToken.clear();
4505 }
4506
nextToken(int LevelDifference)4507 void UnwrappedLineParser::nextToken(int LevelDifference) {
4508 if (eof())
4509 return;
4510 flushComments(isOnNewLine(*FormatTok));
4511 pushToken(FormatTok);
4512 FormatToken *Previous = FormatTok;
4513 if (!Style.isJavaScript())
4514 readToken(LevelDifference);
4515 else
4516 readTokenWithJavaScriptASI();
4517 FormatTok->Previous = Previous;
4518 if (Style.isVerilog()) {
4519 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4520 // keywords like `begin`, we can't treat them the same as left braces
4521 // because some contexts require one of them. For example structs use
4522 // braces and if blocks use keywords, and a left brace can occur in an if
4523 // statement, but it is not a block. For keywords like `end`, we simply
4524 // treat them the same as right braces.
4525 if (Keywords.isVerilogEnd(*FormatTok))
4526 FormatTok->Tok.setKind(tok::r_brace);
4527 }
4528 }
4529
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)4530 void UnwrappedLineParser::distributeComments(
4531 const SmallVectorImpl<FormatToken *> &Comments,
4532 const FormatToken *NextTok) {
4533 // Whether or not a line comment token continues a line is controlled by
4534 // the method continuesLineCommentSection, with the following caveat:
4535 //
4536 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4537 // that each comment line from the trail is aligned with the next token, if
4538 // the next token exists. If a trail exists, the beginning of the maximal
4539 // trail is marked as a start of a new comment section.
4540 //
4541 // For example in this code:
4542 //
4543 // int a; // line about a
4544 // // line 1 about b
4545 // // line 2 about b
4546 // int b;
4547 //
4548 // the two lines about b form a maximal trail, so there are two sections, the
4549 // first one consisting of the single comment "// line about a" and the
4550 // second one consisting of the next two comments.
4551 if (Comments.empty())
4552 return;
4553 bool ShouldPushCommentsInCurrentLine = true;
4554 bool HasTrailAlignedWithNextToken = false;
4555 unsigned StartOfTrailAlignedWithNextToken = 0;
4556 if (NextTok) {
4557 // We are skipping the first element intentionally.
4558 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4559 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4560 HasTrailAlignedWithNextToken = true;
4561 StartOfTrailAlignedWithNextToken = i;
4562 }
4563 }
4564 }
4565 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4566 FormatToken *FormatTok = Comments[i];
4567 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4568 FormatTok->ContinuesLineCommentSection = false;
4569 } else {
4570 FormatTok->ContinuesLineCommentSection =
4571 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4572 }
4573 if (!FormatTok->ContinuesLineCommentSection &&
4574 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4575 ShouldPushCommentsInCurrentLine = false;
4576 }
4577 if (ShouldPushCommentsInCurrentLine)
4578 pushToken(FormatTok);
4579 else
4580 CommentsBeforeNextToken.push_back(FormatTok);
4581 }
4582 }
4583
readToken(int LevelDifference)4584 void UnwrappedLineParser::readToken(int LevelDifference) {
4585 SmallVector<FormatToken *, 1> Comments;
4586 bool PreviousWasComment = false;
4587 bool FirstNonCommentOnLine = false;
4588 do {
4589 FormatTok = Tokens->getNextToken();
4590 assert(FormatTok);
4591 while (FormatTok->getType() == TT_ConflictStart ||
4592 FormatTok->getType() == TT_ConflictEnd ||
4593 FormatTok->getType() == TT_ConflictAlternative) {
4594 if (FormatTok->getType() == TT_ConflictStart)
4595 conditionalCompilationStart(/*Unreachable=*/false);
4596 else if (FormatTok->getType() == TT_ConflictAlternative)
4597 conditionalCompilationAlternative();
4598 else if (FormatTok->getType() == TT_ConflictEnd)
4599 conditionalCompilationEnd();
4600 FormatTok = Tokens->getNextToken();
4601 FormatTok->MustBreakBefore = true;
4602 }
4603
4604 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4605 const FormatToken &Tok,
4606 bool PreviousWasComment) {
4607 auto IsFirstOnLine = [](const FormatToken &Tok) {
4608 return Tok.HasUnescapedNewline || Tok.IsFirst;
4609 };
4610
4611 // Consider preprocessor directives preceded by block comments as first
4612 // on line.
4613 if (PreviousWasComment)
4614 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4615 return IsFirstOnLine(Tok);
4616 };
4617
4618 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4619 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4620 PreviousWasComment = FormatTok->is(tok::comment);
4621
4622 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4623 (!Style.isVerilog() ||
4624 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4625 FirstNonCommentOnLine) {
4626 distributeComments(Comments, FormatTok);
4627 Comments.clear();
4628 // If there is an unfinished unwrapped line, we flush the preprocessor
4629 // directives only after that unwrapped line was finished later.
4630 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4631 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4632 assert((LevelDifference >= 0 ||
4633 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4634 "LevelDifference makes Line->Level negative");
4635 Line->Level += LevelDifference;
4636 // Comments stored before the preprocessor directive need to be output
4637 // before the preprocessor directive, at the same level as the
4638 // preprocessor directive, as we consider them to apply to the directive.
4639 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4640 PPBranchLevel > 0) {
4641 Line->Level += PPBranchLevel;
4642 }
4643 flushComments(isOnNewLine(*FormatTok));
4644 parsePPDirective();
4645 PreviousWasComment = FormatTok->is(tok::comment);
4646 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4647 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4648 }
4649
4650 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4651 !Line->InPPDirective) {
4652 continue;
4653 }
4654
4655 if (!FormatTok->is(tok::comment)) {
4656 distributeComments(Comments, FormatTok);
4657 Comments.clear();
4658 return;
4659 }
4660
4661 Comments.push_back(FormatTok);
4662 } while (!eof());
4663
4664 distributeComments(Comments, nullptr);
4665 Comments.clear();
4666 }
4667
pushToken(FormatToken * Tok)4668 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4669 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4670 if (MustBreakBeforeNextToken) {
4671 Line->Tokens.back().Tok->MustBreakBefore = true;
4672 MustBreakBeforeNextToken = false;
4673 }
4674 }
4675
4676 } // end namespace format
4677 } // end namespace clang
4678