1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "FormatTokenLexer.h"
18 #include "FormatTokenSource.h"
19 #include "Macros.h"
20 #include "TokenAnnotator.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_os_ostream.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 #include <algorithm>
29 #include <utility>
30
31 #define DEBUG_TYPE "format-parser"
32
33 namespace clang {
34 namespace format {
35
36 namespace {
37
printLine(llvm::raw_ostream & OS,const UnwrappedLine & Line,StringRef Prefix="",bool PrintText=false)38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39 StringRef Prefix = "", bool PrintText = false) {
40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41 << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42 bool NewLine = false;
43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44 E = Line.Tokens.end();
45 I != E; ++I) {
46 if (NewLine) {
47 OS << Prefix;
48 NewLine = false;
49 }
50 OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType()
51 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
52 << "\"] ";
53 for (SmallVectorImpl<UnwrappedLine>::const_iterator
54 CI = I->Children.begin(),
55 CE = I->Children.end();
56 CI != CE; ++CI) {
57 OS << "\n";
58 printLine(OS, *CI, (Prefix + " ").str());
59 NewLine = true;
60 }
61 }
62 if (!NewLine)
63 OS << "\n";
64 }
65
printDebugInfo(const UnwrappedLine & Line)66 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
67 printLine(llvm::dbgs(), Line);
68 }
69
70 class ScopedDeclarationState {
71 public:
ScopedDeclarationState(UnwrappedLine & Line,llvm::BitVector & Stack,bool MustBeDeclaration)72 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
73 bool MustBeDeclaration)
74 : Line(Line), Stack(Stack) {
75 Line.MustBeDeclaration = MustBeDeclaration;
76 Stack.push_back(MustBeDeclaration);
77 }
~ScopedDeclarationState()78 ~ScopedDeclarationState() {
79 Stack.pop_back();
80 if (!Stack.empty())
81 Line.MustBeDeclaration = Stack.back();
82 else
83 Line.MustBeDeclaration = true;
84 }
85
86 private:
87 UnwrappedLine &Line;
88 llvm::BitVector &Stack;
89 };
90
91 } // end anonymous namespace
92
93 class ScopedLineState {
94 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)95 ScopedLineState(UnwrappedLineParser &Parser,
96 bool SwitchToPreprocessorLines = false)
97 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
98 if (SwitchToPreprocessorLines)
99 Parser.CurrentLines = &Parser.PreprocessorDirectives;
100 else if (!Parser.Line->Tokens.empty())
101 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
102 PreBlockLine = std::move(Parser.Line);
103 Parser.Line = std::make_unique<UnwrappedLine>();
104 Parser.Line->Level = PreBlockLine->Level;
105 Parser.Line->PPLevel = PreBlockLine->PPLevel;
106 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
107 Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
108 }
109
~ScopedLineState()110 ~ScopedLineState() {
111 if (!Parser.Line->Tokens.empty())
112 Parser.addUnwrappedLine();
113 assert(Parser.Line->Tokens.empty());
114 Parser.Line = std::move(PreBlockLine);
115 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
116 Parser.MustBreakBeforeNextToken = true;
117 Parser.CurrentLines = OriginalLines;
118 }
119
120 private:
121 UnwrappedLineParser &Parser;
122
123 std::unique_ptr<UnwrappedLine> PreBlockLine;
124 SmallVectorImpl<UnwrappedLine> *OriginalLines;
125 };
126
127 class CompoundStatementIndenter {
128 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)129 CompoundStatementIndenter(UnwrappedLineParser *Parser,
130 const FormatStyle &Style, unsigned &LineLevel)
131 : CompoundStatementIndenter(Parser, LineLevel,
132 Style.BraceWrapping.AfterControlStatement,
133 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)134 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
135 bool WrapBrace, bool IndentBrace)
136 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
137 if (WrapBrace)
138 Parser->addUnwrappedLine();
139 if (IndentBrace)
140 ++LineLevel;
141 }
~CompoundStatementIndenter()142 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
143
144 private:
145 unsigned &LineLevel;
146 unsigned OldLineLevel;
147 };
148
UnwrappedLineParser(SourceManager & SourceMgr,const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)149 UnwrappedLineParser::UnwrappedLineParser(
150 SourceManager &SourceMgr, const FormatStyle &Style,
151 const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
152 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
153 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
154 IdentifierTable &IdentTable)
155 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
156 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
157 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
158 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
159 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
160 ? IG_Rejected
161 : IG_Inited),
162 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
163 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
164
reset()165 void UnwrappedLineParser::reset() {
166 PPBranchLevel = -1;
167 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
168 ? IG_Rejected
169 : IG_Inited;
170 IncludeGuardToken = nullptr;
171 Line.reset(new UnwrappedLine);
172 CommentsBeforeNextToken.clear();
173 FormatTok = nullptr;
174 MustBreakBeforeNextToken = false;
175 IsDecltypeAutoFunction = false;
176 PreprocessorDirectives.clear();
177 CurrentLines = &Lines;
178 DeclarationScopeStack.clear();
179 NestedTooDeep.clear();
180 NestedLambdas.clear();
181 PPStack.clear();
182 Line->FirstStartColumn = FirstStartColumn;
183
184 if (!Unexpanded.empty())
185 for (FormatToken *Token : AllTokens)
186 Token->MacroCtx.reset();
187 CurrentExpandedLines.clear();
188 ExpandedLines.clear();
189 Unexpanded.clear();
190 InExpansion = false;
191 Reconstruct.reset();
192 }
193
parse()194 void UnwrappedLineParser::parse() {
195 IndexedTokenSource TokenSource(AllTokens);
196 Line->FirstStartColumn = FirstStartColumn;
197 do {
198 LLVM_DEBUG(llvm::dbgs() << "----\n");
199 reset();
200 Tokens = &TokenSource;
201 TokenSource.reset();
202
203 readToken();
204 parseFile();
205
206 // If we found an include guard then all preprocessor directives (other than
207 // the guard) are over-indented by one.
208 if (IncludeGuard == IG_Found) {
209 for (auto &Line : Lines)
210 if (Line.InPPDirective && Line.Level > 0)
211 --Line.Level;
212 }
213
214 // Create line with eof token.
215 assert(eof());
216 pushToken(FormatTok);
217 addUnwrappedLine();
218
219 // In a first run, format everything with the lines containing macro calls
220 // replaced by the expansion.
221 if (!ExpandedLines.empty()) {
222 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
223 for (const auto &Line : Lines) {
224 if (!Line.Tokens.empty()) {
225 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
226 if (it != ExpandedLines.end()) {
227 for (const auto &Expanded : it->second) {
228 LLVM_DEBUG(printDebugInfo(Expanded));
229 Callback.consumeUnwrappedLine(Expanded);
230 }
231 continue;
232 }
233 }
234 LLVM_DEBUG(printDebugInfo(Line));
235 Callback.consumeUnwrappedLine(Line);
236 }
237 Callback.finishRun();
238 }
239
240 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
241 for (const UnwrappedLine &Line : Lines) {
242 LLVM_DEBUG(printDebugInfo(Line));
243 Callback.consumeUnwrappedLine(Line);
244 }
245 Callback.finishRun();
246 Lines.clear();
247 while (!PPLevelBranchIndex.empty() &&
248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
251 }
252 if (!PPLevelBranchIndex.empty()) {
253 ++PPLevelBranchIndex.back();
254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
256 }
257 } while (!PPLevelBranchIndex.empty());
258 }
259
parseFile()260 void UnwrappedLineParser::parseFile() {
261 // The top-level context in a file always has declarations, except for pre-
262 // processor directives and JavaScript files.
263 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
264 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
265 MustBeDeclaration);
266 if (Style.Language == FormatStyle::LK_TextProto)
267 parseBracedList();
268 else
269 parseLevel();
270 // Make sure to format the remaining tokens.
271 //
272 // LK_TextProto is special since its top-level is parsed as the body of a
273 // braced list, which does not necessarily have natural line separators such
274 // as a semicolon. Comments after the last entry that have been determined to
275 // not belong to that line, as in:
276 // key: value
277 // // endfile comment
278 // do not have a chance to be put on a line of their own until this point.
279 // Here we add this newline before end-of-file comments.
280 if (Style.Language == FormatStyle::LK_TextProto &&
281 !CommentsBeforeNextToken.empty()) {
282 addUnwrappedLine();
283 }
284 flushComments(true);
285 addUnwrappedLine();
286 }
287
parseCSharpGenericTypeConstraint()288 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
289 do {
290 switch (FormatTok->Tok.getKind()) {
291 case tok::l_brace:
292 return;
293 default:
294 if (FormatTok->is(Keywords.kw_where)) {
295 addUnwrappedLine();
296 nextToken();
297 parseCSharpGenericTypeConstraint();
298 break;
299 }
300 nextToken();
301 break;
302 }
303 } while (!eof());
304 }
305
parseCSharpAttribute()306 void UnwrappedLineParser::parseCSharpAttribute() {
307 int UnpairedSquareBrackets = 1;
308 do {
309 switch (FormatTok->Tok.getKind()) {
310 case tok::r_square:
311 nextToken();
312 --UnpairedSquareBrackets;
313 if (UnpairedSquareBrackets == 0) {
314 addUnwrappedLine();
315 return;
316 }
317 break;
318 case tok::l_square:
319 ++UnpairedSquareBrackets;
320 nextToken();
321 break;
322 default:
323 nextToken();
324 break;
325 }
326 } while (!eof());
327 }
328
precededByCommentOrPPDirective() const329 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
330 if (!Lines.empty() && Lines.back().InPPDirective)
331 return true;
332
333 const FormatToken *Previous = Tokens->getPreviousToken();
334 return Previous && Previous->is(tok::comment) &&
335 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
336 }
337
338 /// \brief Parses a level: the run of comments, blocks, case labels and other
/// structural elements from the current token up to the closing brace of the
/// level (when \p OpeningBrace is given) or the end of input.
339 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
340 /// \param IfKind The \p if statement kind in the level.
341 /// \param IfLeftBrace The left brace of the \p if block in the level.
342 /// \returns true if a simple block of if/else/for/while, or false otherwise.
343 /// (A simple block has a single statement.)
parseLevel(const FormatToken * OpeningBrace,IfStmtKind * IfKind,FormatToken ** IfLeftBrace)344 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
345 IfStmtKind *IfKind,
346 FormatToken **IfLeftBrace) {
347 const bool InRequiresExpression =
348 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
// Evaluated once on entry. When brace removal is disabled this is always true,
// which makes the "simple block" check at the closing brace fail.
349 const bool IsPrecededByCommentOrPPDirective =
350 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
351 FormatToken *IfLBrace = nullptr;
352 bool HasDoWhile = false;
353 bool HasLabel = false;
354 unsigned StatementCount = 0;
355 bool SwitchLabelEncountered = false;
356
357 do {
// Attribute tokens are skipped without being counted as a statement.
358 if (FormatTok->isAttribute()) {
359 nextToken();
360 continue;
361 }
// Macro block begin/end tokens are dispatched as if they were braces.
362 tok::TokenKind kind = FormatTok->Tok.getKind();
363 if (FormatTok->getType() == TT_MacroBlockBegin)
364 kind = tok::l_brace;
365 else if (FormatTok->getType() == TT_MacroBlockEnd)
366 kind = tok::r_brace;
367
// Parses one structural element and counts it as a statement. The HasDoWhile
// and HasLabel out-pointers are only passed along while the respective flag
// is still false.
368 auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
369 &HasLabel, &StatementCount] {
370 parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
371 HasDoWhile ? nullptr : &HasDoWhile,
372 HasLabel ? nullptr : &HasLabel);
373 ++StatementCount;
374 assert(StatementCount > 0 && "StatementCount overflow!");
375 };
376
377 switch (kind) {
378 case tok::comment:
379 nextToken();
380 addUnwrappedLine();
381 break;
382 case tok::l_brace:
383 if (InRequiresExpression) {
384 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
385 } else if (FormatTok->Previous &&
386 FormatTok->Previous->ClosesRequiresClause) {
387 // We need the 'default' case here to correctly parse a function
388 // l_brace.
389 ParseDefault();
390 continue;
391 }
392 if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin) &&
393 tryToParseBracedList()) {
394 continue;
395 }
396 parseBlock();
397 ++StatementCount;
398 assert(StatementCount > 0 && "StatementCount overflow!");
399 addUnwrappedLine();
400 break;
401 case tok::r_brace:
// Inside a braced level the closing brace ends the level and is left for the
// caller to consume. The return value says whether the level is a simple
// block. Without an opening brace the stray '}' becomes a line of its own.
402 if (OpeningBrace) {
403 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
404 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
405 return false;
406 }
// A macro block end was mapped to r_brace above; the isNot(tok::r_brace)
// check rejects it here.
407 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
408 HasDoWhile || IsPrecededByCommentOrPPDirective ||
409 precededByCommentOrPPDirective()) {
410 return false;
411 }
// A comment on the same line as the closing brace also disqualifies the block.
412 const FormatToken *Next = Tokens->peekNextToken();
413 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
414 return false;
415 if (IfLeftBrace)
416 *IfLeftBrace = IfLBrace;
417 return true;
418 }
419 nextToken();
420 addUnwrappedLine();
421 break;
422 case tok::kw_default: {
// Look ahead past comments to see what follows 'default', then rewind.
423 unsigned StoredPosition = Tokens->getPosition();
424 FormatToken *Next;
425 do {
426 Next = Tokens->getNextToken();
427 assert(Next);
428 } while (Next->is(tok::comment));
429 FormatTok = Tokens->setPosition(StoredPosition);
430 if (Next->isNot(tok::colon)) {
431 // default not followed by ':' is not a case label; treat it like
432 // an identifier.
433 parseStructuralElement();
434 break;
435 }
436 // Else, if it is 'default:', fall through to the case handling.
437 [[fallthrough]];
438 }
439 case tok::kw_case:
440 if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
441 (Style.isJavaScript() && Line->MustBeDeclaration)) {
442 // Proto: there are no switch/case statements
443 // Verilog: Case labels don't have this word. We handle case
444 // labels including default in TokenAnnotator.
445 // JavaScript: A 'case: string' style field declaration.
446 ParseDefault();
447 break;
448 }
// The level is raised at most once per call, on the first label encountered.
449 if (!SwitchLabelEncountered &&
450 (Style.IndentCaseLabels ||
451 (Line->InPPDirective && Line->Level == 1))) {
452 ++Line->Level;
453 }
454 SwitchLabelEncountered = true;
455 parseStructuralElement();
456 break;
457 case tok::l_square:
458 if (Style.isCSharp()) {
459 nextToken();
460 parseCSharpAttribute();
461 break;
462 }
463 if (handleCppAttributes())
464 break;
465 [[fallthrough]];
466 default:
467 ParseDefault();
468 break;
469 }
470 } while (!eof());
471
// End of input was reached before the level was closed.
472 return false;
473 }
474
// Looks ahead from the current '{' and assigns a block kind (BK_Block or
// BK_BracedInit) to the braces up to and including the one that closes it,
// judging mostly by the token that follows each closing brace. The token
// source is rewound to the starting position before returning.
//
// ExpectClassBody: when true, a ';' after the outermost closing brace is not
// taken as evidence of a braced list.
calculateBraceTypes(bool ExpectClassBody)475 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
476 // We'll parse forward through the tokens until we hit
477 // a closing brace or eof - note that getNextToken() will
478 // parse macros, so this will magically work inside macro
479 // definitions, too.
480 unsigned StoredPosition = Tokens->getPosition();
481 FormatToken *Tok = FormatTok;
482 const FormatToken *PrevTok = Tok->Previous;
483 // Keep a stack of positions of lbrace tokens. We will
484 // update information about whether an lbrace starts a
485 // braced init list or a different block during the loop.
486 struct StackEntry {
487 FormatToken *Tok;
488 const FormatToken *PrevTok;
489 };
490 SmallVector<StackEntry, 8> LBraceStack;
491 assert(Tok->is(tok::l_brace));
492
493 do {
// Fetch the next non-comment token; Tok is classified with NextTok as
// look-ahead.
494 FormatToken *NextTok;
495 do {
496 NextTok = Tokens->getNextToken();
497 } while (NextTok->is(tok::comment));
498
499 if (!Line->InMacroBody) {
500 // Skip PPDirective lines and comments.
501 while (NextTok->is(tok::hash)) {
502 do {
503 NextTok = Tokens->getNextToken();
504 } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));
505
506 while (NextTok->is(tok::comment))
507 NextTok = Tokens->getNextToken();
508 }
509 }
510
511 switch (Tok->Tok.getKind()) {
512 case tok::l_brace:
// In JavaScript with a known previous token, the kind is only set for the
// two patterns below; otherwise it is left as it was.
513 if (Style.isJavaScript() && PrevTok) {
514 if (PrevTok->isOneOf(tok::colon, tok::less)) {
515 // A ':' indicates this code is in a type, or a braced list
516 // following a label in an object literal ({a: {b: 1}}).
517 // A '<' could be an object used in a comparison, but that is nonsense
518 // code (can never return true), so more likely it is a generic type
519 // argument (`X<{a: string; b: number}>`).
520 // The code below could be confused by semicolons between the
521 // individual members in a type member list, which would normally
522 // trigger BK_Block. In both cases, this must be parsed as an inline
523 // braced init.
524 Tok->setBlockKind(BK_BracedInit);
525 } else if (PrevTok->is(tok::r_paren)) {
526 // `) { }` can only occur in function or method declarations in JS.
527 Tok->setBlockKind(BK_Block);
528 }
529 } else {
530 Tok->setBlockKind(BK_Unknown);
531 }
532 LBraceStack.push_back({Tok, PrevTok});
533 break;
534 case tok::r_brace:
535 if (LBraceStack.empty())
536 break;
// Only decide when the matching '{' is still undecided. The heuristics below
// accumulate into ProbablyBracedList.
537 if (LBraceStack.back().Tok->is(BK_Unknown)) {
538 bool ProbablyBracedList = false;
539 if (Style.Language == FormatStyle::LK_Proto) {
540 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
541 } else {
542 // Using OriginalColumn to distinguish between ObjC methods and
543 // binary operators is a bit hacky.
544 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
545 NextTok->OriginalColumn == 0;
546
547 // Try to detect a braced list. Note that regardless how we mark inner
548 // braces here, we will overwrite the BlockKind later if we parse a
549 // braced list (where all blocks inside are by default braced lists),
550 // or when we explicitly detect blocks (for example while parsing
551 // lambdas).
552
553 // If we already marked the opening brace as braced list, the closing
554 // must also be part of it.
555 ProbablyBracedList = LBraceStack.back().Tok->is(TT_BracedListLBrace);
556
557 ProbablyBracedList = ProbablyBracedList ||
558 (Style.isJavaScript() &&
559 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
560 Keywords.kw_as));
561 ProbablyBracedList = ProbablyBracedList ||
562 (Style.isCpp() && NextTok->is(tok::l_paren));
563
564 // If there is a comma, semicolon or right paren after the closing
565 // brace, we assume this is a braced initializer list.
566 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
567 // braced list in JS.
568 ProbablyBracedList =
569 ProbablyBracedList ||
570 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
571 tok::r_paren, tok::r_square, tok::ellipsis);
572
573 // Distinguish between braced list in a constructor initializer list
574 // followed by constructor body, or just adjacent blocks.
575 ProbablyBracedList =
576 ProbablyBracedList ||
577 (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
578 LBraceStack.back().PrevTok->isOneOf(tok::identifier,
579 tok::greater));
580
// '}' followed by an identifier, unless the token before the '}' is ';', '}'
// or '{'.
581 ProbablyBracedList =
582 ProbablyBracedList ||
583 (NextTok->is(tok::identifier) &&
584 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
585
// '};' counts as a braced list unless this is the outermost brace of an
// expected class body.
586 ProbablyBracedList = ProbablyBracedList ||
587 (NextTok->is(tok::semi) &&
588 (!ExpectClassBody || LBraceStack.size() != 1));
589
590 ProbablyBracedList =
591 ProbablyBracedList ||
592 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
593
// This check overrides the verdict reached so far, and it advances NextTok by
// one token.
594 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
595 // We can have an array subscript after a braced init
596 // list, but C++11 attributes are expected after blocks.
597 NextTok = Tokens->getNextToken();
598 ProbablyBracedList = NextTok->isNot(tok::l_square);
599 }
600
601 // Cpp macro definition body that is a nonempty braced list or block:
602 if (Style.isCpp() && Line->InMacroBody && PrevTok != FormatTok &&
603 !FormatTok->Previous && NextTok->is(tok::eof) &&
604 // A statement can end with only `;` (simple statement), a block
605 // closing brace (compound statement), or `:` (label statement).
606 // If PrevTok is a block opening brace, Tok ends an empty block.
607 !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
608 ProbablyBracedList = true;
609 }
610 }
// Both braces of the pair receive the same kind.
611 if (ProbablyBracedList) {
612 Tok->setBlockKind(BK_BracedInit);
613 LBraceStack.back().Tok->setBlockKind(BK_BracedInit);
614 } else {
615 Tok->setBlockKind(BK_Block);
616 LBraceStack.back().Tok->setBlockKind(BK_Block);
617 }
618 }
619 LBraceStack.pop_back();
620 break;
621 case tok::identifier:
// Only identifiers typed as statement macros count as statement evidence.
622 if (Tok->isNot(TT_StatementMacro))
623 break;
624 [[fallthrough]];
// Any of these tokens inside a still-undecided brace marks it as a block.
625 case tok::at:
626 case tok::semi:
627 case tok::kw_if:
628 case tok::kw_while:
629 case tok::kw_for:
630 case tok::kw_switch:
631 case tok::kw_try:
632 case tok::kw___try:
633 if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
634 LBraceStack.back().Tok->setBlockKind(BK_Block);
635 break;
636 default:
637 break;
638 }
639
640 PrevTok = Tok;
641 Tok = NextTok;
642 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
643
644 // Assume other blocks for all unclosed opening braces.
645 for (const auto &Entry : LBraceStack)
646 if (Entry.Tok->is(BK_Unknown))
647 Entry.Tok->setBlockKind(BK_Block);
648
// Rewind so that regular parsing resumes at the token we started from.
649 FormatTok = Tokens->setPosition(StoredPosition);
650 }
651
652 // Sets the token type of the directly previous right brace.
setPreviousRBraceType(TokenType Type)653 void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
654 if (auto Prev = FormatTok->getPreviousNonComment();
655 Prev && Prev->is(tok::r_brace)) {
656 Prev->setFinalizedType(Type);
657 }
658 }
659
/// Folds the std::hash of \p v into the running hash value \p seed, in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
665
computePPHash() const666 size_t UnwrappedLineParser::computePPHash() const {
667 size_t h = 0;
668 for (const auto &i : PPStack) {
669 hash_combine(h, size_t(i.Kind));
670 hash_combine(h, i.Line);
671 }
672 return h;
673 }
674
675 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
676 // is not null, subtracts its length (plus the preceding space) when computing
677 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
678 // running the token annotator on it so that we can restore them afterward.
mightFitOnOneLine(UnwrappedLine & ParsedLine,const FormatToken * OpeningBrace) const679 bool UnwrappedLineParser::mightFitOnOneLine(
680 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
// A column limit of 0 is treated as "no limit": everything fits.
681 const auto ColumnLimit = Style.ColumnLimit;
682 if (ColumnLimit == 0)
683 return true;
684
685 auto &Tokens = ParsedLine.Tokens;
686 assert(!Tokens.empty());
687
688 const auto *LastToken = Tokens.back().Tok;
689 assert(LastToken);
690
691 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
692
// Snapshot every token (and its children) so the annotator's changes can be
// undone below.
// NOTE(review): Token is bound by const reference, so the std::move below
// most likely degrades to a copy of Children rather than a move - confirm
// that this is intended before changing it.
693 int Index = 0;
694 for (const auto &Token : Tokens) {
695 assert(Token.Tok);
696 auto &SavedToken = SavedTokens[Index++];
697 SavedToken.Tok = new FormatToken;
698 SavedToken.Tok->copyFrom(*Token.Tok);
699 SavedToken.Children = std::move(Token.Children);
700 }
701
702 AnnotatedLine Line(ParsedLine);
703 assert(Line.Last == LastToken);
704
// Run the annotator; the length is read from the last token's TotalLength.
705 TokenAnnotator Annotator(Style, Keywords);
706 Annotator.annotate(Line);
707 Annotator.calculateFormattingInformation(Line);
708
709 auto Length = LastToken->TotalLength;
710 if (OpeningBrace) {
711 assert(OpeningBrace != Tokens.front().Tok);
// If the brace's TotalLength is exactly ColumnLimit more than its
// predecessor's, that amount is taken out of the length again.
// NOTE(review): presumably a penalty the annotator adds for a forced break
// before the brace - confirm.
712 if (auto Prev = OpeningBrace->Previous;
713 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
714 Length -= ColumnLimit;
715 }
716 Length -= OpeningBrace->TokenText.size() + 1;
717 }
718
// A leading '}' is discounted the same way as the opening brace: its text
// length plus one.
719 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
720 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
721 Length -= FirstToken->TokenText.size() + 1;
722 }
723
// Restore the tokens from the snapshot and free the temporary copies.
// NOTE(review): SavedToken is also a const reference here, so this std::move
// is likely a copy as well.
724 Index = 0;
725 for (auto &Token : Tokens) {
726 const auto &SavedToken = SavedTokens[Index++];
727 Token.Tok->copyFrom(*SavedToken.Tok);
728 Token.Children = std::move(SavedToken.Children);
729 delete SavedToken.Tok;
730 }
731
732 // If these change, PPLevel needs to be used to get the correct indentation.
733 assert(!Line.InMacroBody);
734 assert(!Line.InPPDirective);
735 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
736 }
737
/// Parses a block that starts at the current token: a '{', a macro block
/// begin token, or (for Verilog) a begin keyword or hierarchy header. The
/// closing token is consumed as well.
/// \param MustBeDeclaration Declaration flag installed for the block body.
/// \param AddLevels Number of indentation levels added for the block body.
/// \param MunchSemi If true, a ';' directly after the block is consumed too.
/// \param KeepBraces If true, the braces are only considered for removal when
/// the nested \p if block reported by parseLevel has required braces.
/// \param IfKind Forwarded to parseLevel.
/// \param UnindentWhitesmithsBraces If true, the level is decremented by one
/// after the opening line has been added.
/// \returns the \p if left brace reported by parseLevel for the block body, or
/// \p nullptr if the nesting is deeper than 300 levels.
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool KeepBraces,IfStmtKind * IfKind,bool UnindentWhitesmithsBraces)738 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
739 unsigned AddLevels, bool MunchSemi,
740 bool KeepBraces,
741 IfStmtKind *IfKind,
742 bool UnindentWhitesmithsBraces) {
743 auto HandleVerilogBlockLabel = [this]() {
744 // ":" name
745 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
746 nextToken();
747 if (Keywords.isVerilogIdentifier(*FormatTok))
748 nextToken();
749 }
750 };
751
752 // Whether this is a Verilog-specific block that has a special header like a
753 // module.
754 const bool VerilogHierarchy =
755 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
756 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
757 (Style.isVerilog() &&
758 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
759 "'{' or macro block token expected");
// Remember the opening token and where its line will land in CurrentLines;
// both are needed for the brace-removal check further down.
760 FormatToken *Tok = FormatTok;
761 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
762 auto Index = CurrentLines->size();
763 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
764 FormatTok->setBlockKind(BK_Block);
765
766 // For Whitesmiths mode, jump to the next level prior to skipping over the
767 // braces.
768 if (!VerilogHierarchy && AddLevels > 0 &&
769 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
770 ++Line->Level;
771 }
772
// Hash of the preprocessor stack at the opening token. It is compared with
// the hash at the closing token to decide whether the opening and closing
// lines may be linked.
773 size_t PPStartHash = computePPHash();
774
775 const unsigned InitialLevel = Line->Level;
776 if (VerilogHierarchy) {
777 AddLevels += parseVerilogHierarchyHeader();
778 } else {
779 nextToken(/*LevelDifference=*/AddLevels);
780 HandleVerilogBlockLabel();
781 }
782
783 // Bail out if there are too many levels. Otherwise, the stack might overflow.
784 if (Line->Level > 300)
785 return nullptr;
786
787 if (MacroBlock && FormatTok->is(tok::l_paren))
788 parseParens();
789
// Index of the line holding the opening token. Outside a preprocessor
// directive, the number of recorded directive lines is subtracted from it.
790 size_t NbPreprocessorDirectives =
791 !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
792 addUnwrappedLine();
793 size_t OpeningLineIndex =
794 CurrentLines->empty()
795 ? (UnwrappedLine::kInvalidIndex)
796 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
797
798 // Whitesmiths is weird here. The brace needs to be indented for the namespace
799 // block, but the block itself may not be indented depending on the style
800 // settings. This allows the format to back up one level in those cases.
801 if (UnindentWhitesmithsBraces)
802 --Line->Level;
803
804 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
805 MustBeDeclaration);
806 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
807 Line->Level += AddLevels;
808
809 FormatToken *IfLBrace = nullptr;
810 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);
811
812 if (eof())
813 return IfLBrace;
814
// The body did not end at the expected closing token: restore the level and
// return without consuming anything.
815 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
816 : FormatTok->isNot(tok::r_brace)) {
817 Line->Level = InitialLevel;
818 FormatTok->setBlockKind(BK_Block);
819 return IfLBrace;
820 }
821
822 if (FormatTok->is(tok::r_brace) && Tok->is(TT_NamespaceLBrace))
823 FormatTok->setFinalizedType(TT_NamespaceRBrace);
824
825 const bool IsFunctionRBrace =
826 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);
827
// Decides whether the braces of this block are candidates for removal.
// Everything is captured by copy and the lambda is mutable, so the changes to
// Index and Tok below affect only the lambda's own copies.
828 auto RemoveBraces = [=]() mutable {
829 if (!SimpleBlock)
830 return false;
831 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
832 assert(FormatTok->is(tok::r_brace));
// A brace without a previous token starts its line, i.e. it was wrapped.
833 const bool WrappedOpeningBrace = !Tok->Previous;
834 if (WrappedOpeningBrace && FollowedByComment)
835 return false;
836 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
837 if (KeepBraces && !HasRequiredIfBraces)
838 return false;
839 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
840 const FormatToken *Previous = Tokens->getPreviousToken();
841 assert(Previous);
842 if (Previous->is(tok::r_brace) && !Previous->Optional)
843 return false;
844 }
845 assert(!CurrentLines->empty());
846 auto &LastLine = CurrentLines->back();
847 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
848 return false;
849 if (Tok->is(TT_ElseLBrace))
850 return true;
851 if (WrappedOpeningBrace) {
852 assert(Index > 0);
853 --Index; // The line above the wrapped l_brace.
854 Tok = nullptr;
855 }
856 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
857 };
// Removable braces are recorded by linking the pair through MatchingParen.
// NOTE(review): presumably a later pass uses this link to drop the braces -
// confirm against the consumer.
858 if (RemoveBraces()) {
859 Tok->MatchingParen = FormatTok;
860 FormatTok->MatchingParen = Tok;
861 }
862
863 size_t PPEndHash = computePPHash();
864
865 // Munch the closing brace.
// NOTE(review): AddLevels is unsigned, so the negation relies on the
// conversion to the LevelDifference parameter type - confirm it is signed.
866 nextToken(/*LevelDifference=*/-AddLevels);
867
868 // When this is a function block and there is an unnecessary semicolon
869 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
870 // it later).
871 if (Style.RemoveSemicolon && IsFunctionRBrace) {
872 while (FormatTok->is(tok::semi)) {
873 FormatTok->Optional = true;
874 nextToken();
875 }
876 }
877
878 HandleVerilogBlockLabel();
879
880 if (MacroBlock && FormatTok->is(tok::l_paren))
881 parseParens();
882
883 Line->Level = InitialLevel;
884
885 if (FormatTok->is(tok::kw_noexcept)) {
886 // A noexcept in a requires expression.
887 nextToken();
888 }
889
890 if (FormatTok->is(tok::arrow)) {
891 // Following the } or noexcept we can find a trailing return type arrow
892 // as part of an implicit conversion constraint.
893 nextToken();
894 parseStructuralElement();
895 }
896
897 if (MunchSemi && FormatTok->is(tok::semi))
898 nextToken();
899
// Link the opening and closing lines only when the preprocessor stack is the
// same at both ends of the block.
900 if (PPStartHash == PPEndHash) {
901 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
902 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
903 // Update the opening line to add the forward reference as well
904 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
905 CurrentLines->size() - 1;
906 }
907 }
908
909 return IfLBrace;
910 }
911
isGoogScope(const UnwrappedLine & Line)912 static bool isGoogScope(const UnwrappedLine &Line) {
913 // FIXME: Closure-library specific stuff should not be hard-coded but be
914 // configurable.
915 if (Line.Tokens.size() < 4)
916 return false;
917 auto I = Line.Tokens.begin();
918 if (I->Tok->TokenText != "goog")
919 return false;
920 ++I;
921 if (I->Tok->isNot(tok::period))
922 return false;
923 ++I;
924 if (I->Tok->TokenText != "scope")
925 return false;
926 ++I;
927 return I->Tok->is(tok::l_paren);
928 }
929
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)930 static bool isIIFE(const UnwrappedLine &Line,
931 const AdditionalKeywords &Keywords) {
932 // Look for the start of an immediately invoked anonymous function.
933 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
934 // This is commonly done in JavaScript to create a new, anonymous scope.
935 // Example: (function() { ... })()
936 if (Line.Tokens.size() < 3)
937 return false;
938 auto I = Line.Tokens.begin();
939 if (I->Tok->isNot(tok::l_paren))
940 return false;
941 ++I;
942 if (I->Tok->isNot(Keywords.kw_function))
943 return false;
944 ++I;
945 return I->Tok->is(tok::l_paren);
946 }
947
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)948 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
949 const FormatToken &InitialToken) {
950 tok::TokenKind Kind = InitialToken.Tok.getKind();
951 if (InitialToken.is(TT_NamespaceMacro))
952 Kind = tok::kw_namespace;
953
954 switch (Kind) {
955 case tok::kw_namespace:
956 return Style.BraceWrapping.AfterNamespace;
957 case tok::kw_class:
958 return Style.BraceWrapping.AfterClass;
959 case tok::kw_union:
960 return Style.BraceWrapping.AfterUnion;
961 case tok::kw_struct:
962 return Style.BraceWrapping.AfterStruct;
963 case tok::kw_enum:
964 return Style.BraceWrapping.AfterEnum;
965 default:
966 return false;
967 }
968 }
969
parseChildBlock()970 void UnwrappedLineParser::parseChildBlock() {
971 assert(FormatTok->is(tok::l_brace));
972 FormatTok->setBlockKind(BK_Block);
973 const FormatToken *OpeningBrace = FormatTok;
974 nextToken();
975 {
976 bool SkipIndent = (Style.isJavaScript() &&
977 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
978 ScopedLineState LineState(*this);
979 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
980 /*MustBeDeclaration=*/false);
981 Line->Level += SkipIndent ? 0 : 1;
982 parseLevel(OpeningBrace);
983 flushComments(isOnNewLine(*FormatTok));
984 Line->Level -= SkipIndent ? 0 : 1;
985 }
986 nextToken();
987 }
988
parsePPDirective()989 void UnwrappedLineParser::parsePPDirective() {
990 assert(FormatTok->is(tok::hash) && "'#' expected");
991 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
992
993 nextToken();
994
995 if (!FormatTok->Tok.getIdentifierInfo()) {
996 parsePPUnknown();
997 return;
998 }
999
1000 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1001 case tok::pp_define:
1002 parsePPDefine();
1003 return;
1004 case tok::pp_if:
1005 parsePPIf(/*IfDef=*/false);
1006 break;
1007 case tok::pp_ifdef:
1008 case tok::pp_ifndef:
1009 parsePPIf(/*IfDef=*/true);
1010 break;
1011 case tok::pp_else:
1012 case tok::pp_elifdef:
1013 case tok::pp_elifndef:
1014 case tok::pp_elif:
1015 parsePPElse();
1016 break;
1017 case tok::pp_endif:
1018 parsePPEndIf();
1019 break;
1020 case tok::pp_pragma:
1021 parsePPPragma();
1022 break;
1023 default:
1024 parsePPUnknown();
1025 break;
1026 }
1027 }
1028
conditionalCompilationCondition(bool Unreachable)1029 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1030 size_t Line = CurrentLines->size();
1031 if (CurrentLines == &PreprocessorDirectives)
1032 Line += Lines.size();
1033
1034 if (Unreachable ||
1035 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1036 PPStack.push_back({PP_Unreachable, Line});
1037 } else {
1038 PPStack.push_back({PP_Conditional, Line});
1039 }
1040 }
1041
conditionalCompilationStart(bool Unreachable)1042 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1043 ++PPBranchLevel;
1044 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1045 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1046 PPLevelBranchIndex.push_back(0);
1047 PPLevelBranchCount.push_back(0);
1048 }
1049 PPChainBranchIndex.push(Unreachable ? -1 : 0);
1050 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1051 conditionalCompilationCondition(Unreachable || Skip);
1052 }
1053
conditionalCompilationAlternative()1054 void UnwrappedLineParser::conditionalCompilationAlternative() {
1055 if (!PPStack.empty())
1056 PPStack.pop_back();
1057 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1058 if (!PPChainBranchIndex.empty())
1059 ++PPChainBranchIndex.top();
1060 conditionalCompilationCondition(
1061 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1062 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1063 }
1064
conditionalCompilationEnd()1065 void UnwrappedLineParser::conditionalCompilationEnd() {
1066 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1067 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1068 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1069 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1070 }
1071 // Guard against #endif's without #if.
1072 if (PPBranchLevel > -1)
1073 --PPBranchLevel;
1074 if (!PPChainBranchIndex.empty())
1075 PPChainBranchIndex.pop();
1076 if (!PPStack.empty())
1077 PPStack.pop_back();
1078 }
1079
parsePPIf(bool IfDef)1080 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1081 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1082 nextToken();
1083 bool Unreachable = false;
1084 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1085 Unreachable = true;
1086 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1087 Unreachable = true;
1088 conditionalCompilationStart(Unreachable);
1089 FormatToken *IfCondition = FormatTok;
1090 // If there's a #ifndef on the first line, and the only lines before it are
1091 // comments, it could be an include guard.
1092 bool MaybeIncludeGuard = IfNDef;
1093 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1094 for (auto &Line : Lines) {
1095 if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1096 MaybeIncludeGuard = false;
1097 IncludeGuard = IG_Rejected;
1098 break;
1099 }
1100 }
1101 }
1102 --PPBranchLevel;
1103 parsePPUnknown();
1104 ++PPBranchLevel;
1105 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1106 IncludeGuard = IG_IfNdefed;
1107 IncludeGuardToken = IfCondition;
1108 }
1109 }
1110
parsePPElse()1111 void UnwrappedLineParser::parsePPElse() {
1112 // If a potential include guard has an #else, it's not an include guard.
1113 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1114 IncludeGuard = IG_Rejected;
1115 // Don't crash when there is an #else without an #if.
1116 assert(PPBranchLevel >= -1);
1117 if (PPBranchLevel == -1)
1118 conditionalCompilationStart(/*Unreachable=*/true);
1119 conditionalCompilationAlternative();
1120 --PPBranchLevel;
1121 parsePPUnknown();
1122 ++PPBranchLevel;
1123 }
1124
parsePPEndIf()1125 void UnwrappedLineParser::parsePPEndIf() {
1126 conditionalCompilationEnd();
1127 parsePPUnknown();
1128 // If the #endif of a potential include guard is the last thing in the file,
1129 // then we found an include guard.
1130 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1131 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1132 IncludeGuard = IG_Found;
1133 }
1134 }
1135
parsePPDefine()1136 void UnwrappedLineParser::parsePPDefine() {
1137 nextToken();
1138
1139 if (!FormatTok->Tok.getIdentifierInfo()) {
1140 IncludeGuard = IG_Rejected;
1141 IncludeGuardToken = nullptr;
1142 parsePPUnknown();
1143 return;
1144 }
1145
1146 if (IncludeGuard == IG_IfNdefed &&
1147 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1148 IncludeGuard = IG_Defined;
1149 IncludeGuardToken = nullptr;
1150 for (auto &Line : Lines) {
1151 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1152 IncludeGuard = IG_Rejected;
1153 break;
1154 }
1155 }
1156 }
1157
1158 // In the context of a define, even keywords should be treated as normal
1159 // identifiers. Setting the kind to identifier is not enough, because we need
1160 // to treat additional keywords like __except as well, which are already
1161 // identifiers. Setting the identifier info to null interferes with include
1162 // guard processing above, and changes preprocessing nesting.
1163 FormatTok->Tok.setKind(tok::identifier);
1164 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1165 nextToken();
1166 if (FormatTok->Tok.getKind() == tok::l_paren &&
1167 !FormatTok->hasWhitespaceBefore()) {
1168 parseParens();
1169 }
1170 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1171 Line->Level += PPBranchLevel + 1;
1172 addUnwrappedLine();
1173 ++Line->Level;
1174
1175 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1176 assert((int)Line->PPLevel >= 0);
1177 Line->InMacroBody = true;
1178
1179 if (Style.SkipMacroDefinitionBody) {
1180 do {
1181 FormatTok->Finalized = true;
1182 nextToken();
1183 } while (!eof());
1184 addUnwrappedLine();
1185 return;
1186 }
1187
1188 if (FormatTok->is(tok::identifier) &&
1189 Tokens->peekNextToken()->is(tok::colon)) {
1190 nextToken();
1191 nextToken();
1192 }
1193
1194 // Errors during a preprocessor directive can only affect the layout of the
1195 // preprocessor directive, and thus we ignore them. An alternative approach
1196 // would be to use the same approach we use on the file level (no
1197 // re-indentation if there was a structural error) within the macro
1198 // definition.
1199 parseFile();
1200 }
1201
parsePPPragma()1202 void UnwrappedLineParser::parsePPPragma() {
1203 Line->InPragmaDirective = true;
1204 parsePPUnknown();
1205 }
1206
parsePPUnknown()1207 void UnwrappedLineParser::parsePPUnknown() {
1208 do {
1209 nextToken();
1210 } while (!eof());
1211 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1212 Line->Level += PPBranchLevel + 1;
1213 addUnwrappedLine();
1214 }
1215
1216 // Here we exclude certain tokens that are not usually the first token in an
1217 // unwrapped line. This is used in attempt to distinguish macro calls without
1218 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)1219 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1220 // Semicolon can be a null-statement, l_square can be a start of a macro or
1221 // a C++11 attribute, but this doesn't seem to be common.
1222 assert(Tok.isNot(TT_AttributeSquare));
1223 return !Tok.isOneOf(tok::semi, tok::l_brace,
1224 // Tokens that can only be used as binary operators and a
1225 // part of overloaded operator names.
1226 tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1227 tok::less, tok::greater, tok::slash, tok::percent,
1228 tok::lessless, tok::greatergreater, tok::equal,
1229 tok::plusequal, tok::minusequal, tok::starequal,
1230 tok::slashequal, tok::percentequal, tok::ampequal,
1231 tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1232 tok::lesslessequal,
1233 // Colon is used in labels, base class lists, initializer
1234 // lists, range-based for loops, ternary operator, but
1235 // should never be the first token in an unwrapped line.
1236 tok::colon,
1237 // 'noexcept' is a trailing annotation.
1238 tok::kw_noexcept);
1239 }
1240
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1241 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1242 const FormatToken *FormatTok) {
1243 // FIXME: This returns true for C/C++ keywords like 'struct'.
1244 return FormatTok->is(tok::identifier) &&
1245 (!FormatTok->Tok.getIdentifierInfo() ||
1246 !FormatTok->isOneOf(
1247 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1248 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1249 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1250 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1251 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1252 Keywords.kw_instanceof, Keywords.kw_interface,
1253 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1254 }
1255
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1256 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1257 const FormatToken *FormatTok) {
1258 return FormatTok->Tok.isLiteral() ||
1259 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1260 mustBeJSIdent(Keywords, FormatTok);
1261 }
1262
1263 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1264 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1265 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1266 const FormatToken *FormatTok) {
1267 return FormatTok->isOneOf(
1268 tok::kw_return, Keywords.kw_yield,
1269 // conditionals
1270 tok::kw_if, tok::kw_else,
1271 // loops
1272 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1273 // switch/case
1274 tok::kw_switch, tok::kw_case,
1275 // exceptions
1276 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1277 // declaration
1278 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1279 Keywords.kw_async, Keywords.kw_function,
1280 // import/export
1281 Keywords.kw_import, tok::kw_export);
1282 }
1283
1284 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)1285 static bool isC78Type(const FormatToken &Tok) {
1286 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1287 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1288 tok::identifier);
1289 }
1290
1291 // This function checks whether a token starts the first parameter declaration
1292 // in a K&R C (aka C78) function definition, e.g.:
1293 // int f(a, b)
1294 // short a, b;
1295 // {
1296 // return a + b;
1297 // }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1298 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1299 const FormatToken *FuncName) {
1300 assert(Tok);
1301 assert(Next);
1302 assert(FuncName);
1303
1304 if (FuncName->isNot(tok::identifier))
1305 return false;
1306
1307 const FormatToken *Prev = FuncName->Previous;
1308 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1309 return false;
1310
1311 if (!isC78Type(*Tok) &&
1312 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1313 return false;
1314 }
1315
1316 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1317 return false;
1318
1319 Tok = Tok->Previous;
1320 if (!Tok || Tok->isNot(tok::r_paren))
1321 return false;
1322
1323 Tok = Tok->Previous;
1324 if (!Tok || Tok->isNot(tok::identifier))
1325 return false;
1326
1327 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1328 }
1329
parseModuleImport()1330 bool UnwrappedLineParser::parseModuleImport() {
1331 assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1332
1333 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1334 !Token->Tok.getIdentifierInfo() &&
1335 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1336 return false;
1337 }
1338
1339 nextToken();
1340 while (!eof()) {
1341 if (FormatTok->is(tok::colon)) {
1342 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1343 }
1344 // Handle import <foo/bar.h> as we would an include statement.
1345 else if (FormatTok->is(tok::less)) {
1346 nextToken();
1347 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1348 // Mark tokens up to the trailing line comments as implicit string
1349 // literals.
1350 if (FormatTok->isNot(tok::comment) &&
1351 !FormatTok->TokenText.starts_with("//")) {
1352 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1353 }
1354 nextToken();
1355 }
1356 }
1357 if (FormatTok->is(tok::semi)) {
1358 nextToken();
1359 break;
1360 }
1361 nextToken();
1362 }
1363
1364 addUnwrappedLine();
1365 return true;
1366 }
1367
1368 // readTokenWithJavaScriptASI reads the next token and terminates the current
1369 // line if JavaScript Automatic Semicolon Insertion must
1370 // happen between the current token and the next token.
1371 //
1372 // This method is conservative - it cannot cover all edge cases of JavaScript,
1373 // but only aims to correctly handle certain well known cases. It *must not*
1374 // return true in speculative cases.
readTokenWithJavaScriptASI()1375 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1376 FormatToken *Previous = FormatTok;
1377 readToken();
1378 FormatToken *Next = FormatTok;
1379
1380 bool IsOnSameLine =
1381 CommentsBeforeNextToken.empty()
1382 ? Next->NewlinesBefore == 0
1383 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1384 if (IsOnSameLine)
1385 return;
1386
1387 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1388 bool PreviousStartsTemplateExpr =
1389 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1390 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1391 // If the line contains an '@' sign, the previous token might be an
1392 // annotation, which can precede another identifier/value.
1393 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1394 return LineNode.Tok->is(tok::at);
1395 });
1396 if (HasAt)
1397 return;
1398 }
1399 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1400 return addUnwrappedLine();
1401 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1402 bool NextEndsTemplateExpr =
1403 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1404 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1405 (PreviousMustBeValue ||
1406 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1407 tok::minusminus))) {
1408 return addUnwrappedLine();
1409 }
1410 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1411 isJSDeclOrStmt(Keywords, Next)) {
1412 return addUnwrappedLine();
1413 }
1414 }
1415
parseStructuralElement(const FormatToken * OpeningBrace,IfStmtKind * IfKind,FormatToken ** IfLeftBrace,bool * HasDoWhile,bool * HasLabel)1416 void UnwrappedLineParser::parseStructuralElement(
1417 const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1418 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1419 if (Style.Language == FormatStyle::LK_TableGen &&
1420 FormatTok->is(tok::pp_include)) {
1421 nextToken();
1422 if (FormatTok->is(tok::string_literal))
1423 nextToken();
1424 addUnwrappedLine();
1425 return;
1426 }
1427
1428 if (Style.isCpp()) {
1429 while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1430 }
1431 } else if (Style.isVerilog()) {
1432 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1433 parseForOrWhileLoop(/*HasParens=*/false);
1434 return;
1435 }
1436 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1437 parseForOrWhileLoop();
1438 return;
1439 }
1440 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1441 Keywords.kw_assume, Keywords.kw_cover)) {
1442 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1443 return;
1444 }
1445
1446 // Skip things that can exist before keywords like 'if' and 'case'.
1447 while (true) {
1448 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1449 Keywords.kw_unique0)) {
1450 nextToken();
1451 } else if (FormatTok->is(tok::l_paren) &&
1452 Tokens->peekNextToken()->is(tok::star)) {
1453 parseParens();
1454 } else {
1455 break;
1456 }
1457 }
1458 }
1459
1460 // Tokens that only make sense at the beginning of a line.
1461 switch (FormatTok->Tok.getKind()) {
1462 case tok::kw_asm:
1463 nextToken();
1464 if (FormatTok->is(tok::l_brace)) {
1465 FormatTok->setFinalizedType(TT_InlineASMBrace);
1466 nextToken();
1467 while (FormatTok && !eof()) {
1468 if (FormatTok->is(tok::r_brace)) {
1469 FormatTok->setFinalizedType(TT_InlineASMBrace);
1470 nextToken();
1471 addUnwrappedLine();
1472 break;
1473 }
1474 FormatTok->Finalized = true;
1475 nextToken();
1476 }
1477 }
1478 break;
1479 case tok::kw_namespace:
1480 parseNamespace();
1481 return;
1482 case tok::kw_public:
1483 case tok::kw_protected:
1484 case tok::kw_private:
1485 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1486 Style.isCSharp()) {
1487 nextToken();
1488 } else {
1489 parseAccessSpecifier();
1490 }
1491 return;
1492 case tok::kw_if: {
1493 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1494 // field/method declaration.
1495 break;
1496 }
1497 FormatToken *Tok = parseIfThenElse(IfKind);
1498 if (IfLeftBrace)
1499 *IfLeftBrace = Tok;
1500 return;
1501 }
1502 case tok::kw_for:
1503 case tok::kw_while:
1504 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1505 // field/method declaration.
1506 break;
1507 }
1508 parseForOrWhileLoop();
1509 return;
1510 case tok::kw_do:
1511 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1512 // field/method declaration.
1513 break;
1514 }
1515 parseDoWhile();
1516 if (HasDoWhile)
1517 *HasDoWhile = true;
1518 return;
1519 case tok::kw_switch:
1520 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1521 // 'switch: string' field declaration.
1522 break;
1523 }
1524 parseSwitch();
1525 return;
1526 case tok::kw_default:
1527 // In Verilog default along with other labels are handled in the next loop.
1528 if (Style.isVerilog())
1529 break;
1530 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1531 // 'default: string' field declaration.
1532 break;
1533 }
1534 nextToken();
1535 if (FormatTok->is(tok::colon)) {
1536 FormatTok->setFinalizedType(TT_CaseLabelColon);
1537 parseLabel();
1538 return;
1539 }
1540 // e.g. "default void f() {}" in a Java interface.
1541 break;
1542 case tok::kw_case:
1543 // Proto: there are no switch/case statements.
1544 if (Style.Language == FormatStyle::LK_Proto) {
1545 nextToken();
1546 return;
1547 }
1548 if (Style.isVerilog()) {
1549 parseBlock();
1550 addUnwrappedLine();
1551 return;
1552 }
1553 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1554 // 'case: string' field declaration.
1555 nextToken();
1556 break;
1557 }
1558 parseCaseLabel();
1559 return;
1560 case tok::kw_try:
1561 case tok::kw___try:
1562 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1563 // field/method declaration.
1564 break;
1565 }
1566 parseTryCatch();
1567 return;
1568 case tok::kw_extern:
1569 nextToken();
1570 if (Style.isVerilog()) {
1571 // In Verilog and extern module declaration looks like a start of module.
1572 // But there is no body and endmodule. So we handle it separately.
1573 if (Keywords.isVerilogHierarchy(*FormatTok)) {
1574 parseVerilogHierarchyHeader();
1575 return;
1576 }
1577 } else if (FormatTok->is(tok::string_literal)) {
1578 nextToken();
1579 if (FormatTok->is(tok::l_brace)) {
1580 if (Style.BraceWrapping.AfterExternBlock)
1581 addUnwrappedLine();
1582 // Either we indent or for backwards compatibility we follow the
1583 // AfterExternBlock style.
1584 unsigned AddLevels =
1585 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1586 (Style.BraceWrapping.AfterExternBlock &&
1587 Style.IndentExternBlock ==
1588 FormatStyle::IEBS_AfterExternBlock)
1589 ? 1u
1590 : 0u;
1591 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1592 addUnwrappedLine();
1593 return;
1594 }
1595 }
1596 break;
1597 case tok::kw_export:
1598 if (Style.isJavaScript()) {
1599 parseJavaScriptEs6ImportExport();
1600 return;
1601 }
1602 if (Style.isCpp()) {
1603 nextToken();
1604 if (FormatTok->is(tok::kw_namespace)) {
1605 parseNamespace();
1606 return;
1607 }
1608 if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1609 return;
1610 }
1611 break;
1612 case tok::kw_inline:
1613 nextToken();
1614 if (FormatTok->is(tok::kw_namespace)) {
1615 parseNamespace();
1616 return;
1617 }
1618 break;
1619 case tok::identifier:
1620 if (FormatTok->is(TT_ForEachMacro)) {
1621 parseForOrWhileLoop();
1622 return;
1623 }
1624 if (FormatTok->is(TT_MacroBlockBegin)) {
1625 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1626 /*MunchSemi=*/false);
1627 return;
1628 }
1629 if (FormatTok->is(Keywords.kw_import)) {
1630 if (Style.isJavaScript()) {
1631 parseJavaScriptEs6ImportExport();
1632 return;
1633 }
1634 if (Style.Language == FormatStyle::LK_Proto) {
1635 nextToken();
1636 if (FormatTok->is(tok::kw_public))
1637 nextToken();
1638 if (FormatTok->isNot(tok::string_literal))
1639 return;
1640 nextToken();
1641 if (FormatTok->is(tok::semi))
1642 nextToken();
1643 addUnwrappedLine();
1644 return;
1645 }
1646 if (Style.isCpp() && parseModuleImport())
1647 return;
1648 }
1649 if (Style.isCpp() &&
1650 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1651 Keywords.kw_slots, Keywords.kw_qslots)) {
1652 nextToken();
1653 if (FormatTok->is(tok::colon)) {
1654 nextToken();
1655 addUnwrappedLine();
1656 return;
1657 }
1658 }
1659 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1660 parseStatementMacro();
1661 return;
1662 }
1663 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1664 parseNamespace();
1665 return;
1666 }
1667 // In Verilog labels can be any expression, so we don't do them here.
1668 // JS doesn't have macros, and within classes colons indicate fields, not
1669 // labels.
1670 // TableGen doesn't have labels.
1671 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1672 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1673 nextToken();
1674 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1675 FormatTok->setFinalizedType(TT_GotoLabelColon);
1676 parseLabel(!Style.IndentGotoLabels);
1677 if (HasLabel)
1678 *HasLabel = true;
1679 return;
1680 }
1681 // In all other cases, parse the declaration.
1682 break;
1683 default:
1684 break;
1685 }
1686
1687 const bool InRequiresExpression =
1688 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1689 do {
1690 const FormatToken *Previous = FormatTok->Previous;
1691 switch (FormatTok->Tok.getKind()) {
1692 case tok::at:
1693 nextToken();
1694 if (FormatTok->is(tok::l_brace)) {
1695 nextToken();
1696 parseBracedList();
1697 break;
1698 } else if (Style.Language == FormatStyle::LK_Java &&
1699 FormatTok->is(Keywords.kw_interface)) {
1700 nextToken();
1701 break;
1702 }
1703 switch (FormatTok->Tok.getObjCKeywordID()) {
1704 case tok::objc_public:
1705 case tok::objc_protected:
1706 case tok::objc_package:
1707 case tok::objc_private:
1708 return parseAccessSpecifier();
1709 case tok::objc_interface:
1710 case tok::objc_implementation:
1711 return parseObjCInterfaceOrImplementation();
1712 case tok::objc_protocol:
1713 if (parseObjCProtocol())
1714 return;
1715 break;
1716 case tok::objc_end:
1717 return; // Handled by the caller.
1718 case tok::objc_optional:
1719 case tok::objc_required:
1720 nextToken();
1721 addUnwrappedLine();
1722 return;
1723 case tok::objc_autoreleasepool:
1724 nextToken();
1725 if (FormatTok->is(tok::l_brace)) {
1726 if (Style.BraceWrapping.AfterControlStatement ==
1727 FormatStyle::BWACS_Always) {
1728 addUnwrappedLine();
1729 }
1730 parseBlock();
1731 }
1732 addUnwrappedLine();
1733 return;
1734 case tok::objc_synchronized:
1735 nextToken();
1736 if (FormatTok->is(tok::l_paren)) {
1737 // Skip synchronization object
1738 parseParens();
1739 }
1740 if (FormatTok->is(tok::l_brace)) {
1741 if (Style.BraceWrapping.AfterControlStatement ==
1742 FormatStyle::BWACS_Always) {
1743 addUnwrappedLine();
1744 }
1745 parseBlock();
1746 }
1747 addUnwrappedLine();
1748 return;
1749 case tok::objc_try:
1750 // This branch isn't strictly necessary (the kw_try case below would
1751 // do this too after the tok::at is parsed above). But be explicit.
1752 parseTryCatch();
1753 return;
1754 default:
1755 break;
1756 }
1757 break;
1758 case tok::kw_requires: {
1759 if (Style.isCpp()) {
1760 bool ParsedClause = parseRequires();
1761 if (ParsedClause)
1762 return;
1763 } else {
1764 nextToken();
1765 }
1766 break;
1767 }
1768 case tok::kw_enum:
1769 // Ignore if this is part of "template <enum ...".
1770 if (Previous && Previous->is(tok::less)) {
1771 nextToken();
1772 break;
1773 }
1774
1775 // parseEnum falls through and does not yet add an unwrapped line as an
1776 // enum definition can start a structural element.
1777 if (!parseEnum())
1778 break;
1779 // This only applies to C++ and Verilog.
1780 if (!Style.isCpp() && !Style.isVerilog()) {
1781 addUnwrappedLine();
1782 return;
1783 }
1784 break;
1785 case tok::kw_typedef:
1786 nextToken();
1787 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1788 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1789 Keywords.kw_CF_CLOSED_ENUM,
1790 Keywords.kw_NS_CLOSED_ENUM)) {
1791 parseEnum();
1792 }
1793 break;
1794 case tok::kw_class:
1795 if (Style.isVerilog()) {
1796 parseBlock();
1797 addUnwrappedLine();
1798 return;
1799 }
1800 if (Style.isTableGen()) {
1801 // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1802 // This is same as def and so on.
1803 nextToken();
1804 break;
1805 }
1806 [[fallthrough]];
1807 case tok::kw_struct:
1808 case tok::kw_union:
1809 if (parseStructLike())
1810 return;
1811 break;
1812 case tok::kw_decltype:
1813 nextToken();
1814 if (FormatTok->is(tok::l_paren)) {
1815 parseParens();
1816 assert(FormatTok->Previous);
1817 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1818 tok::l_paren)) {
1819 Line->SeenDecltypeAuto = true;
1820 }
1821 }
1822 break;
1823 case tok::period:
1824 nextToken();
1825 // In Java, classes have an implicit static member "class".
1826 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1827 FormatTok->is(tok::kw_class)) {
1828 nextToken();
1829 }
1830 if (Style.isJavaScript() && FormatTok &&
1831 FormatTok->Tok.getIdentifierInfo()) {
1832 // JavaScript only has pseudo keywords, all keywords are allowed to
1833 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1834 nextToken();
1835 }
1836 break;
1837 case tok::semi:
1838 nextToken();
1839 addUnwrappedLine();
1840 return;
1841 case tok::r_brace:
1842 addUnwrappedLine();
1843 return;
1844 case tok::l_paren: {
1845 parseParens();
1846 // Break the unwrapped line if a K&R C function definition has a parameter
1847 // declaration.
1848 if (OpeningBrace || !Style.isCpp() || !Previous || eof())
1849 break;
1850 if (isC78ParameterDecl(FormatTok,
1851 Tokens->peekNextToken(/*SkipComment=*/true),
1852 Previous)) {
1853 addUnwrappedLine();
1854 return;
1855 }
1856 break;
1857 }
1858 case tok::kw_operator:
1859 nextToken();
1860 if (FormatTok->isBinaryOperator())
1861 nextToken();
1862 break;
1863 case tok::caret:
1864 nextToken();
1865 // Block return type.
1866 if (FormatTok->Tok.isAnyIdentifier() ||
1867 FormatTok->isSimpleTypeSpecifier()) {
1868 nextToken();
1869 // Return types: pointers are ok too.
1870 while (FormatTok->is(tok::star))
1871 nextToken();
1872 }
1873 // Block argument list.
1874 if (FormatTok->is(tok::l_paren))
1875 parseParens();
1876 // Block body.
1877 if (FormatTok->is(tok::l_brace))
1878 parseChildBlock();
1879 break;
1880 case tok::l_brace:
1881 if (InRequiresExpression)
1882 FormatTok->setFinalizedType(TT_BracedListLBrace);
1883 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1884 IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1885 // A block outside of parentheses must be the last part of a
1886 // structural element.
1887 // FIXME: Figure out cases where this is not true, and add projections
1888 // for them (the one we know is missing are lambdas).
1889 if (Style.Language == FormatStyle::LK_Java &&
1890 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1891 // If necessary, we could set the type to something different than
1892 // TT_FunctionLBrace.
1893 if (Style.BraceWrapping.AfterControlStatement ==
1894 FormatStyle::BWACS_Always) {
1895 addUnwrappedLine();
1896 }
1897 } else if (Style.BraceWrapping.AfterFunction) {
1898 addUnwrappedLine();
1899 }
1900 FormatTok->setFinalizedType(TT_FunctionLBrace);
1901 parseBlock();
1902 IsDecltypeAutoFunction = false;
1903 addUnwrappedLine();
1904 return;
1905 }
1906 // Otherwise this was a braced init list, and the structural
1907 // element continues.
1908 break;
1909 case tok::kw_try:
1910 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1911 // field/method declaration.
1912 nextToken();
1913 break;
1914 }
1915 // We arrive here when parsing function-try blocks.
1916 if (Style.BraceWrapping.AfterFunction)
1917 addUnwrappedLine();
1918 parseTryCatch();
1919 return;
1920 case tok::identifier: {
1921 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1922 Line->MustBeDeclaration) {
1923 addUnwrappedLine();
1924 parseCSharpGenericTypeConstraint();
1925 break;
1926 }
1927 if (FormatTok->is(TT_MacroBlockEnd)) {
1928 addUnwrappedLine();
1929 return;
1930 }
1931
1932 // Function declarations (as opposed to function expressions) are parsed
1933 // on their own unwrapped line by continuing this loop. Function
1934 // expressions (functions that are not on their own line) must not create
1935 // a new unwrapped line, so they are special cased below.
1936 size_t TokenCount = Line->Tokens.size();
1937 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1938 (TokenCount > 1 ||
1939 (TokenCount == 1 &&
1940 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1941 tryToParseJSFunction();
1942 break;
1943 }
1944 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1945 FormatTok->is(Keywords.kw_interface)) {
1946 if (Style.isJavaScript()) {
1947 // In JavaScript/TypeScript, "interface" can be used as a standalone
1948 // identifier, e.g. in `var interface = 1;`. If "interface" is
1949 // followed by another identifier, it is very like to be an actual
1950 // interface declaration.
1951 unsigned StoredPosition = Tokens->getPosition();
1952 FormatToken *Next = Tokens->getNextToken();
1953 FormatTok = Tokens->setPosition(StoredPosition);
1954 if (!mustBeJSIdent(Keywords, Next)) {
1955 nextToken();
1956 break;
1957 }
1958 }
1959 parseRecord();
1960 addUnwrappedLine();
1961 return;
1962 }
1963
1964 if (Style.isVerilog()) {
1965 if (FormatTok->is(Keywords.kw_table)) {
1966 parseVerilogTable();
1967 return;
1968 }
1969 if (Keywords.isVerilogBegin(*FormatTok) ||
1970 Keywords.isVerilogHierarchy(*FormatTok)) {
1971 parseBlock();
1972 addUnwrappedLine();
1973 return;
1974 }
1975 }
1976
1977 if (!Style.isCpp() && FormatTok->is(Keywords.kw_interface)) {
1978 if (parseStructLike())
1979 return;
1980 break;
1981 }
1982
1983 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1984 parseStatementMacro();
1985 return;
1986 }
1987
1988 // See if the following token should start a new unwrapped line.
1989 StringRef Text = FormatTok->TokenText;
1990
1991 FormatToken *PreviousToken = FormatTok;
1992 nextToken();
1993
1994 // JS doesn't have macros, and within classes colons indicate fields, not
1995 // labels.
1996 if (Style.isJavaScript())
1997 break;
1998
1999 auto OneTokenSoFar = [&]() {
2000 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2001 while (I != E && I->Tok->is(tok::comment))
2002 ++I;
2003 if (Style.isVerilog())
2004 while (I != E && I->Tok->is(tok::hash))
2005 ++I;
2006 return I != E && (++I == E);
2007 };
2008 if (OneTokenSoFar()) {
2009 // Recognize function-like macro usages without trailing semicolon as
2010 // well as free-standing macros like Q_OBJECT.
2011 bool FunctionLike = FormatTok->is(tok::l_paren);
2012 if (FunctionLike)
2013 parseParens();
2014
2015 bool FollowedByNewline =
2016 CommentsBeforeNextToken.empty()
2017 ? FormatTok->NewlinesBefore > 0
2018 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2019
2020 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2021 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2022 if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2023 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2024 addUnwrappedLine();
2025 return;
2026 }
2027 }
2028 break;
2029 }
2030 case tok::equal:
2031 if ((Style.isJavaScript() || Style.isCSharp()) &&
2032 FormatTok->is(TT_FatArrow)) {
2033 tryToParseChildBlock();
2034 break;
2035 }
2036
2037 nextToken();
2038 if (FormatTok->is(tok::l_brace)) {
2039 // Block kind should probably be set to BK_BracedInit for any language.
2040 // C# needs this change to ensure that array initialisers and object
2041 // initialisers are indented the same way.
2042 if (Style.isCSharp())
2043 FormatTok->setBlockKind(BK_BracedInit);
2044 // TableGen's defset statement has syntax of the form,
2045 // `defset <type> <name> = { <statement>... }`
2046 if (Style.isTableGen() &&
2047 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2048 FormatTok->setFinalizedType(TT_FunctionLBrace);
2049 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2050 /*MunchSemi=*/false);
2051 addUnwrappedLine();
2052 break;
2053 }
2054 nextToken();
2055 parseBracedList();
2056 } else if (Style.Language == FormatStyle::LK_Proto &&
2057 FormatTok->is(tok::less)) {
2058 nextToken();
2059 parseBracedList(/*IsAngleBracket=*/true);
2060 }
2061 break;
2062 case tok::l_square:
2063 parseSquare();
2064 break;
2065 case tok::kw_new:
2066 parseNew();
2067 break;
2068 case tok::kw_case:
2069 // Proto: there are no switch/case statements.
2070 if (Style.Language == FormatStyle::LK_Proto) {
2071 nextToken();
2072 return;
2073 }
2074 // In Verilog switch is called case.
2075 if (Style.isVerilog()) {
2076 parseBlock();
2077 addUnwrappedLine();
2078 return;
2079 }
2080 if (Style.isJavaScript() && Line->MustBeDeclaration) {
2081 // 'case: string' field declaration.
2082 nextToken();
2083 break;
2084 }
2085 parseCaseLabel();
2086 break;
2087 case tok::kw_default:
2088 nextToken();
2089 if (Style.isVerilog()) {
2090 if (FormatTok->is(tok::colon)) {
2091 // The label will be handled in the next iteration.
2092 break;
2093 }
2094 if (FormatTok->is(Keywords.kw_clocking)) {
2095 // A default clocking block.
2096 parseBlock();
2097 addUnwrappedLine();
2098 return;
2099 }
2100 parseVerilogCaseLabel();
2101 return;
2102 }
2103 break;
2104 case tok::colon:
2105 nextToken();
2106 if (Style.isVerilog()) {
2107 parseVerilogCaseLabel();
2108 return;
2109 }
2110 break;
2111 default:
2112 nextToken();
2113 break;
2114 }
2115 } while (!eof());
2116 }
2117
tryToParsePropertyAccessor()2118 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2119 assert(FormatTok->is(tok::l_brace));
2120 if (!Style.isCSharp())
2121 return false;
2122 // See if it's a property accessor.
2123 if (FormatTok->Previous->isNot(tok::identifier))
2124 return false;
2125
2126 // See if we are inside a property accessor.
2127 //
2128 // Record the current tokenPosition so that we can advance and
2129 // reset the current token. `Next` is not set yet so we need
2130 // another way to advance along the token stream.
2131 unsigned int StoredPosition = Tokens->getPosition();
2132 FormatToken *Tok = Tokens->getNextToken();
2133
2134 // A trivial property accessor is of the form:
2135 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2136 // Track these as they do not require line breaks to be introduced.
2137 bool HasSpecialAccessor = false;
2138 bool IsTrivialPropertyAccessor = true;
2139 while (!eof()) {
2140 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2141 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2142 Keywords.kw_init, Keywords.kw_set)) {
2143 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2144 HasSpecialAccessor = true;
2145 Tok = Tokens->getNextToken();
2146 continue;
2147 }
2148 if (Tok->isNot(tok::r_brace))
2149 IsTrivialPropertyAccessor = false;
2150 break;
2151 }
2152
2153 if (!HasSpecialAccessor) {
2154 Tokens->setPosition(StoredPosition);
2155 return false;
2156 }
2157
2158 // Try to parse the property accessor:
2159 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2160 Tokens->setPosition(StoredPosition);
2161 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2162 addUnwrappedLine();
2163 nextToken();
2164 do {
2165 switch (FormatTok->Tok.getKind()) {
2166 case tok::r_brace:
2167 nextToken();
2168 if (FormatTok->is(tok::equal)) {
2169 while (!eof() && FormatTok->isNot(tok::semi))
2170 nextToken();
2171 nextToken();
2172 }
2173 addUnwrappedLine();
2174 return true;
2175 case tok::l_brace:
2176 ++Line->Level;
2177 parseBlock(/*MustBeDeclaration=*/true);
2178 addUnwrappedLine();
2179 --Line->Level;
2180 break;
2181 case tok::equal:
2182 if (FormatTok->is(TT_FatArrow)) {
2183 ++Line->Level;
2184 do {
2185 nextToken();
2186 } while (!eof() && FormatTok->isNot(tok::semi));
2187 nextToken();
2188 addUnwrappedLine();
2189 --Line->Level;
2190 break;
2191 }
2192 nextToken();
2193 break;
2194 default:
2195 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2196 Keywords.kw_set) &&
2197 !IsTrivialPropertyAccessor) {
2198 // Non-trivial get/set needs to be on its own line.
2199 addUnwrappedLine();
2200 }
2201 nextToken();
2202 }
2203 } while (!eof());
2204
2205 // Unreachable for well-formed code (paired '{' and '}').
2206 return true;
2207 }
2208
/// Tries to parse a C++ lambda expression starting at the current `[`.
///
/// For non-C++ styles the `[` is consumed and false is returned. Otherwise the
/// introducer is parsed via tryToParseLambdaIntroducer(); if that fails, false
/// is returned. After a successful introducer, tokens are consumed until a `{`
/// is found. If a token that cannot be part of a lambda declarator is met
/// first, the function returns true without annotating anything. When the `{`
/// is reached, the brace and the opening square are typed as
/// TT_LambdaLBrace / TT_LambdaLSquare and the body is parsed as a child block.
///
/// \returns false if the language is not C++ or the introducer was rejected;
/// true otherwise (whether or not a lambda body was actually found).
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Set once a `->` has been seen after the introducer.
  bool SeenArrow = false;
  // Set once a `<` directly following the introducer's `]` has been seen.
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    // Tokens that are always accepted between the introducer and the body.
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Operator outside a return type / template list: stop scanning.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_TrailingReturnArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    case tok::equal:
      // `=` is only accepted inside the template parameter list.
      if (!InTemplateParameterList)
        return true;
      nextToken();
      break;
    default:
      // Any other token (including EOF) ends the attempt.
      return true;
    }
  }

  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);

  // Record the enclosing line's SeenDecltypeAuto flag for the duration of the
  // lambda body; parseParens() reads NestedLambdas.back().
  NestedLambdas.push_back(Line->SeenDecltypeAuto);
  parseChildBlock();
  assert(!NestedLambdas.empty());
  NestedLambdas.pop_back();

  return true;
}
2331
tryToParseLambdaIntroducer()2332 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2333 const FormatToken *Previous = FormatTok->Previous;
2334 const FormatToken *LeftSquare = FormatTok;
2335 nextToken();
2336 if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2337 !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2338 tok::kw_co_yield, tok::kw_co_return)) ||
2339 Previous->closesScope())) ||
2340 LeftSquare->isCppStructuredBinding(Style)) {
2341 return false;
2342 }
2343 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2344 return false;
2345 if (FormatTok->is(tok::r_square)) {
2346 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2347 if (Next->is(tok::greater))
2348 return false;
2349 }
2350 parseSquare(/*LambdaIntroducer=*/true);
2351 return true;
2352 }
2353
tryToParseJSFunction()2354 void UnwrappedLineParser::tryToParseJSFunction() {
2355 assert(FormatTok->is(Keywords.kw_function));
2356 if (FormatTok->is(Keywords.kw_async))
2357 nextToken();
2358 // Consume "function".
2359 nextToken();
2360
2361 // Consume * (generator function). Treat it like C++'s overloaded operators.
2362 if (FormatTok->is(tok::star)) {
2363 FormatTok->setFinalizedType(TT_OverloadedOperator);
2364 nextToken();
2365 }
2366
2367 // Consume function name.
2368 if (FormatTok->is(tok::identifier))
2369 nextToken();
2370
2371 if (FormatTok->isNot(tok::l_paren))
2372 return;
2373
2374 // Parse formal parameter list.
2375 parseParens();
2376
2377 if (FormatTok->is(tok::colon)) {
2378 // Parse a type definition.
2379 nextToken();
2380
2381 // Eat the type declaration. For braced inline object types, balance braces,
2382 // otherwise just parse until finding an l_brace for the function body.
2383 if (FormatTok->is(tok::l_brace))
2384 tryToParseBracedList();
2385 else
2386 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2387 nextToken();
2388 }
2389
2390 if (FormatTok->is(tok::semi))
2391 return;
2392
2393 parseChildBlock();
2394 }
2395
tryToParseBracedList()2396 bool UnwrappedLineParser::tryToParseBracedList() {
2397 if (FormatTok->is(BK_Unknown))
2398 calculateBraceTypes();
2399 assert(FormatTok->isNot(BK_Unknown));
2400 if (FormatTok->is(BK_Block))
2401 return false;
2402 nextToken();
2403 parseBracedList();
2404 return true;
2405 }
2406
tryToParseChildBlock()2407 bool UnwrappedLineParser::tryToParseChildBlock() {
2408 assert(Style.isJavaScript() || Style.isCSharp());
2409 assert(FormatTok->is(TT_FatArrow));
2410 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2411 // They always start an expression or a child block if followed by a curly
2412 // brace.
2413 nextToken();
2414 if (FormatTok->isNot(tok::l_brace))
2415 return false;
2416 parseChildBlock();
2417 return true;
2418 }
2419
/// Parses the contents of a braced (or angle-bracketed) list, up to and
/// including its closing token. The opening token must already be consumed.
///
/// \param IsAngleBracket If true the list is closed by `>` instead of `}`.
/// \param IsEnum If true and Style.AllowShortEnumsOnASingleLine is off, an
/// unwrapped line is added after each comma and before the closing token.
/// A `;` inside an enum list is recorded as an error but does not stop
/// parsing.
///
/// \returns true if the closing token was found and no error was recorded;
/// false if a `;` aborted a non-enum list (outside JavaScript), if an error
/// was recorded, or if EOF was reached first.
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // C# lambda with a block body inside the list.
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock()) {
      continue;
    }
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // Closing token of this list: consume it and report the result.
    if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      nextToken();
      // Nested angle-bracket lists are only followed inside an angle-bracket
      // list.
      if (IsAngleBracket)
        parseBracedList(/*IsAngleBracket=*/true);
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      // Outside enums, stop at the semicolon without consuming it.
      if (!IsEnum)
        return false;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // EOF reached without seeing the closing token.
  return false;
}
2503
/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// When Style.RemoveParentheses is above RPS_Leave, the pair may also be
/// marked `Optional` on reaching the closing parenthesis (redundant double
/// parentheses, or parentheses around a return/co_return operand).
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LeftParen = FormatTok;
  bool SeenEqual = false;
  // Set when an ellipsis is seen directly inside this pair; such parentheses
  // are never marked optional.
  bool MightBeFoldExpr = false;
  // `({` — parentheses directly followed by a brace are never marked optional.
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // A `=` seen in a nested pair counts for the enclosing pair too.
      if (parseParens(AmpAmpTokenType))
        SeenEqual = true;
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
        const auto *Prev = LeftParen->Previous;
        const auto *Next = Tokens->peekNextToken();
        // `((...))`: this pair is immediately wrapped by another pair.
        const bool DoubleParens =
            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
        // Contexts in which the double parentheses are kept.
        const bool Blacklisted =
            PrevPrev &&
            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
             (SeenEqual &&
              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
        // `return (...);` / `co_return (...);`, excluded when the enclosing
        // function or innermost lambda is flagged as decltype(auto).
        const bool ReturnParens =
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
            Next->is(tok::semi);
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
          LeftParen->Optional = true;
          FormatTok->Optional = true;
        }
      }
      nextToken();
      return SeenEqual;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // `@{ ... }` is parsed as a braced list.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::ellipsis:
      MightBeFoldExpr = true;
      nextToken();
      break;
    case tok::equal:
      SeenEqual = true;
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      [[fallthrough]];
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // EOF reached before the closing parenthesis.
  return SeenEqual;
}
2608
parseSquare(bool LambdaIntroducer)2609 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2610 if (!LambdaIntroducer) {
2611 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2612 if (tryToParseLambda())
2613 return;
2614 }
2615 do {
2616 switch (FormatTok->Tok.getKind()) {
2617 case tok::l_paren:
2618 parseParens();
2619 break;
2620 case tok::r_square:
2621 nextToken();
2622 return;
2623 case tok::r_brace:
2624 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2625 return;
2626 case tok::l_square:
2627 parseSquare();
2628 break;
2629 case tok::l_brace: {
2630 if (!tryToParseBracedList())
2631 parseChildBlock();
2632 break;
2633 }
2634 case tok::at:
2635 nextToken();
2636 if (FormatTok->is(tok::l_brace)) {
2637 nextToken();
2638 parseBracedList();
2639 }
2640 break;
2641 default:
2642 nextToken();
2643 break;
2644 }
2645 } while (!eof());
2646 }
2647
keepAncestorBraces()2648 void UnwrappedLineParser::keepAncestorBraces() {
2649 if (!Style.RemoveBracesLLVM)
2650 return;
2651
2652 const int MaxNestingLevels = 2;
2653 const int Size = NestedTooDeep.size();
2654 if (Size >= MaxNestingLevels)
2655 NestedTooDeep[Size - MaxNestingLevels] = true;
2656 NestedTooDeep.push_back(false);
2657 }
2658
getLastNonComment(const UnwrappedLine & Line)2659 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2660 for (const auto &Token : llvm::reverse(Line.Tokens))
2661 if (Token.Tok->isNot(tok::comment))
2662 return Token.Tok;
2663
2664 return nullptr;
2665 }
2666
parseUnbracedBody(bool CheckEOF)2667 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2668 FormatToken *Tok = nullptr;
2669
2670 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2671 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2672 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2673 ? getLastNonComment(*Line)
2674 : Line->Tokens.back().Tok;
2675 assert(Tok);
2676 if (Tok->BraceCount < 0) {
2677 assert(Tok->BraceCount == -1);
2678 Tok = nullptr;
2679 } else {
2680 Tok->BraceCount = -1;
2681 }
2682 }
2683
2684 addUnwrappedLine();
2685 ++Line->Level;
2686 parseStructuralElement();
2687
2688 if (Tok) {
2689 assert(!Line->InPPDirective);
2690 Tok = nullptr;
2691 for (const auto &L : llvm::reverse(*CurrentLines)) {
2692 if (!L.InPPDirective && getLastNonComment(L)) {
2693 Tok = L.Tokens.back().Tok;
2694 break;
2695 }
2696 }
2697 assert(Tok);
2698 ++Tok->BraceCount;
2699 }
2700
2701 if (CheckEOF && eof())
2702 addUnwrappedLine();
2703
2704 --Line->Level;
2705 }
2706
markOptionalBraces(FormatToken * LeftBrace)2707 static void markOptionalBraces(FormatToken *LeftBrace) {
2708 if (!LeftBrace)
2709 return;
2710
2711 assert(LeftBrace->is(tok::l_brace));
2712
2713 FormatToken *RightBrace = LeftBrace->MatchingParen;
2714 if (!RightBrace) {
2715 assert(!LeftBrace->Optional);
2716 return;
2717 }
2718
2719 assert(RightBrace->is(tok::r_brace));
2720 assert(RightBrace->MatchingParen == LeftBrace);
2721 assert(LeftBrace->Optional == RightBrace->Optional);
2722
2723 LeftBrace->Optional = true;
2724 RightBrace->Optional = true;
2725 }
2726
handleAttributes()2727 void UnwrappedLineParser::handleAttributes() {
2728 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2729 if (FormatTok->isAttribute())
2730 nextToken();
2731 else if (FormatTok->is(tok::l_square))
2732 handleCppAttributes();
2733 }
2734
handleCppAttributes()2735 bool UnwrappedLineParser::handleCppAttributes() {
2736 // Handle [[likely]] / [[unlikely]] attributes.
2737 assert(FormatTok->is(tok::l_square));
2738 if (!tryToParseSimpleAttribute())
2739 return false;
2740 parseSquare();
2741 return true;
2742 }
2743
2744 /// Returns whether \c Tok begins a block.
isBlockBegin(const FormatToken & Tok) const2745 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2746 // FIXME: rename the function or make
2747 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2748 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2749 : Tok.is(tok::l_brace);
2750 }
2751
/// Parses an `if` statement (or, for Verilog, an assert-like statement)
/// together with its optional `else` / `else if` chain.
///
/// When Style.RemoveBracesLLVM is enabled, this also decides whether the
/// braces of the if and else blocks may be removed and marks them optional.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// the kind of statement parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces If true, the braces of the if block are always kept.
/// \param IsVerilogAssert If true, Verilog assert forms (`assert #0`,
/// `assert final`, optional then-action) are accepted.
///
/// \returns the left brace of the if block (possibly null) when
/// Style.RemoveBracesLLVM is enabled; nullptr otherwise, and also for a
/// Verilog assert without a then-action.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of `if consteval` are always kept.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line ending after the if block is deferred until it is known
  // whether an `else` follows.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    // Verilog assert with no then-action but an else-action.
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the left brace has no match recorded, if nesting is
    // too deep, or if the block's content is itself an if without a final
    // else.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // `else { ... }`
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }`: the else braces themselves are marked
        // optional here.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if ...`: parse the nested if recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        // A comment between `else` and `if` puts the `if` on its own, deeper
        // indented line.
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The nested call pushes its own entry; temporarily remove ours and
        // restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else branch.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: clear the brace pair's MatchingParen links.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2924
parseTryCatch()2925 void UnwrappedLineParser::parseTryCatch() {
2926 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2927 nextToken();
2928 bool NeedsUnwrappedLine = false;
2929 if (FormatTok->is(tok::colon)) {
2930 // We are in a function try block, what comes is an initializer list.
2931 nextToken();
2932
2933 // In case identifiers were removed by clang-tidy, what might follow is
2934 // multiple commas in sequence - before the first identifier.
2935 while (FormatTok->is(tok::comma))
2936 nextToken();
2937
2938 while (FormatTok->is(tok::identifier)) {
2939 nextToken();
2940 if (FormatTok->is(tok::l_paren))
2941 parseParens();
2942 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2943 FormatTok->is(tok::l_brace)) {
2944 do {
2945 nextToken();
2946 } while (FormatTok->isNot(tok::r_brace));
2947 nextToken();
2948 }
2949
2950 // In case identifiers were removed by clang-tidy, what might follow is
2951 // multiple commas in sequence - after the first identifier.
2952 while (FormatTok->is(tok::comma))
2953 nextToken();
2954 }
2955 }
2956 // Parse try with resource.
2957 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2958 parseParens();
2959
2960 keepAncestorBraces();
2961
2962 if (FormatTok->is(tok::l_brace)) {
2963 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2964 parseBlock();
2965 if (Style.BraceWrapping.BeforeCatch)
2966 addUnwrappedLine();
2967 else
2968 NeedsUnwrappedLine = true;
2969 } else if (FormatTok->isNot(tok::kw_catch)) {
2970 // The C++ standard requires a compound-statement after a try.
2971 // If there's none, we try to assume there's a structuralElement
2972 // and try to continue.
2973 addUnwrappedLine();
2974 ++Line->Level;
2975 parseStructuralElement();
2976 --Line->Level;
2977 }
2978 while (true) {
2979 if (FormatTok->is(tok::at))
2980 nextToken();
2981 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2982 tok::kw___finally) ||
2983 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2984 FormatTok->is(Keywords.kw_finally)) ||
2985 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2986 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2987 break;
2988 }
2989 nextToken();
2990 while (FormatTok->isNot(tok::l_brace)) {
2991 if (FormatTok->is(tok::l_paren)) {
2992 parseParens();
2993 continue;
2994 }
2995 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2996 if (Style.RemoveBracesLLVM)
2997 NestedTooDeep.pop_back();
2998 return;
2999 }
3000 nextToken();
3001 }
3002 NeedsUnwrappedLine = false;
3003 Line->MustBeDeclaration = false;
3004 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3005 parseBlock();
3006 if (Style.BraceWrapping.BeforeCatch)
3007 addUnwrappedLine();
3008 else
3009 NeedsUnwrappedLine = true;
3010 }
3011
3012 if (Style.RemoveBracesLLVM)
3013 NestedTooDeep.pop_back();
3014
3015 if (NeedsUnwrappedLine)
3016 addUnwrappedLine();
3017 }
3018
parseNamespace()3019 void UnwrappedLineParser::parseNamespace() {
3020 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3021 "'namespace' expected");
3022
3023 const FormatToken &InitialToken = *FormatTok;
3024 nextToken();
3025 if (InitialToken.is(TT_NamespaceMacro)) {
3026 parseParens();
3027 } else {
3028 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3029 tok::l_square, tok::period, tok::l_paren) ||
3030 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3031 if (FormatTok->is(tok::l_square))
3032 parseSquare();
3033 else if (FormatTok->is(tok::l_paren))
3034 parseParens();
3035 else
3036 nextToken();
3037 }
3038 }
3039 if (FormatTok->is(tok::l_brace)) {
3040 FormatTok->setFinalizedType(TT_NamespaceLBrace);
3041
3042 if (ShouldBreakBeforeBrace(Style, InitialToken))
3043 addUnwrappedLine();
3044
3045 unsigned AddLevels =
3046 Style.NamespaceIndentation == FormatStyle::NI_All ||
3047 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3048 DeclarationScopeStack.size() > 1)
3049 ? 1u
3050 : 0u;
3051 bool ManageWhitesmithsBraces =
3052 AddLevels == 0u &&
3053 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3054
3055 // If we're in Whitesmiths mode, indent the brace if we're not indenting
3056 // the whole block.
3057 if (ManageWhitesmithsBraces)
3058 ++Line->Level;
3059
3060 // Munch the semicolon after a namespace. This is more common than one would
3061 // think. Putting the semicolon into its own line is very ugly.
3062 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3063 /*KeepBraces=*/true, /*IfKind=*/nullptr,
3064 ManageWhitesmithsBraces);
3065
3066 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3067
3068 if (ManageWhitesmithsBraces)
3069 --Line->Level;
3070 }
3071 // FIXME: Add error handling.
3072 }
3073
parseNew()3074 void UnwrappedLineParser::parseNew() {
3075 assert(FormatTok->is(tok::kw_new) && "'new' expected");
3076 nextToken();
3077
3078 if (Style.isCSharp()) {
3079 do {
3080 // Handle constructor invocation, e.g. `new(field: value)`.
3081 if (FormatTok->is(tok::l_paren))
3082 parseParens();
3083
3084 // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3085 if (FormatTok->is(tok::l_brace))
3086 parseBracedList();
3087
3088 if (FormatTok->isOneOf(tok::semi, tok::comma))
3089 return;
3090
3091 nextToken();
3092 } while (!eof());
3093 }
3094
3095 if (Style.Language != FormatStyle::LK_Java)
3096 return;
3097
3098 // In Java, we can parse everything up to the parens, which aren't optional.
3099 do {
3100 // There should not be a ;, { or } before the new's open paren.
3101 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3102 return;
3103
3104 // Consume the parens.
3105 if (FormatTok->is(tok::l_paren)) {
3106 parseParens();
3107
3108 // If there is a class body of an anonymous class, consume that as child.
3109 if (FormatTok->is(tok::l_brace))
3110 parseChildBlock();
3111 return;
3112 }
3113 nextToken();
3114 } while (!eof());
3115 }
3116
parseLoopBody(bool KeepBraces,bool WrapRightBrace)3117 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3118 keepAncestorBraces();
3119
3120 if (isBlockBegin(*FormatTok)) {
3121 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3122 FormatToken *LeftBrace = FormatTok;
3123 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3124 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3125 /*MunchSemi=*/true, KeepBraces);
3126 setPreviousRBraceType(TT_ControlStatementRBrace);
3127 if (!KeepBraces) {
3128 assert(!NestedTooDeep.empty());
3129 if (!NestedTooDeep.back())
3130 markOptionalBraces(LeftBrace);
3131 }
3132 if (WrapRightBrace)
3133 addUnwrappedLine();
3134 } else {
3135 parseUnbracedBody();
3136 }
3137
3138 if (!KeepBraces)
3139 NestedTooDeep.pop_back();
3140 }
3141
parseForOrWhileLoop(bool HasParens)3142 void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3143 assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3144 (Style.isVerilog() &&
3145 FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3146 Keywords.kw_always_ff, Keywords.kw_always_latch,
3147 Keywords.kw_final, Keywords.kw_initial,
3148 Keywords.kw_foreach, Keywords.kw_forever,
3149 Keywords.kw_repeat))) &&
3150 "'for', 'while' or foreach macro expected");
3151 const bool KeepBraces = !Style.RemoveBracesLLVM ||
3152 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3153
3154 nextToken();
3155 // JS' for await ( ...
3156 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3157 nextToken();
3158 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
3159 nextToken();
3160 if (HasParens && FormatTok->is(tok::l_paren)) {
3161 // The type is only set for Verilog basically because we were afraid to
3162 // change the existing behavior for loops. See the discussion on D121756 for
3163 // details.
3164 if (Style.isVerilog())
3165 FormatTok->setFinalizedType(TT_ConditionLParen);
3166 parseParens();
3167 }
3168
3169 if (Style.isVerilog()) {
3170 // Event control.
3171 parseVerilogSensitivityList();
3172 } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3173 Tokens->getPreviousToken()->is(tok::r_paren)) {
3174 nextToken();
3175 addUnwrappedLine();
3176 return;
3177 }
3178
3179 handleAttributes();
3180 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3181 }
3182
parseDoWhile()3183 void UnwrappedLineParser::parseDoWhile() {
3184 assert(FormatTok->is(tok::kw_do) && "'do' expected");
3185 nextToken();
3186
3187 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3188
3189 // FIXME: Add error handling.
3190 if (FormatTok->isNot(tok::kw_while)) {
3191 addUnwrappedLine();
3192 return;
3193 }
3194
3195 FormatTok->setFinalizedType(TT_DoWhile);
3196
3197 // If in Whitesmiths mode, the line with the while() needs to be indented
3198 // to the same level as the block.
3199 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3200 ++Line->Level;
3201
3202 nextToken();
3203 parseStructuralElement();
3204 }
3205
parseLabel(bool LeftAlignLabel)3206 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3207 nextToken();
3208 unsigned OldLineLevel = Line->Level;
3209 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3210 --Line->Level;
3211 if (LeftAlignLabel)
3212 Line->Level = 0;
3213
3214 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3215 FormatTok->is(tok::l_brace)) {
3216
3217 CompoundStatementIndenter Indenter(this, Line->Level,
3218 Style.BraceWrapping.AfterCaseLabel,
3219 Style.BraceWrapping.IndentBraces);
3220 parseBlock();
3221 if (FormatTok->is(tok::kw_break)) {
3222 if (Style.BraceWrapping.AfterControlStatement ==
3223 FormatStyle::BWACS_Always) {
3224 addUnwrappedLine();
3225 if (!Style.IndentCaseBlocks &&
3226 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3227 ++Line->Level;
3228 }
3229 }
3230 parseStructuralElement();
3231 }
3232 addUnwrappedLine();
3233 } else {
3234 if (FormatTok->is(tok::semi))
3235 nextToken();
3236 addUnwrappedLine();
3237 }
3238 Line->Level = OldLineLevel;
3239 if (FormatTok->isNot(tok::l_brace)) {
3240 parseStructuralElement();
3241 addUnwrappedLine();
3242 }
3243 }
3244
parseCaseLabel()3245 void UnwrappedLineParser::parseCaseLabel() {
3246 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3247
3248 // FIXME: fix handling of complex expressions here.
3249 do {
3250 nextToken();
3251 if (FormatTok->is(tok::colon)) {
3252 FormatTok->setFinalizedType(TT_CaseLabelColon);
3253 break;
3254 }
3255 } while (!eof());
3256 parseLabel();
3257 }
3258
parseSwitch()3259 void UnwrappedLineParser::parseSwitch() {
3260 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3261 nextToken();
3262 if (FormatTok->is(tok::l_paren))
3263 parseParens();
3264
3265 keepAncestorBraces();
3266
3267 if (FormatTok->is(tok::l_brace)) {
3268 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3269 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3270 parseBlock();
3271 setPreviousRBraceType(TT_ControlStatementRBrace);
3272 addUnwrappedLine();
3273 } else {
3274 addUnwrappedLine();
3275 ++Line->Level;
3276 parseStructuralElement();
3277 --Line->Level;
3278 }
3279
3280 if (Style.RemoveBracesLLVM)
3281 NestedTooDeep.pop_back();
3282 }
3283
3284 // Operators that can follow a C variable.
isCOperatorFollowingVar(tok::TokenKind kind)3285 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3286 switch (kind) {
3287 case tok::ampamp:
3288 case tok::ampequal:
3289 case tok::arrow:
3290 case tok::caret:
3291 case tok::caretequal:
3292 case tok::comma:
3293 case tok::ellipsis:
3294 case tok::equal:
3295 case tok::equalequal:
3296 case tok::exclaim:
3297 case tok::exclaimequal:
3298 case tok::greater:
3299 case tok::greaterequal:
3300 case tok::greatergreater:
3301 case tok::greatergreaterequal:
3302 case tok::l_paren:
3303 case tok::l_square:
3304 case tok::less:
3305 case tok::lessequal:
3306 case tok::lessless:
3307 case tok::lesslessequal:
3308 case tok::minus:
3309 case tok::minusequal:
3310 case tok::minusminus:
3311 case tok::percent:
3312 case tok::percentequal:
3313 case tok::period:
3314 case tok::pipe:
3315 case tok::pipeequal:
3316 case tok::pipepipe:
3317 case tok::plus:
3318 case tok::plusequal:
3319 case tok::plusplus:
3320 case tok::question:
3321 case tok::r_brace:
3322 case tok::r_paren:
3323 case tok::r_square:
3324 case tok::semi:
3325 case tok::slash:
3326 case tok::slashequal:
3327 case tok::star:
3328 case tok::starequal:
3329 return true;
3330 default:
3331 return false;
3332 }
3333 }
3334
parseAccessSpecifier()3335 void UnwrappedLineParser::parseAccessSpecifier() {
3336 FormatToken *AccessSpecifierCandidate = FormatTok;
3337 nextToken();
3338 // Understand Qt's slots.
3339 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3340 nextToken();
3341 // Otherwise, we don't know what it is, and we'd better keep the next token.
3342 if (FormatTok->is(tok::colon)) {
3343 nextToken();
3344 addUnwrappedLine();
3345 } else if (FormatTok->isNot(tok::coloncolon) &&
3346 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3347 // Not a variable name nor namespace name.
3348 addUnwrappedLine();
3349 } else if (AccessSpecifierCandidate) {
3350 // Consider the access specifier to be a C identifier.
3351 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3352 }
3353 }
3354
/// \brief Parses a requires, decides if it is a clause or an expression.
///
/// The decision is made in three stages: first by the token right after the
/// keyword, then by the token in front of the keyword, and finally by peeking
/// at up to 50 tokens after the keyword. Whatever the outcome, the actual
/// parsing is delegated to parseRequiresClause() or parseRequiresExpression().
///
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
bool clang::format::UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  // requires (T t) {
  // Or there is a constraint expression for the requires clause:
  // requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::amp:
  case tok::ampamp: {
    // This can be either:
    // if (... && requires (T t) ...)
    // Or
    // void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    // void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    // Still undecided; fall out of the switch into the look-ahead below.
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier()) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Every exit below first rewinds the token source to this position, so the
  // look-ahead itself consumes nothing.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  // Advances the look-ahead by one token and counts it against the limit.
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // Inspect at most 50 tokens.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Outside of angle brackets these are taken as part of a parameter
      // list, i.e. a requires expression.
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::r_paren:
    case tok::pipepipe:
      // Reaching one of these without having found a parameter is taken as a
      // constraint expression, i.e. a requires clause.
      FormatTok = Tokens->setPosition(StoredPosition);
      parseRequiresClause(RequiresToken);
      return true;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::identifier:
      // A second identifier that is neither qualified by `::` nor inside
      // angle brackets completes a TYPE NAME pair: a requires expression.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isSimpleTypeSpecifier()) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}
3501
3502 /// \brief Parses a requires clause.
3503 /// \param RequiresToken The requires keyword token, which starts this clause.
3504 /// \pre We need to be on the next token after the requires keyword.
3505 /// \sa parseRequiresExpression
3506 ///
3507 /// Returns if it either has finished parsing the clause, or it detects, that
3508 /// the clause is incorrect.
parseRequiresClause(FormatToken * RequiresToken)3509 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3510 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3511 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3512
3513 // If there is no previous token, we are within a requires expression,
3514 // otherwise we will always have the template or function declaration in front
3515 // of it.
3516 bool InRequiresExpression =
3517 !RequiresToken->Previous ||
3518 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3519
3520 RequiresToken->setFinalizedType(InRequiresExpression
3521 ? TT_RequiresClauseInARequiresExpression
3522 : TT_RequiresClause);
3523
3524 // NOTE: parseConstraintExpression is only ever called from this function.
3525 // It could be inlined into here.
3526 parseConstraintExpression();
3527
3528 if (!InRequiresExpression)
3529 FormatTok->Previous->ClosesRequiresClause = true;
3530 }
3531
3532 /// \brief Parses a requires expression.
3533 /// \param RequiresToken The requires keyword token, which starts this clause.
3534 /// \pre We need to be on the next token after the requires keyword.
3535 /// \sa parseRequiresClause
3536 ///
3537 /// Returns if it either has finished parsing the expression, or it detects,
3538 /// that the expression is incorrect.
parseRequiresExpression(FormatToken * RequiresToken)3539 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3540 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3541 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3542
3543 RequiresToken->setFinalizedType(TT_RequiresExpression);
3544
3545 if (FormatTok->is(tok::l_paren)) {
3546 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3547 parseParens();
3548 }
3549
3550 if (FormatTok->is(tok::l_brace)) {
3551 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3552 parseChildBlock();
3553 }
3554 }
3555
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
///
/// Tokens are consumed one construct per loop iteration until a token is met
/// that cannot continue the expression (or the end of input is reached). Two
/// flags carry context from one iteration to the next: whether a lambda may
/// start at the current token and whether a top-level parenthesis may start
/// there.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Take the permission computed by the previous iteration and reset it;
    // only the cases below that explicitly set it allow a lambda next time.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested `requires` inside a constraint is always an expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // None of these can continue the constraint.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Transparent for the lambda permission: pass it on unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast must be followed by its `<type>` argument, which is
      // parsed as an angle-bracketed list.
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3703
/// Parses an enum declaration or definition.
///
/// \returns false if, after looking at the tokens following the keyword, this
/// turns out not to be an enum at all (a TypeScript property named `enum`, a
/// protobuf field named `enum`, or two identifiers in a row in C++); true
/// otherwise, including the case where no enum body follows.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Skip the enum's name, base type and anything else that may precede the
  // opening brace.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    assert(FormatTok->isNot(TT_AttributeSquare));
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as ordinary declaration blocks.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // The enumerators start on a fresh line, one level deeper; the level is
    // restored once the list has been parsed.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
  return true;
}
3792
parseStructLike()3793 bool UnwrappedLineParser::parseStructLike() {
3794 // parseRecord falls through and does not yet add an unwrapped line as a
3795 // record declaration or definition can start a structural element.
3796 parseRecord();
3797 // This does not apply to Java, JavaScript and C#.
3798 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3799 Style.isCSharp()) {
3800 if (FormatTok->is(tok::semi))
3801 nextToken();
3802 addUnwrappedLine();
3803 return true;
3804 }
3805 return false;
3806 }
3807
3808 namespace {
3809 // A class used to set and restore the Token position when peeking
3810 // ahead in the token source.
3811 class ScopedTokenPosition {
3812 unsigned StoredPosition;
3813 FormatTokenSource *Tokens;
3814
3815 public:
ScopedTokenPosition(FormatTokenSource * Tokens)3816 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3817 assert(Tokens && "Tokens expected to not be null");
3818 StoredPosition = Tokens->getPosition();
3819 }
3820
~ScopedTokenPosition()3821 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3822 };
3823 } // namespace
3824
3825 // Look to see if we have [[ by looking ahead, if
3826 // its not then rewind to the original position.
tryToParseSimpleAttribute()3827 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3828 ScopedTokenPosition AutoPosition(Tokens);
3829 FormatToken *Tok = Tokens->getNextToken();
3830 // We already read the first [ check for the second.
3831 if (Tok->isNot(tok::l_square))
3832 return false;
3833 // Double check that the attribute is just something
3834 // fairly simple.
3835 while (Tok->isNot(tok::eof)) {
3836 if (Tok->is(tok::r_square))
3837 break;
3838 Tok = Tokens->getNextToken();
3839 }
3840 if (Tok->is(tok::eof))
3841 return false;
3842 Tok = Tokens->getNextToken();
3843 if (Tok->isNot(tok::r_square))
3844 return false;
3845 Tok = Tokens->getNextToken();
3846 if (Tok->is(tok::semi))
3847 return false;
3848 return true;
3849 }
3850
parseJavaEnumBody()3851 void UnwrappedLineParser::parseJavaEnumBody() {
3852 assert(FormatTok->is(tok::l_brace));
3853 const FormatToken *OpeningBrace = FormatTok;
3854
3855 // Determine whether the enum is simple, i.e. does not have a semicolon or
3856 // constants with class bodies. Simple enums can be formatted like braced
3857 // lists, contracted to a single line, etc.
3858 unsigned StoredPosition = Tokens->getPosition();
3859 bool IsSimple = true;
3860 FormatToken *Tok = Tokens->getNextToken();
3861 while (Tok->isNot(tok::eof)) {
3862 if (Tok->is(tok::r_brace))
3863 break;
3864 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3865 IsSimple = false;
3866 break;
3867 }
3868 // FIXME: This will also mark enums with braces in the arguments to enum
3869 // constants as "not simple". This is probably fine in practice, though.
3870 Tok = Tokens->getNextToken();
3871 }
3872 FormatTok = Tokens->setPosition(StoredPosition);
3873
3874 if (IsSimple) {
3875 nextToken();
3876 parseBracedList();
3877 addUnwrappedLine();
3878 return;
3879 }
3880
3881 // Parse the body of a more complex enum.
3882 // First add a line for everything up to the "{".
3883 nextToken();
3884 addUnwrappedLine();
3885 ++Line->Level;
3886
3887 // Parse the enum constants.
3888 while (!eof()) {
3889 if (FormatTok->is(tok::l_brace)) {
3890 // Parse the constant's class body.
3891 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3892 /*MunchSemi=*/false);
3893 } else if (FormatTok->is(tok::l_paren)) {
3894 parseParens();
3895 } else if (FormatTok->is(tok::comma)) {
3896 nextToken();
3897 addUnwrappedLine();
3898 } else if (FormatTok->is(tok::semi)) {
3899 nextToken();
3900 addUnwrappedLine();
3901 break;
3902 } else if (FormatTok->is(tok::r_brace)) {
3903 addUnwrappedLine();
3904 break;
3905 } else {
3906 nextToken();
3907 }
3908 }
3909
3910 // Parse the class body after the enum's ";" if any.
3911 parseLevel(OpeningBrace);
3912 nextToken();
3913 --Line->Level;
3914 addUnwrappedLine();
3915 }
3916
parseRecord(bool ParseAsExpr)3917 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3918 const FormatToken &InitialToken = *FormatTok;
3919 nextToken();
3920
3921 auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3922 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
3923 };
3924 // The actual identifier can be a nested name specifier, and in macros
3925 // it is often token-pasted.
3926 // An [[attribute]] can be before the identifier.
3927 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3928 tok::kw_alignas, tok::l_square) ||
3929 FormatTok->isAttribute() ||
3930 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3931 FormatTok->isOneOf(tok::period, tok::comma))) {
3932 if (Style.isJavaScript() &&
3933 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3934 // JavaScript/TypeScript supports inline object types in
3935 // extends/implements positions:
3936 // class Foo implements {bar: number} { }
3937 nextToken();
3938 if (FormatTok->is(tok::l_brace)) {
3939 tryToParseBracedList();
3940 continue;
3941 }
3942 }
3943 if (FormatTok->is(tok::l_square) && handleCppAttributes())
3944 continue;
3945 nextToken();
3946 // We can have macros in between 'class' and the class name.
3947 if (!IsNonMacroIdentifier(FormatTok->Previous) &&
3948 FormatTok->is(tok::l_paren)) {
3949 parseParens();
3950 }
3951 }
3952
3953 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3954 int AngleNestingLevel = 0;
3955 do {
3956 if (FormatTok->is(tok::less))
3957 ++AngleNestingLevel;
3958 else if (FormatTok->is(tok::greater))
3959 --AngleNestingLevel;
3960
3961 if (AngleNestingLevel == 0 && FormatTok->is(tok::l_paren) &&
3962 IsNonMacroIdentifier(FormatTok->Previous)) {
3963 break;
3964 }
3965 if (FormatTok->is(tok::l_brace)) {
3966 calculateBraceTypes(/*ExpectClassBody=*/true);
3967 if (!tryToParseBracedList())
3968 break;
3969 }
3970 if (FormatTok->is(tok::l_square)) {
3971 FormatToken *Previous = FormatTok->Previous;
3972 if (!Previous ||
3973 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3974 // Don't try parsing a lambda if we had a closing parenthesis before,
3975 // it was probably a pointer to an array: int (*)[].
3976 if (!tryToParseLambda())
3977 continue;
3978 } else {
3979 parseSquare();
3980 continue;
3981 }
3982 }
3983 if (FormatTok->is(tok::semi))
3984 return;
3985 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3986 addUnwrappedLine();
3987 nextToken();
3988 parseCSharpGenericTypeConstraint();
3989 break;
3990 }
3991 nextToken();
3992 } while (!eof());
3993 }
3994
3995 auto GetBraceTypes =
3996 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
3997 switch (RecordTok.Tok.getKind()) {
3998 case tok::kw_class:
3999 return {TT_ClassLBrace, TT_ClassRBrace};
4000 case tok::kw_struct:
4001 return {TT_StructLBrace, TT_StructRBrace};
4002 case tok::kw_union:
4003 return {TT_UnionLBrace, TT_UnionRBrace};
4004 default:
4005 // Useful for e.g. interface.
4006 return {TT_RecordLBrace, TT_RecordRBrace};
4007 }
4008 };
4009 if (FormatTok->is(tok::l_brace)) {
4010 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4011 FormatTok->setFinalizedType(OpenBraceType);
4012 if (ParseAsExpr) {
4013 parseChildBlock();
4014 } else {
4015 if (ShouldBreakBeforeBrace(Style, InitialToken))
4016 addUnwrappedLine();
4017
4018 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4019 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4020 }
4021 setPreviousRBraceType(ClosingBraceType);
4022 }
4023 // There is no addUnwrappedLine() here so that we fall through to parsing a
4024 // structural element afterwards. Thus, in "class A {} n, m;",
4025 // "} n, m;" will end up in one unwrapped line.
4026 }
4027
parseObjCMethod()4028 void UnwrappedLineParser::parseObjCMethod() {
4029 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4030 "'(' or identifier expected.");
4031 do {
4032 if (FormatTok->is(tok::semi)) {
4033 nextToken();
4034 addUnwrappedLine();
4035 return;
4036 } else if (FormatTok->is(tok::l_brace)) {
4037 if (Style.BraceWrapping.AfterFunction)
4038 addUnwrappedLine();
4039 parseBlock();
4040 addUnwrappedLine();
4041 return;
4042 } else {
4043 nextToken();
4044 }
4045 } while (!eof());
4046 }
4047
parseObjCProtocolList()4048 void UnwrappedLineParser::parseObjCProtocolList() {
4049 assert(FormatTok->is(tok::less) && "'<' expected.");
4050 do {
4051 nextToken();
4052 // Early exit in case someone forgot a close angle.
4053 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4054 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4055 return;
4056 }
4057 } while (!eof() && FormatTok->isNot(tok::greater));
4058 nextToken(); // Skip '>'.
4059 }
4060
parseObjCUntilAtEnd()4061 void UnwrappedLineParser::parseObjCUntilAtEnd() {
4062 do {
4063 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4064 nextToken();
4065 addUnwrappedLine();
4066 break;
4067 }
4068 if (FormatTok->is(tok::l_brace)) {
4069 parseBlock();
4070 // In ObjC interfaces, nothing should be following the "}".
4071 addUnwrappedLine();
4072 } else if (FormatTok->is(tok::r_brace)) {
4073 // Ignore stray "}". parseStructuralElement doesn't consume them.
4074 nextToken();
4075 addUnwrappedLine();
4076 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4077 nextToken();
4078 parseObjCMethod();
4079 } else {
4080 parseStructuralElement();
4081 }
4082 } while (!eof());
4083 }
4084
parseObjCInterfaceOrImplementation()4085 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4086 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4087 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4088 nextToken();
4089 nextToken(); // interface name
4090
4091 // @interface can be followed by a lightweight generic
4092 // specialization list, then either a base class or a category.
4093 if (FormatTok->is(tok::less))
4094 parseObjCLightweightGenerics();
4095 if (FormatTok->is(tok::colon)) {
4096 nextToken();
4097 nextToken(); // base class name
4098 // The base class can also have lightweight generics applied to it.
4099 if (FormatTok->is(tok::less))
4100 parseObjCLightweightGenerics();
4101 } else if (FormatTok->is(tok::l_paren)) {
4102 // Skip category, if present.
4103 parseParens();
4104 }
4105
4106 if (FormatTok->is(tok::less))
4107 parseObjCProtocolList();
4108
4109 if (FormatTok->is(tok::l_brace)) {
4110 if (Style.BraceWrapping.AfterObjCDeclaration)
4111 addUnwrappedLine();
4112 parseBlock(/*MustBeDeclaration=*/true);
4113 }
4114
4115 // With instance variables, this puts '}' on its own line. Without instance
4116 // variables, this ends the @interface line.
4117 addUnwrappedLine();
4118
4119 parseObjCUntilAtEnd();
4120 }
4121
parseObjCLightweightGenerics()4122 void UnwrappedLineParser::parseObjCLightweightGenerics() {
4123 assert(FormatTok->is(tok::less));
4124 // Unlike protocol lists, generic parameterizations support
4125 // nested angles:
4126 //
4127 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4128 // NSObject <NSCopying, NSSecureCoding>
4129 //
4130 // so we need to count how many open angles we have left.
4131 unsigned NumOpenAngles = 1;
4132 do {
4133 nextToken();
4134 // Early exit in case someone forgot a close angle.
4135 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4136 FormatTok->isObjCAtKeyword(tok::objc_end)) {
4137 break;
4138 }
4139 if (FormatTok->is(tok::less)) {
4140 ++NumOpenAngles;
4141 } else if (FormatTok->is(tok::greater)) {
4142 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4143 --NumOpenAngles;
4144 }
4145 } while (!eof() && NumOpenAngles != 0);
4146 nextToken(); // Skip '>'.
4147 }
4148
4149 // Returns true for the declaration/definition form of @protocol,
4150 // false for the expression form.
parseObjCProtocol()4151 bool UnwrappedLineParser::parseObjCProtocol() {
4152 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4153 nextToken();
4154
4155 if (FormatTok->is(tok::l_paren)) {
4156 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4157 return false;
4158 }
4159
4160 // The definition/declaration form,
4161 // @protocol Foo
4162 // - (int)someMethod;
4163 // @end
4164
4165 nextToken(); // protocol name
4166
4167 if (FormatTok->is(tok::less))
4168 parseObjCProtocolList();
4169
4170 // Check for protocol declaration.
4171 if (FormatTok->is(tok::semi)) {
4172 nextToken();
4173 addUnwrappedLine();
4174 return true;
4175 }
4176
4177 addUnwrappedLine();
4178 parseObjCUntilAtEnd();
4179 return true;
4180 }
4181
parseJavaScriptEs6ImportExport()4182 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4183 bool IsImport = FormatTok->is(Keywords.kw_import);
4184 assert(IsImport || FormatTok->is(tok::kw_export));
4185 nextToken();
4186
4187 // Consume the "default" in "export default class/function".
4188 if (FormatTok->is(tok::kw_default))
4189 nextToken();
4190
4191 // Consume "async function", "function" and "default function", so that these
4192 // get parsed as free-standing JS functions, i.e. do not require a trailing
4193 // semicolon.
4194 if (FormatTok->is(Keywords.kw_async))
4195 nextToken();
4196 if (FormatTok->is(Keywords.kw_function)) {
4197 nextToken();
4198 return;
4199 }
4200
4201 // For imports, `export *`, `export {...}`, consume the rest of the line up
4202 // to the terminating `;`. For everything else, just return and continue
4203 // parsing the structural element, i.e. the declaration or expression for
4204 // `export default`.
4205 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4206 !FormatTok->isStringLiteral() &&
4207 !(FormatTok->is(Keywords.kw_type) &&
4208 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4209 return;
4210 }
4211
4212 while (!eof()) {
4213 if (FormatTok->is(tok::semi))
4214 return;
4215 if (Line->Tokens.empty()) {
4216 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4217 // import statement should terminate.
4218 return;
4219 }
4220 if (FormatTok->is(tok::l_brace)) {
4221 FormatTok->setBlockKind(BK_Block);
4222 nextToken();
4223 parseBracedList();
4224 } else {
4225 nextToken();
4226 }
4227 }
4228 }
4229
parseStatementMacro()4230 void UnwrappedLineParser::parseStatementMacro() {
4231 nextToken();
4232 if (FormatTok->is(tok::l_paren))
4233 parseParens();
4234 if (FormatTok->is(tok::semi))
4235 nextToken();
4236 addUnwrappedLine();
4237 }
4238
parseVerilogHierarchyIdentifier()4239 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4240 // consume things like a::`b.c[d:e] or a::*
4241 while (true) {
4242 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4243 tok::coloncolon, tok::hash) ||
4244 Keywords.isVerilogIdentifier(*FormatTok)) {
4245 nextToken();
4246 } else if (FormatTok->is(tok::l_square)) {
4247 parseSquare();
4248 } else {
4249 break;
4250 }
4251 }
4252 }
4253
parseVerilogSensitivityList()4254 void UnwrappedLineParser::parseVerilogSensitivityList() {
4255 if (FormatTok->isNot(tok::at))
4256 return;
4257 nextToken();
4258 // A block event expression has 2 at signs.
4259 if (FormatTok->is(tok::at))
4260 nextToken();
4261 switch (FormatTok->Tok.getKind()) {
4262 case tok::star:
4263 nextToken();
4264 break;
4265 case tok::l_paren:
4266 parseParens();
4267 break;
4268 default:
4269 parseVerilogHierarchyIdentifier();
4270 break;
4271 }
4272 }
4273
parseVerilogHierarchyHeader()4274 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4275 unsigned AddLevels = 0;
4276
4277 if (FormatTok->is(Keywords.kw_clocking)) {
4278 nextToken();
4279 if (Keywords.isVerilogIdentifier(*FormatTok))
4280 nextToken();
4281 parseVerilogSensitivityList();
4282 if (FormatTok->is(tok::semi))
4283 nextToken();
4284 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4285 Keywords.kw_casez, Keywords.kw_randcase,
4286 Keywords.kw_randsequence)) {
4287 if (Style.IndentCaseLabels)
4288 AddLevels++;
4289 nextToken();
4290 if (FormatTok->is(tok::l_paren)) {
4291 FormatTok->setFinalizedType(TT_ConditionLParen);
4292 parseParens();
4293 }
4294 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4295 nextToken();
4296 // The case header has no semicolon.
4297 } else {
4298 // "module" etc.
4299 nextToken();
4300 // all the words like the name of the module and specifiers like
4301 // "automatic" and the width of function return type
4302 while (true) {
4303 if (FormatTok->is(tok::l_square)) {
4304 auto Prev = FormatTok->getPreviousNonComment();
4305 if (Prev && Keywords.isVerilogIdentifier(*Prev))
4306 Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4307 parseSquare();
4308 } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4309 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4310 nextToken();
4311 } else {
4312 break;
4313 }
4314 }
4315
4316 auto NewLine = [this]() {
4317 addUnwrappedLine();
4318 Line->IsContinuation = true;
4319 };
4320
4321 // package imports
4322 while (FormatTok->is(Keywords.kw_import)) {
4323 NewLine();
4324 nextToken();
4325 parseVerilogHierarchyIdentifier();
4326 if (FormatTok->is(tok::semi))
4327 nextToken();
4328 }
4329
4330 // parameters and ports
4331 if (FormatTok->is(Keywords.kw_verilogHash)) {
4332 NewLine();
4333 nextToken();
4334 if (FormatTok->is(tok::l_paren)) {
4335 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4336 parseParens();
4337 }
4338 }
4339 if (FormatTok->is(tok::l_paren)) {
4340 NewLine();
4341 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4342 parseParens();
4343 }
4344
4345 // extends and implements
4346 if (FormatTok->is(Keywords.kw_extends)) {
4347 NewLine();
4348 nextToken();
4349 parseVerilogHierarchyIdentifier();
4350 if (FormatTok->is(tok::l_paren))
4351 parseParens();
4352 }
4353 if (FormatTok->is(Keywords.kw_implements)) {
4354 NewLine();
4355 do {
4356 nextToken();
4357 parseVerilogHierarchyIdentifier();
4358 } while (FormatTok->is(tok::comma));
4359 }
4360
4361 // Coverage event for cover groups.
4362 if (FormatTok->is(tok::at)) {
4363 NewLine();
4364 parseVerilogSensitivityList();
4365 }
4366
4367 if (FormatTok->is(tok::semi))
4368 nextToken(/*LevelDifference=*/1);
4369 addUnwrappedLine();
4370 }
4371
4372 return AddLevels;
4373 }
4374
parseVerilogTable()4375 void UnwrappedLineParser::parseVerilogTable() {
4376 assert(FormatTok->is(Keywords.kw_table));
4377 nextToken(/*LevelDifference=*/1);
4378 addUnwrappedLine();
4379
4380 auto InitialLevel = Line->Level++;
4381 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4382 FormatToken *Tok = FormatTok;
4383 nextToken();
4384 if (Tok->is(tok::semi))
4385 addUnwrappedLine();
4386 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4387 Tok->setFinalizedType(TT_VerilogTableItem);
4388 }
4389 Line->Level = InitialLevel;
4390 nextToken(/*LevelDifference=*/-1);
4391 addUnwrappedLine();
4392 }
4393
parseVerilogCaseLabel()4394 void UnwrappedLineParser::parseVerilogCaseLabel() {
4395 // The label will get unindented in AnnotatingParser. If there are no leading
4396 // spaces, indent the rest here so that things inside the block will be
4397 // indented relative to things outside. We don't use parseLabel because we
4398 // don't know whether this colon is a label or a ternary expression at this
4399 // point.
4400 auto OrigLevel = Line->Level;
4401 auto FirstLine = CurrentLines->size();
4402 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4403 ++Line->Level;
4404 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4405 --Line->Level;
4406 parseStructuralElement();
4407 // Restore the indentation in both the new line and the line that has the
4408 // label.
4409 if (CurrentLines->size() > FirstLine)
4410 (*CurrentLines)[FirstLine].Level = OrigLevel;
4411 Line->Level = OrigLevel;
4412 }
4413
containsExpansion(const UnwrappedLine & Line) const4414 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4415 for (const auto &N : Line.Tokens) {
4416 if (N.Tok->MacroCtx)
4417 return true;
4418 for (const UnwrappedLine &Child : N.Children)
4419 if (containsExpansion(Child))
4420 return true;
4421 }
4422 return false;
4423 }
4424
addUnwrappedLine(LineLevel AdjustLevel)4425 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4426 if (Line->Tokens.empty())
4427 return;
4428 LLVM_DEBUG({
4429 if (!parsingPPDirective()) {
4430 llvm::dbgs() << "Adding unwrapped line:\n";
4431 printDebugInfo(*Line);
4432 }
4433 });
4434
4435 // If this line closes a block when in Whitesmiths mode, remember that
4436 // information so that the level can be decreased after the line is added.
4437 // This has to happen after the addition of the line since the line itself
4438 // needs to be indented.
4439 bool ClosesWhitesmithsBlock =
4440 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4441 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4442
4443 // If the current line was expanded from a macro call, we use it to
4444 // reconstruct an unwrapped line from the structure of the expanded unwrapped
4445 // line and the unexpanded token stream.
4446 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
4447 if (!Reconstruct)
4448 Reconstruct.emplace(Line->Level, Unexpanded);
4449 Reconstruct->addLine(*Line);
4450
4451 // While the reconstructed unexpanded lines are stored in the normal
4452 // flow of lines, the expanded lines are stored on the side to be analyzed
4453 // in an extra step.
4454 CurrentExpandedLines.push_back(std::move(*Line));
4455
4456 if (Reconstruct->finished()) {
4457 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
4458 assert(!Reconstructed.Tokens.empty() &&
4459 "Reconstructed must at least contain the macro identifier.");
4460 assert(!parsingPPDirective());
4461 LLVM_DEBUG({
4462 llvm::dbgs() << "Adding unexpanded line:\n";
4463 printDebugInfo(Reconstructed);
4464 });
4465 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
4466 Lines.push_back(std::move(Reconstructed));
4467 CurrentExpandedLines.clear();
4468 Reconstruct.reset();
4469 }
4470 } else {
4471 // At the top level we only get here when no unexpansion is going on, or
4472 // when conditional formatting led to unfinished macro reconstructions.
4473 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
4474 CurrentLines->push_back(std::move(*Line));
4475 }
4476 Line->Tokens.clear();
4477 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4478 Line->FirstStartColumn = 0;
4479 Line->IsContinuation = false;
4480 Line->SeenDecltypeAuto = false;
4481
4482 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4483 --Line->Level;
4484 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
4485 CurrentLines->append(
4486 std::make_move_iterator(PreprocessorDirectives.begin()),
4487 std::make_move_iterator(PreprocessorDirectives.end()));
4488 PreprocessorDirectives.clear();
4489 }
4490 // Disconnect the current token from the last token on the previous line.
4491 FormatTok->Previous = nullptr;
4492 }
4493
eof() const4494 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4495
isOnNewLine(const FormatToken & FormatTok)4496 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4497 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4498 FormatTok.NewlinesBefore > 0;
4499 }
4500
4501 // Checks if \p FormatTok is a line comment that continues the line comment
4502 // section on \p Line.
4503 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)4504 continuesLineCommentSection(const FormatToken &FormatTok,
4505 const UnwrappedLine &Line,
4506 const llvm::Regex &CommentPragmasRegex) {
4507 if (Line.Tokens.empty())
4508 return false;
4509
4510 StringRef IndentContent = FormatTok.TokenText;
4511 if (FormatTok.TokenText.starts_with("//") ||
4512 FormatTok.TokenText.starts_with("/*")) {
4513 IndentContent = FormatTok.TokenText.substr(2);
4514 }
4515 if (CommentPragmasRegex.match(IndentContent))
4516 return false;
4517
4518 // If Line starts with a line comment, then FormatTok continues the comment
4519 // section if its original column is greater or equal to the original start
4520 // column of the line.
4521 //
4522 // Define the min column token of a line as follows: if a line ends in '{' or
4523 // contains a '{' followed by a line comment, then the min column token is
4524 // that '{'. Otherwise, the min column token of the line is the first token of
4525 // the line.
4526 //
4527 // If Line starts with a token other than a line comment, then FormatTok
4528 // continues the comment section if its original column is greater than the
4529 // original start column of the min column token of the line.
4530 //
4531 // For example, the second line comment continues the first in these cases:
4532 //
4533 // // first line
4534 // // second line
4535 //
4536 // and:
4537 //
4538 // // first line
4539 // // second line
4540 //
4541 // and:
4542 //
4543 // int i; // first line
4544 // // second line
4545 //
4546 // and:
4547 //
4548 // do { // first line
4549 // // second line
4550 // int i;
4551 // } while (true);
4552 //
4553 // and:
4554 //
4555 // enum {
4556 // a, // first line
4557 // // second line
4558 // b
4559 // };
4560 //
4561 // The second line comment doesn't continue the first in these cases:
4562 //
4563 // // first line
4564 // // second line
4565 //
4566 // and:
4567 //
4568 // int i; // first line
4569 // // second line
4570 //
4571 // and:
4572 //
4573 // do { // first line
4574 // // second line
4575 // int i;
4576 // } while (true);
4577 //
4578 // and:
4579 //
4580 // enum {
4581 // a, // first line
4582 // // second line
4583 // };
4584 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4585
4586 // Scan for '{//'. If found, use the column of '{' as a min column for line
4587 // comment section continuation.
4588 const FormatToken *PreviousToken = nullptr;
4589 for (const UnwrappedLineNode &Node : Line.Tokens) {
4590 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4591 isLineComment(*Node.Tok)) {
4592 MinColumnToken = PreviousToken;
4593 break;
4594 }
4595 PreviousToken = Node.Tok;
4596
4597 // Grab the last newline preceding a token in this unwrapped line.
4598 if (Node.Tok->NewlinesBefore > 0)
4599 MinColumnToken = Node.Tok;
4600 }
4601 if (PreviousToken && PreviousToken->is(tok::l_brace))
4602 MinColumnToken = PreviousToken;
4603
4604 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4605 MinColumnToken);
4606 }
4607
flushComments(bool NewlineBeforeNext)4608 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4609 bool JustComments = Line->Tokens.empty();
4610 for (FormatToken *Tok : CommentsBeforeNextToken) {
4611 // Line comments that belong to the same line comment section are put on the
4612 // same line since later we might want to reflow content between them.
4613 // Additional fine-grained breaking of line comment sections is controlled
4614 // by the class BreakableLineCommentSection in case it is desirable to keep
4615 // several line comment sections in the same unwrapped line.
4616 //
4617 // FIXME: Consider putting separate line comment sections as children to the
4618 // unwrapped line instead.
4619 Tok->ContinuesLineCommentSection =
4620 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4621 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4622 addUnwrappedLine();
4623 pushToken(Tok);
4624 }
4625 if (NewlineBeforeNext && JustComments)
4626 addUnwrappedLine();
4627 CommentsBeforeNextToken.clear();
4628 }
4629
nextToken(int LevelDifference)4630 void UnwrappedLineParser::nextToken(int LevelDifference) {
4631 if (eof())
4632 return;
4633 flushComments(isOnNewLine(*FormatTok));
4634 pushToken(FormatTok);
4635 FormatToken *Previous = FormatTok;
4636 if (!Style.isJavaScript())
4637 readToken(LevelDifference);
4638 else
4639 readTokenWithJavaScriptASI();
4640 FormatTok->Previous = Previous;
4641 if (Style.isVerilog()) {
4642 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4643 // keywords like `begin`, we can't treat them the same as left braces
4644 // because some contexts require one of them. For example structs use
4645 // braces and if blocks use keywords, and a left brace can occur in an if
4646 // statement, but it is not a block. For keywords like `end`, we simply
4647 // treat them the same as right braces.
4648 if (Keywords.isVerilogEnd(*FormatTok))
4649 FormatTok->Tok.setKind(tok::r_brace);
4650 }
4651 }
4652
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)4653 void UnwrappedLineParser::distributeComments(
4654 const SmallVectorImpl<FormatToken *> &Comments,
4655 const FormatToken *NextTok) {
4656 // Whether or not a line comment token continues a line is controlled by
4657 // the method continuesLineCommentSection, with the following caveat:
4658 //
4659 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4660 // that each comment line from the trail is aligned with the next token, if
4661 // the next token exists. If a trail exists, the beginning of the maximal
4662 // trail is marked as a start of a new comment section.
4663 //
4664 // For example in this code:
4665 //
4666 // int a; // line about a
4667 // // line 1 about b
4668 // // line 2 about b
4669 // int b;
4670 //
4671 // the two lines about b form a maximal trail, so there are two sections, the
4672 // first one consisting of the single comment "// line about a" and the
4673 // second one consisting of the next two comments.
4674 if (Comments.empty())
4675 return;
4676 bool ShouldPushCommentsInCurrentLine = true;
4677 bool HasTrailAlignedWithNextToken = false;
4678 unsigned StartOfTrailAlignedWithNextToken = 0;
4679 if (NextTok) {
4680 // We are skipping the first element intentionally.
4681 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4682 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4683 HasTrailAlignedWithNextToken = true;
4684 StartOfTrailAlignedWithNextToken = i;
4685 }
4686 }
4687 }
4688 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4689 FormatToken *FormatTok = Comments[i];
4690 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4691 FormatTok->ContinuesLineCommentSection = false;
4692 } else {
4693 FormatTok->ContinuesLineCommentSection =
4694 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4695 }
4696 if (!FormatTok->ContinuesLineCommentSection &&
4697 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4698 ShouldPushCommentsInCurrentLine = false;
4699 }
4700 if (ShouldPushCommentsInCurrentLine)
4701 pushToken(FormatTok);
4702 else
4703 CommentsBeforeNextToken.push_back(FormatTok);
4704 }
4705 }
4706
readToken(int LevelDifference)4707 void UnwrappedLineParser::readToken(int LevelDifference) {
4708 SmallVector<FormatToken *, 1> Comments;
4709 bool PreviousWasComment = false;
4710 bool FirstNonCommentOnLine = false;
4711 do {
4712 FormatTok = Tokens->getNextToken();
4713 assert(FormatTok);
4714 while (FormatTok->getType() == TT_ConflictStart ||
4715 FormatTok->getType() == TT_ConflictEnd ||
4716 FormatTok->getType() == TT_ConflictAlternative) {
4717 if (FormatTok->getType() == TT_ConflictStart)
4718 conditionalCompilationStart(/*Unreachable=*/false);
4719 else if (FormatTok->getType() == TT_ConflictAlternative)
4720 conditionalCompilationAlternative();
4721 else if (FormatTok->getType() == TT_ConflictEnd)
4722 conditionalCompilationEnd();
4723 FormatTok = Tokens->getNextToken();
4724 FormatTok->MustBreakBefore = true;
4725 FormatTok->MustBreakBeforeFinalized = true;
4726 }
4727
4728 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4729 const FormatToken &Tok,
4730 bool PreviousWasComment) {
4731 auto IsFirstOnLine = [](const FormatToken &Tok) {
4732 return Tok.HasUnescapedNewline || Tok.IsFirst;
4733 };
4734
4735 // Consider preprocessor directives preceded by block comments as first
4736 // on line.
4737 if (PreviousWasComment)
4738 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4739 return IsFirstOnLine(Tok);
4740 };
4741
4742 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4743 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4744 PreviousWasComment = FormatTok->is(tok::comment);
4745
4746 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4747 (!Style.isVerilog() ||
4748 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4749 FirstNonCommentOnLine) {
4750 distributeComments(Comments, FormatTok);
4751 Comments.clear();
4752 // If there is an unfinished unwrapped line, we flush the preprocessor
4753 // directives only after that unwrapped line was finished later.
4754 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4755 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4756 assert((LevelDifference >= 0 ||
4757 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4758 "LevelDifference makes Line->Level negative");
4759 Line->Level += LevelDifference;
4760 // Comments stored before the preprocessor directive need to be output
4761 // before the preprocessor directive, at the same level as the
4762 // preprocessor directive, as we consider them to apply to the directive.
4763 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4764 PPBranchLevel > 0) {
4765 Line->Level += PPBranchLevel;
4766 }
4767 flushComments(isOnNewLine(*FormatTok));
4768 parsePPDirective();
4769 PreviousWasComment = FormatTok->is(tok::comment);
4770 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4771 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4772 }
4773
4774 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4775 !Line->InPPDirective) {
4776 continue;
4777 }
4778
4779 if (FormatTok->is(tok::identifier) &&
4780 Macros.defined(FormatTok->TokenText) &&
4781 // FIXME: Allow expanding macros in preprocessor directives.
4782 !Line->InPPDirective) {
4783 FormatToken *ID = FormatTok;
4784 unsigned Position = Tokens->getPosition();
4785
4786 // To correctly parse the code, we need to replace the tokens of the macro
4787 // call with its expansion.
4788 auto PreCall = std::move(Line);
4789 Line.reset(new UnwrappedLine);
4790 bool OldInExpansion = InExpansion;
4791 InExpansion = true;
4792 // We parse the macro call into a new line.
4793 auto Args = parseMacroCall();
4794 InExpansion = OldInExpansion;
4795 assert(Line->Tokens.front().Tok == ID);
4796 // And remember the unexpanded macro call tokens.
4797 auto UnexpandedLine = std::move(Line);
4798 // Reset to the old line.
4799 Line = std::move(PreCall);
4800
4801 LLVM_DEBUG({
4802 llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4803 if (Args) {
4804 llvm::dbgs() << "(";
4805 for (const auto &Arg : Args.value())
4806 for (const auto &T : Arg)
4807 llvm::dbgs() << T->TokenText << " ";
4808 llvm::dbgs() << ")";
4809 }
4810 llvm::dbgs() << "\n";
4811 });
4812 if (Macros.objectLike(ID->TokenText) && Args &&
4813 !Macros.hasArity(ID->TokenText, Args->size())) {
4814 // The macro is either
4815 // - object-like, but we got argumnets, or
4816 // - overloaded to be both object-like and function-like, but none of
4817 // the function-like arities match the number of arguments.
4818 // Thus, expand as object-like macro.
4819 LLVM_DEBUG(llvm::dbgs()
4820 << "Macro \"" << ID->TokenText
4821 << "\" not overloaded for arity " << Args->size()
4822 << "or not function-like, using object-like overload.");
4823 Args.reset();
4824 UnexpandedLine->Tokens.resize(1);
4825 Tokens->setPosition(Position);
4826 nextToken();
4827 assert(!Args && Macros.objectLike(ID->TokenText));
4828 }
4829 if ((!Args && Macros.objectLike(ID->TokenText)) ||
4830 (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4831 // Next, we insert the expanded tokens in the token stream at the
4832 // current position, and continue parsing.
4833 Unexpanded[ID] = std::move(UnexpandedLine);
4834 SmallVector<FormatToken *, 8> Expansion =
4835 Macros.expand(ID, std::move(Args));
4836 if (!Expansion.empty())
4837 FormatTok = Tokens->insertTokens(Expansion);
4838
4839 LLVM_DEBUG({
4840 llvm::dbgs() << "Expanded: ";
4841 for (const auto &T : Expansion)
4842 llvm::dbgs() << T->TokenText << " ";
4843 llvm::dbgs() << "\n";
4844 });
4845 } else {
4846 LLVM_DEBUG({
4847 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4848 << "\", because it was used ";
4849 if (Args)
4850 llvm::dbgs() << "with " << Args->size();
4851 else
4852 llvm::dbgs() << "without";
4853 llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4854 });
4855 Tokens->setPosition(Position);
4856 FormatTok = ID;
4857 }
4858 }
4859
4860 if (FormatTok->isNot(tok::comment)) {
4861 distributeComments(Comments, FormatTok);
4862 Comments.clear();
4863 return;
4864 }
4865
4866 Comments.push_back(FormatTok);
4867 } while (!eof());
4868
4869 distributeComments(Comments, nullptr);
4870 Comments.clear();
4871 }
4872
4873 namespace {
4874 template <typename Iterator>
pushTokens(Iterator Begin,Iterator End,llvm::SmallVectorImpl<FormatToken * > & Into)4875 void pushTokens(Iterator Begin, Iterator End,
4876 llvm::SmallVectorImpl<FormatToken *> &Into) {
4877 for (auto I = Begin; I != End; ++I) {
4878 Into.push_back(I->Tok);
4879 for (const auto &Child : I->Children)
4880 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4881 }
4882 }
4883 } // namespace
4884
4885 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
parseMacroCall()4886 UnwrappedLineParser::parseMacroCall() {
4887 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
4888 assert(Line->Tokens.empty());
4889 nextToken();
4890 if (FormatTok->isNot(tok::l_paren))
4891 return Args;
4892 unsigned Position = Tokens->getPosition();
4893 FormatToken *Tok = FormatTok;
4894 nextToken();
4895 Args.emplace();
4896 auto ArgStart = std::prev(Line->Tokens.end());
4897
4898 int Parens = 0;
4899 do {
4900 switch (FormatTok->Tok.getKind()) {
4901 case tok::l_paren:
4902 ++Parens;
4903 nextToken();
4904 break;
4905 case tok::r_paren: {
4906 if (Parens > 0) {
4907 --Parens;
4908 nextToken();
4909 break;
4910 }
4911 Args->push_back({});
4912 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4913 nextToken();
4914 return Args;
4915 }
4916 case tok::comma: {
4917 if (Parens > 0) {
4918 nextToken();
4919 break;
4920 }
4921 Args->push_back({});
4922 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
4923 nextToken();
4924 ArgStart = std::prev(Line->Tokens.end());
4925 break;
4926 }
4927 default:
4928 nextToken();
4929 break;
4930 }
4931 } while (!eof());
4932 Line->Tokens.resize(1);
4933 Tokens->setPosition(Position);
4934 FormatTok = Tok;
4935 return {};
4936 }
4937
pushToken(FormatToken * Tok)4938 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4939 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4940 if (MustBreakBeforeNextToken) {
4941 Line->Tokens.back().Tok->MustBreakBefore = true;
4942 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true;
4943 MustBreakBeforeNextToken = false;
4944 }
4945 }
4946
4947 } // end namespace format
4948 } // end namespace clang
4949