1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21
22 #define DEBUG_TYPE "format-token-annotator"
23
24 namespace clang {
25 namespace format {
26
mustBreakAfterAttributes(const FormatToken & Tok,const FormatStyle & Style)27 static bool mustBreakAfterAttributes(const FormatToken &Tok,
28 const FormatStyle &Style) {
29 switch (Style.BreakAfterAttributes) {
30 case FormatStyle::ABS_Always:
31 return true;
32 case FormatStyle::ABS_Leave:
33 return Tok.NewlinesBefore > 0;
34 default:
35 return false;
36 }
37 }
38
39 namespace {
40
41 /// Returns \c true if the line starts with a token that can start a statement
42 /// with an initializer.
startsWithInitStatement(const AnnotatedLine & Line)43 static bool startsWithInitStatement(const AnnotatedLine &Line) {
44 return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
45 Line.startsWith(tok::kw_switch);
46 }
47
48 /// Returns \c true if the token can be used as an identifier in
49 /// an Objective-C \c \@selector, \c false otherwise.
50 ///
51 /// Because getFormattingLangOpts() always lexes source code as
52 /// Objective-C++, C++ keywords like \c new and \c delete are
53 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
54 ///
55 /// For Objective-C and Objective-C++, both identifiers and keywords
56 /// are valid inside @selector(...) (or a macro which
57 /// invokes @selector(...)). So, we allow treat any identifier or
58 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)59 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
60 return Tok.Tok.getIdentifierInfo();
61 }
62
63 /// With `Left` being '(', check if we're at either `[...](` or
64 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)65 static bool isLambdaParameterList(const FormatToken *Left) {
66 // Skip <...> if present.
67 if (Left->Previous && Left->Previous->is(tok::greater) &&
68 Left->Previous->MatchingParen &&
69 Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
70 Left = Left->Previous->MatchingParen;
71 }
72
73 // Check for `[...]`.
74 return Left->Previous && Left->Previous->is(tok::r_square) &&
75 Left->Previous->MatchingParen &&
76 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
77 }
78
79 /// Returns \c true if the token is followed by a boolean condition, \c false
80 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)81 static bool isKeywordWithCondition(const FormatToken &Tok) {
82 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
83 tok::kw_constexpr, tok::kw_catch);
84 }
85
86 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
isCppAttribute(bool IsCpp,const FormatToken & Tok)87 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
88 if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
89 return false;
90 // The first square bracket is part of an ObjC array literal
91 if (Tok.Previous && Tok.Previous->is(tok::at))
92 return false;
93 const FormatToken *AttrTok = Tok.Next->Next;
94 if (!AttrTok)
95 return false;
96 // C++17 '[[using ns: foo, bar(baz, blech)]]'
97 // We assume nobody will name an ObjC variable 'using'.
98 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
99 return true;
100 if (AttrTok->isNot(tok::identifier))
101 return false;
102 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
103 // ObjC message send. We assume nobody will use : in a C++11 attribute
104 // specifier parameter, although this is technically valid:
105 // [[foo(:)]].
106 if (AttrTok->is(tok::colon) ||
107 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
108 AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
109 return false;
110 }
111 if (AttrTok->is(tok::ellipsis))
112 return true;
113 AttrTok = AttrTok->Next;
114 }
115 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
116 }
117
/// A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
/// into template parameter lists.
123 class AnnotatingParser {
124 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords,SmallVector<ScopeType> & Scopes)125 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
126 const AdditionalKeywords &Keywords,
127 SmallVector<ScopeType> &Scopes)
128 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
129 Keywords(Keywords), Scopes(Scopes) {
130 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
131 resetTokenMetadata();
132 }
133
134 private:
getScopeType(const FormatToken & Token) const135 ScopeType getScopeType(const FormatToken &Token) const {
136 switch (Token.getType()) {
137 case TT_FunctionLBrace:
138 case TT_LambdaLBrace:
139 return ST_Function;
140 case TT_ClassLBrace:
141 case TT_StructLBrace:
142 case TT_UnionLBrace:
143 return ST_Class;
144 default:
145 return ST_Other;
146 }
147 }
148
/// Parses the tokens after a '<' as a bracketed argument list and pairs the
/// '<' with its closing '>'.
///
/// On entry \c CurrentToken is the token following the '<'; the '<' itself is
/// \c CurrentToken->Previous. When a closing '>' is accepted, the two angle
/// brackets are linked through \c MatchingParen and the '>' is typed as
/// \c TT_TemplateCloser, or as \c TT_DictLiteral for text protos and for
/// proto '<' that follow a selector name or dict literal.
///
/// \returns \c true if a matching '>' was found and accepted. \c false if the
///          '<' is rejected up front, a disqualifying token is met before the
///          '>', a nested \c consumeToken() fails, or the tokens run out.
///          Note that \c false is also returned when the '>' is directly
///          followed by a literal, even though the brackets have already
///          been linked and typed at that point.
bool parseAngle() {
  if (!CurrentToken || !CurrentToken->Previous)
    return false;
  // This '<' is recorded in NonTemplateLess: do not try it as an opener.
  if (NonTemplateLess.count(CurrentToken->Previous) > 0)
    return false;

  const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
  // Reject the '<' outright depending on the token in front of it.
  if (Previous.Previous) {
    if (Previous.Previous->Tok.isLiteral())
      return false;
    if (Previous.Previous->is(tok::r_brace))
      return false;
    // A ')' before '<' inside a nested context is only acceptable when it
    // closes the parentheses of an overloaded operator.
    if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
        (!Previous.Previous->MatchingParen ||
         Previous.Previous->MatchingParen->isNot(
             TT_OverloadedOperatorLParen))) {
      return false;
    }
    // `operator<(`: the '<' is the operator being named.
    if (Previous.Previous->is(tok::kw_operator) &&
        CurrentToken->is(tok::l_paren)) {
      return false;
    }
  }

  FormatToken *Left = CurrentToken->Previous;
  Left->ParentBracket = Contexts.back().ContextKind;
  ScopedContextCreator ContextCreator(*this, tok::less, 12);

  // If this angle is in the context of an expression, we need to be more
  // hesitant to detect it as opening template parameters.
  bool InExprContext = Contexts.back().IsExpression;

  Contexts.back().IsExpression = false;
  // If there's a template keyword before the opening angle bracket, this is a
  // template parameter, not an argument.
  if (Left->Previous && Left->Previous->isNot(tok::kw_template))
    Contexts.back().ContextType = Context::TemplateArgument;

  // Java: skip a '?' that directly follows the '<'.
  if (Style.Language == FormatStyle::LK_Java &&
      CurrentToken->is(tok::question)) {
    next();
  }

  while (CurrentToken) {
    if (CurrentToken->is(tok::greater)) {
      // Try to do a better job at looking for ">>" within the condition of
      // a statement. Conservatively insert spaces between consecutive ">"
      // tokens to prevent splitting right bitshift operators and potentially
      // altering program semantics. This check is overly conservative and
      // will prevent spaces from being inserted in select nested template
      // parameter cases, but should not alter program semantics.
      if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
          Left->ParentBracket != tok::less &&
          CurrentToken->getStartOfNonWhitespace() ==
              CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
                  -1)) {
        return false;
      }
      Left->MatchingParen = CurrentToken;
      CurrentToken->MatchingParen = Left;
      // In TT_Proto, we must distinguish between:
      //   map<key, value>
      //   msg < item: data >
      //   msg: < item: data >
      // In TT_TextProto, map<key, value> does not occur.
      if (Style.Language == FormatStyle::LK_TextProto ||
          (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
           Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
        CurrentToken->setType(TT_DictLiteral);
      } else {
        CurrentToken->setType(TT_TemplateCloser);
        // NOTE(review): presumably this makes a '>' taken from a '>>' count
        // as a single character -- confirm against the lexer.
        CurrentToken->Tok.setLength(1);
      }
      // A literal right after the '>' makes this a failed parse.
      if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
        return false;
      next();
      return true;
    }
    // Java: '?' may appear anywhere inside the angle brackets; skip it.
    if (CurrentToken->is(tok::question) &&
        Style.Language == FormatStyle::LK_Java) {
      next();
      continue;
    }
    // A closing bracket of another kind, or (in an expression context,
    // outside C# and proto) a ':' or '?', ends the attempt.
    if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
        (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
         !Style.isCSharp() && !Style.isProto())) {
      return false;
    }
    // If a && or || is found and interpreted as a binary operator, this set
    // of angles is likely part of something like "a < b && c > d". If the
    // angles are inside an expression, the ||/&& might also be a binary
    // operator that was misinterpreted because we are parsing template
    // parameters.
    // FIXME: This is getting out of hand, write a decent parser.
    if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
        CurrentToken->Previous->is(TT_BinaryOperator) &&
        Contexts[Contexts.size() - 2].IsExpression &&
        !Line.startsWith(tok::kw_template)) {
      return false;
    }
    updateParameterCount(Left, CurrentToken);
    // Proto: the token before a ':' (or before a '{' / '<' that is not
    // itself preceded by ':') is a field name.
    if (Style.Language == FormatStyle::LK_Proto) {
      if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
        if (CurrentToken->is(tok::colon) ||
            (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
             Previous->isNot(tok::colon))) {
          Previous->setType(TT_SelectorName);
        }
      }
    }
    if (!consumeToken())
      return false;
  }
  return false;
}
264
parseUntouchableParens()265 bool parseUntouchableParens() {
266 while (CurrentToken) {
267 CurrentToken->Finalized = true;
268 switch (CurrentToken->Tok.getKind()) {
269 case tok::l_paren:
270 next();
271 if (!parseUntouchableParens())
272 return false;
273 continue;
274 case tok::r_paren:
275 next();
276 return true;
277 default:
278 // no-op
279 break;
280 }
281 next();
282 }
283 return false;
284 }
285
/// Parses the contents of a parenthesized group up to its matching ')'.
///
/// On entry \c CurrentToken is the token following the '(' (asserted). The
/// function opens a new context, decides from the surroundings whether the
/// parentheses hold an expression (\c Contexts.back().IsExpression), infers a
/// type for the '(' from the preceding token if none is set yet, and then
/// consumes tokens until the closing ')'.
///
/// \param LookForDecls When set, `identifier */& identifier` that is not
///        followed by '=' gets its '*'/'&' retyped as \c TT_BinaryOperator;
///        the flag is cleared after the first such match.
/// \returns \c true once the matching ')' has been found, linked to the '('
///          through \c MatchingParen and stepped over; \c false if there is
///          no current token, a ']' or '}' is met first, \c consumeToken()
///          fails, or the tokens run out. For the argument list of an
///          untouchable macro the result of \c parseUntouchableParens() is
///          returned instead.
bool parseParens(bool LookForDecls = false) {
  if (!CurrentToken)
    return false;
  assert(CurrentToken->Previous && "Unknown previous token");
  FormatToken &OpeningParen = *CurrentToken->Previous;
  assert(OpeningParen.is(tok::l_paren));
  FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
  OpeningParen.ParentBracket = Contexts.back().ContextKind;
  ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

  // FIXME: This is a bit of a hack. Do better.
  Contexts.back().ColonIsForRangeExpr =
      Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

  // Arguments of an untouchable macro are only marked as finalized.
  if (OpeningParen.Previous &&
      OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
    OpeningParen.Finalized = true;
    return parseUntouchableParens();
  }

  bool StartsObjCMethodExpr = false;
  if (!Style.isVerilog()) {
    if (FormatToken *MaybeSel = OpeningParen.Previous) {
      // @selector( starts a selector.
      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
          MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
        StartsObjCMethodExpr = true;
      }
    }
  }

  // Decide whether the parenthesized tokens form an expression. The first
  // matching branch wins, so the order of the checks matters.
  if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
    // Find the previous kw_operator token.
    FormatToken *Prev = &OpeningParen;
    while (Prev->isNot(tok::kw_operator)) {
      Prev = Prev->Previous;
      assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
    }

    // If faced with "a.operator*(argument)" or "a->operator*(argument)",
    // i.e. the operator is called as a member function,
    // then the argument must be an expression.
    bool OperatorCalledAsMemberFunction =
        Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
    Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
  } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
    Contexts.back().IsExpression = true;
    Contexts.back().ContextType = Context::VerilogInstancePortList;
  } else if (Style.isJavaScript() &&
             (Line.startsWith(Keywords.kw_type, tok::identifier) ||
              Line.startsWith(tok::kw_export, Keywords.kw_type,
                              tok::identifier))) {
    // type X = (...);
    // export type X = (...);
    Contexts.back().IsExpression = false;
  } else if (OpeningParen.Previous &&
             (OpeningParen.Previous->isOneOf(
                  tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
                  tok::kw_while, tok::l_paren, tok::comma,
                  TT_BinaryOperator) ||
              OpeningParen.Previous->isIf())) {
    // static_assert, if and while usually contain expressions.
    Contexts.back().IsExpression = true;
  } else if (Style.isJavaScript() && OpeningParen.Previous &&
             (OpeningParen.Previous->is(Keywords.kw_function) ||
              (OpeningParen.Previous->endsSequence(tok::identifier,
                                                   Keywords.kw_function)))) {
    // function(...) or function f(...)
    Contexts.back().IsExpression = false;
  } else if (Style.isJavaScript() && OpeningParen.Previous &&
             OpeningParen.Previous->is(TT_JsTypeColon)) {
    // let x: (SomeType);
    Contexts.back().IsExpression = false;
  } else if (isLambdaParameterList(&OpeningParen)) {
    // This is a parameter list of a lambda expression.
    Contexts.back().IsExpression = false;
  } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
    Contexts.back().IsExpression = false;
  } else if (OpeningParen.Previous &&
             OpeningParen.Previous->is(tok::kw__Generic)) {
    Contexts.back().ContextType = Context::C11GenericSelection;
    Contexts.back().IsExpression = true;
  } else if (Line.InPPDirective &&
             (!OpeningParen.Previous ||
              OpeningParen.Previous->isNot(tok::identifier))) {
    Contexts.back().IsExpression = true;
  } else if (Contexts[Contexts.size() - 2].CaretFound) {
    // This is the parameter list of an ObjC block.
    Contexts.back().IsExpression = false;
  } else if (OpeningParen.Previous &&
             OpeningParen.Previous->is(TT_ForEachMacro)) {
    // The first argument to a foreach macro is a declaration.
    Contexts.back().ContextType = Context::ForEachMacro;
    Contexts.back().IsExpression = false;
  } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
             OpeningParen.Previous->MatchingParen->isOneOf(
                 TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
    // The parentheses directly follow a group typed as a block or function
    // type, e.g. the `(void)` in `void (*fp)(void)`.
    Contexts.back().IsExpression = false;
  } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
    // Default outside declarations: an expression, except after for/catch.
    bool IsForOrCatch =
        OpeningParen.Previous &&
        OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
    Contexts.back().IsExpression = !IsForOrCatch;
  }

  // Infer the role of the l_paren based on the previous token if we haven't
  // detected one yet.
  if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
    if (PrevNonComment->isAttribute()) {
      OpeningParen.setType(TT_AttributeLParen);
    } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
                                       tok::kw_typeof,
// The .def include below expands to one `tok::kw___<Trait>,` per trait.
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
#include "clang/Basic/TransformTypeTraits.def"
                                       tok::kw__Atomic)) {
      OpeningParen.setType(TT_TypeDeclarationParen);
      // decltype() and typeof() usually contain expressions.
      if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
        Contexts.back().IsExpression = true;
    }
  }

  if (StartsObjCMethodExpr) {
    Contexts.back().ColonIsObjCMethodExpr = true;
    OpeningParen.setType(TT_ObjCMethodExpr);
  }

  // MightBeFunctionType and ProbablyFunctionType are used for
  // function pointer and reference types as well as Objective-C
  // block types:
  //
  // void (*FunctionPointer)(void);
  // void (&FunctionReference)(void);
  // void (&&FunctionReference)(void);
  // void (^ObjCBlock)(void);
  bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
  bool ProbablyFunctionType =
      CurrentToken->isPointerOrReference() || CurrentToken->is(tok::caret);
  // Layout facts gathered while scanning; they select the packing kind
  // recorded on the '(' once the ')' is reached.
  bool HasMultipleLines = false;
  bool HasMultipleParametersOnALine = false;
  bool MightBeObjCForRangeLoop =
      OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
  FormatToken *PossibleObjCForInToken = nullptr;
  while (CurrentToken) {
    // LookForDecls is set when "if (" has been seen. Check for
    // 'identifier' '*' 'identifier' followed by not '=' -- this
    // '*' has to be a binary operator but determineStarAmpUsage() will
    // categorize it as an unary operator, so set the right type here.
    if (LookForDecls && CurrentToken->Next) {
      FormatToken *Prev = CurrentToken->getPreviousNonComment();
      if (Prev) {
        FormatToken *PrevPrev = Prev->getPreviousNonComment();
        FormatToken *Next = CurrentToken->Next;
        if (PrevPrev && PrevPrev->is(tok::identifier) &&
            PrevPrev->isNot(TT_TypeName) && Prev->isPointerOrReference() &&
            CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
          Prev->setType(TT_BinaryOperator);
          LookForDecls = false;
        }
      }
    }

    // `(*`, `(&`, `::*` and the like hint at a function (pointer) type.
    if (CurrentToken->Previous->is(TT_PointerOrReference) &&
        CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
                                                  tok::coloncolon)) {
      ProbablyFunctionType = true;
    }
    if (CurrentToken->is(tok::comma))
      MightBeFunctionType = false;
    if (CurrentToken->Previous->is(TT_BinaryOperator))
      Contexts.back().IsExpression = true;
    if (CurrentToken->is(tok::r_paren)) {
      // Found the closing ')': finish classifying the pair.
      if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
          ProbablyFunctionType && CurrentToken->Next &&
          (CurrentToken->Next->is(tok::l_paren) ||
           (CurrentToken->Next->is(tok::l_square) &&
            Line.MustBeDeclaration))) {
        OpeningParen.setType(OpeningParen.Next->is(tok::caret)
                                 ? TT_ObjCBlockLParen
                                 : TT_FunctionTypeLParen);
      }
      OpeningParen.MatchingParen = CurrentToken;
      CurrentToken->MatchingParen = &OpeningParen;

      if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
          OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
        // Detect the case where macros are used to generate lambdas or
        // function bodies, e.g.:
        //   auto my_lambda = MACRO((Type *type, int i) { .. body .. });
        for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
             Tok = Tok->Next) {
          if (Tok->is(TT_BinaryOperator) && Tok->isPointerOrReference())
            Tok->setType(TT_PointerOrReference);
        }
      }

      if (StartsObjCMethodExpr) {
        CurrentToken->setType(TT_ObjCMethodExpr);
        if (Contexts.back().FirstObjCSelectorName) {
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
              Contexts.back().LongestObjCSelectorName;
        }
      }

      // Give the ')' the type that mirrors its '(' or the token before it.
      if (OpeningParen.is(TT_AttributeLParen))
        CurrentToken->setType(TT_AttributeRParen);
      if (OpeningParen.is(TT_TypeDeclarationParen))
        CurrentToken->setType(TT_TypeDeclarationParen);
      if (OpeningParen.Previous &&
          OpeningParen.Previous->is(TT_JavaAnnotation)) {
        CurrentToken->setType(TT_JavaAnnotation);
      }
      if (OpeningParen.Previous &&
          OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
        CurrentToken->setType(TT_LeadingJavaAnnotation);
      }
      if (OpeningParen.Previous &&
          OpeningParen.Previous->is(TT_AttributeSquare)) {
        CurrentToken->setType(TT_AttributeSquare);
      }

      // Record how the parameters were laid out in the input.
      if (!HasMultipleLines)
        OpeningParen.setPackingKind(PPK_Inconclusive);
      else if (HasMultipleParametersOnALine)
        OpeningParen.setPackingKind(PPK_BinPacked);
      else
        OpeningParen.setPackingKind(PPK_OnePerLine);

      next();
      return true;
    }
    // A closing bracket of another kind means the parentheses are unbalanced.
    if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
      return false;

    if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
      OpeningParen.setType(TT_Unknown);
    if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
        !CurrentToken->Next->HasUnescapedNewline &&
        !CurrentToken->Next->isTrailingComment()) {
      HasMultipleParametersOnALine = true;
    }
    bool ProbablyFunctionTypeLParen =
        (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
         CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
    // After const/auto/a simple type specifier, treat what follows as a
    // declaration unless it is a '{' or an ordinary '('.
    if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
         CurrentToken->Previous->isSimpleTypeSpecifier()) &&
        !(CurrentToken->is(tok::l_brace) ||
          (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
      Contexts.back().IsExpression = false;
    }
    // A ';' or ':' rules out an ObjC `for (x in y)` loop; undo a tentative
    // TT_ObjCForIn annotation.
    if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
      MightBeObjCForRangeLoop = false;
      if (PossibleObjCForInToken) {
        PossibleObjCForInToken->setType(TT_Unknown);
        PossibleObjCForInToken = nullptr;
      }
    }
    if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
      PossibleObjCForInToken = CurrentToken;
      PossibleObjCForInToken->setType(TT_ObjCForIn);
    }
    // When we discover a 'new', we set CanBeExpression to 'false' in order to
    // parse the type correctly. Reset that after a comma.
    if (CurrentToken->is(tok::comma))
      Contexts.back().CanBeExpression = true;

    // Keep the token about to be consumed; consumeToken() changes
    // CurrentToken.
    FormatToken *Tok = CurrentToken;
    if (!consumeToken())
      return false;
    updateParameterCount(&OpeningParen, Tok);
    if (CurrentToken && CurrentToken->HasUnescapedNewline)
      HasMultipleLines = true;
  }
  return false;
}
561
isCSharpAttributeSpecifier(const FormatToken & Tok)562 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
563 if (!Style.isCSharp())
564 return false;
565
566 // `identifier[i]` is not an attribute.
567 if (Tok.Previous && Tok.Previous->is(tok::identifier))
568 return false;
569
570 // Chains of [] in `identifier[i][j][k]` are not attributes.
571 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
572 auto *MatchingParen = Tok.Previous->MatchingParen;
573 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
574 return false;
575 }
576
577 const FormatToken *AttrTok = Tok.Next;
578 if (!AttrTok)
579 return false;
580
581 // Just an empty declaration e.g. string [].
582 if (AttrTok->is(tok::r_square))
583 return false;
584
585 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
586 while (AttrTok && AttrTok->isNot(tok::r_square))
587 AttrTok = AttrTok->Next;
588
589 if (!AttrTok)
590 return false;
591
592 // Allow an attribute to be the only content of a file.
593 AttrTok = AttrTok->Next;
594 if (!AttrTok)
595 return true;
596
597 // Limit this to being an access modifier that follows.
598 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
599 tok::comment, tok::kw_class, tok::kw_static,
600 tok::l_square, Keywords.kw_internal)) {
601 return true;
602 }
603
604 // incase its a [XXX] retval func(....
605 if (AttrTok->Next &&
606 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
607 return true;
608 }
609
610 return false;
611 }
612
/// Parses the contents of a square-bracketed group up to its matching ']'
/// and classifies the opening '['.
///
/// On entry \c CurrentToken is the token following the '['; the '[' itself is
/// \c CurrentToken->Previous. If the '[' is still \c TT_Unknown (and is not a
/// structured binding), it is typed as one of: ObjC method expression,
/// inline-asm symbolic name, attribute, JS computed property name,
/// designated initializer, proto extension, array initializer or array
/// subscript. The classification may be revised while scanning (a ':' or ','
/// inside, or what follows the ']').
///
/// \returns \c true once the matching ']' has been found, linked to the '['
///          through \c MatchingParen and stepped over; \c false if there is
///          no current token, a ')' or '}' is met first, \c consumeToken()
///          fails, or the tokens run out.
bool parseSquare() {
  if (!CurrentToken)
    return false;

  // A '[' could be an index subscript (after an identifier or after
  // ')' or ']'), it could be the start of an Objective-C method
  // expression, it could the start of an Objective-C array literal,
  // or it could be a C++ attribute specifier [[foo::bar]].
  FormatToken *Left = CurrentToken->Previous;
  Left->ParentBracket = Contexts.back().ContextKind;
  FormatToken *Parent = Left->getPreviousNonComment();

  // Cases where '>' is followed by '['.
  // In C++, this can happen either in array of templates (foo<int>[10])
  // or when array is a nested template type (unique_ptr<type1<type2>[]>).
  bool CppArrayTemplates =
      Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
      (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
       Contexts.back().ContextType == Context::TemplateArgument);

  // The second '[' of `[[` inherits the attribute state from the context
  // opened by the first one.
  const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
  const bool IsCpp11AttributeSpecifier =
      isCppAttribute(Style.isCpp(), *Left) || IsInnerSquare;

  // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
  bool IsCSharpAttributeSpecifier =
      isCSharpAttributeSpecifier(*Left) ||
      Contexts.back().InCSharpAttributeSpecifier;

  bool InsideInlineASM = Line.startsWith(tok::kw_asm);
  bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
  // An ObjC message send is assumed only when every other interpretation is
  // ruled out and the preceding token is one an expression can follow.
  bool StartsObjCMethodExpr =
      !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
      Style.isCpp() && !IsCpp11AttributeSpecifier &&
      !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
      Left->isNot(TT_LambdaLSquare) &&
      !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
      (!Parent ||
       Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
                       tok::kw_return, tok::kw_throw) ||
       Parent->isUnaryOperator() ||
       // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
       Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
       (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
        prec::Unknown));
  bool ColonFound = false;

  // Passed to the context created below; raised to 10 for subscripts and
  // proto extensions.
  unsigned BindingIncrease = 1;
  if (IsCppStructuredBinding) {
    Left->setType(TT_StructuredBindingLSquare);
  } else if (Left->is(TT_Unknown)) {
    // First matching branch wins, so the order of the checks matters.
    if (StartsObjCMethodExpr) {
      Left->setType(TT_ObjCMethodExpr);
    } else if (InsideInlineASM) {
      Left->setType(TT_InlineASMSymbolicNameLSquare);
    } else if (IsCpp11AttributeSpecifier) {
      Left->setType(TT_AttributeSquare);
      // Only the last ']' of a run of attributes should end the group; the
      // flag is set again on this attribute's closing ']' below.
      if (!IsInnerSquare && Left->Previous)
        Left->Previous->EndsCppAttributeGroup = false;
    } else if (Style.isJavaScript() && Parent &&
               Contexts.back().ContextKind == tok::l_brace &&
               Parent->isOneOf(tok::l_brace, tok::comma)) {
      Left->setType(TT_JsComputedPropertyName);
    } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
               Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
      Left->setType(TT_DesignatedInitializerLSquare);
    } else if (IsCSharpAttributeSpecifier) {
      Left->setType(TT_AttributeSquare);
    } else if (CurrentToken->is(tok::r_square) && Parent &&
               Parent->is(TT_TemplateCloser)) {
      Left->setType(TT_ArraySubscriptLSquare);
    } else if (Style.isProto()) {
      // Square braces in LK_Proto can either be message field attributes:
      //
      // optional Aaa aaa = 1 [
      //   (aaa) = aaa
      // ];
      //
      // extensions 123 [
      //   (aaa) = aaa
      // ];
      //
      // or text proto extensions (in options):
      //
      // option (Aaa.options) = {
      //   [type.type/type] {
      //     key: value
      //   }
      // }
      //
      // or repeated fields (in options):
      //
      // option (Aaa.options) = {
      //   keys: [ 1, 2, 3 ]
      // }
      //
      // In the first and the third case we want to spread the contents inside
      // the square braces; in the second we want to keep them inline.
      Left->setType(TT_ArrayInitializerLSquare);
      if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
                              tok::equal) &&
          !Left->endsSequence(tok::l_square, tok::numeric_constant,
                              tok::identifier) &&
          !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
        Left->setType(TT_ProtoExtensionLSquare);
        BindingIncrease = 10;
      }
    } else if (!CppArrayTemplates && Parent &&
               Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
                               tok::comma, tok::l_paren, tok::l_square,
                               tok::question, tok::colon, tok::kw_return,
                               // Should only be relevant to JavaScript:
                               tok::kw_default)) {
      Left->setType(TT_ArrayInitializerLSquare);
    } else {
      BindingIncrease = 10;
      Left->setType(TT_ArraySubscriptLSquare);
    }
  }

  ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
  Contexts.back().IsExpression = true;
  if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
    Contexts.back().IsExpression = false;

  Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
  Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
  Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;

  while (CurrentToken) {
    if (CurrentToken->is(tok::r_square)) {
      // Found the closing ']': finish classifying the pair.
      if (IsCpp11AttributeSpecifier) {
        CurrentToken->setType(TT_AttributeSquare);
        if (!IsInnerSquare)
          CurrentToken->EndsCppAttributeGroup = true;
      }
      if (IsCSharpAttributeSpecifier) {
        CurrentToken->setType(TT_AttributeSquare);
      } else if (((CurrentToken->Next &&
                   CurrentToken->Next->is(tok::l_paren)) ||
                  (CurrentToken->Previous &&
                   CurrentToken->Previous->Previous == Left)) &&
                 Left->is(TT_ObjCMethodExpr)) {
        // An ObjC method call is rarely followed by an open parenthesis. It
        // also can't be composed of just one token, unless it's a macro that
        // will be expanded to more tokens.
        // FIXME: Do we incorrectly label ":" with this?
        StartsObjCMethodExpr = false;
        Left->setType(TT_Unknown);
      }
      if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
        CurrentToken->setType(TT_ObjCMethodExpr);
        // If we haven't seen a colon yet, make sure the last identifier
        // before the r_square is tagged as a selector name component.
        if (!ColonFound && CurrentToken->Previous &&
            CurrentToken->Previous->is(TT_Unknown) &&
            canBeObjCSelectorComponent(*CurrentToken->Previous)) {
          CurrentToken->Previous->setType(TT_SelectorName);
        }
        // determineStarAmpUsage() thinks that '*' '[' is allocating an
        // array of pointers, but if '[' starts a selector then '*' is a
        // binary operator.
        if (Parent && Parent->is(TT_PointerOrReference))
          Parent->overwriteFixedType(TT_BinaryOperator);
      }
      // An arrow after an ObjC method expression is not a lambda arrow.
      if (CurrentToken->getType() == TT_ObjCMethodExpr &&
          CurrentToken->Next &&
          CurrentToken->Next->is(TT_TrailingReturnArrow)) {
        CurrentToken->Next->overwriteFixedType(TT_Unknown);
      }
      Left->MatchingParen = CurrentToken;
      CurrentToken->MatchingParen = Left;
      // FirstObjCSelectorName is set when a colon is found. This does
      // not work, however, when the method has no parameters.
      // Here, we set FirstObjCSelectorName when the end of the method call is
      // reached, in case it was not set already.
      if (!Contexts.back().FirstObjCSelectorName) {
        FormatToken *Previous = CurrentToken->getPreviousNonComment();
        if (Previous && Previous->is(TT_SelectorName)) {
          Previous->ObjCSelectorNameParts = 1;
          Contexts.back().FirstObjCSelectorName = Previous;
        }
      } else {
        Left->ParameterCount =
            Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
      }
      if (Contexts.back().FirstObjCSelectorName) {
        Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
            Contexts.back().LongestObjCSelectorName;
        // With more than one block parameter the recorded longest selector
        // name is reset to 0.
        if (Left->BlockParameterCount > 1)
          Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
      }
      next();
      return true;
    }
    // A closing bracket of another kind means the brackets are unbalanced.
    if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
      return false;
    if (CurrentToken->is(tok::colon)) {
      if (IsCpp11AttributeSpecifier &&
          CurrentToken->endsSequence(tok::colon, tok::identifier,
                                     tok::kw_using)) {
        // Remember that this is a [[using ns: foo]] C++ attribute, so we
        // don't add a space before the colon (unlike other colons).
        CurrentToken->setType(TT_AttributeColon);
      } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
                 Left->isOneOf(TT_ArraySubscriptLSquare,
                               TT_DesignatedInitializerLSquare)) {
        // A ':' inside what was taken for a subscript or designated
        // initializer: reclassify the brackets as an ObjC message send.
        Left->setType(TT_ObjCMethodExpr);
        StartsObjCMethodExpr = true;
        Contexts.back().ColonIsObjCMethodExpr = true;
        if (Parent && Parent->is(tok::r_paren)) {
          // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
          Parent->setType(TT_CastRParen);
        }
      }
      ColonFound = true;
    }
    // A ',' before any ':' means this is a list, not a message send.
    if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
        !ColonFound) {
      Left->setType(TT_ArrayInitializerLSquare);
    }
    // Keep the token about to be consumed; consumeToken() changes
    // CurrentToken.
    FormatToken *Tok = CurrentToken;
    if (!consumeToken())
      return false;
    updateParameterCount(Left, Tok);
  }
  return false;
}
842
couldBeInStructArrayInitializer() const843 bool couldBeInStructArrayInitializer() const {
844 if (Contexts.size() < 2)
845 return false;
846 // We want to back up no more then 2 context levels i.e.
847 // . { { <-
848 const auto End = std::next(Contexts.rbegin(), 2);
849 auto Last = Contexts.rbegin();
850 unsigned Depth = 0;
851 for (; Last != End; ++Last)
852 if (Last->ContextKind == tok::l_brace)
853 ++Depth;
854 return Depth == 2 && Last->ContextKind != tok::l_brace;
855 }
856
parseBrace()857 bool parseBrace() {
858 if (!CurrentToken)
859 return true;
860
861 assert(CurrentToken->Previous);
862 FormatToken &OpeningBrace = *CurrentToken->Previous;
863 assert(OpeningBrace.is(tok::l_brace));
864 OpeningBrace.ParentBracket = Contexts.back().ContextKind;
865
866 if (Contexts.back().CaretFound)
867 OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
868 Contexts.back().CaretFound = false;
869
870 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
871 Contexts.back().ColonIsDictLiteral = true;
872 if (OpeningBrace.is(BK_BracedInit))
873 Contexts.back().IsExpression = true;
874 if (Style.isJavaScript() && OpeningBrace.Previous &&
875 OpeningBrace.Previous->is(TT_JsTypeColon)) {
876 Contexts.back().IsExpression = false;
877 }
878 if (Style.isVerilog() &&
879 (!OpeningBrace.getPreviousNonComment() ||
880 OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
881 Contexts.back().VerilogMayBeConcatenation = true;
882 }
883
884 unsigned CommaCount = 0;
885 while (CurrentToken) {
886 if (CurrentToken->is(tok::r_brace)) {
887 assert(!Scopes.empty());
888 assert(Scopes.back() == getScopeType(OpeningBrace));
889 Scopes.pop_back();
890 assert(OpeningBrace.Optional == CurrentToken->Optional);
891 OpeningBrace.MatchingParen = CurrentToken;
892 CurrentToken->MatchingParen = &OpeningBrace;
893 if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
894 if (OpeningBrace.ParentBracket == tok::l_brace &&
895 couldBeInStructArrayInitializer() && CommaCount > 0) {
896 Contexts.back().ContextType = Context::StructArrayInitializer;
897 }
898 }
899 next();
900 return true;
901 }
902 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
903 return false;
904 updateParameterCount(&OpeningBrace, CurrentToken);
905 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
906 FormatToken *Previous = CurrentToken->getPreviousNonComment();
907 if (Previous->is(TT_JsTypeOptionalQuestion))
908 Previous = Previous->getPreviousNonComment();
909 if ((CurrentToken->is(tok::colon) &&
910 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
911 Style.isProto()) {
912 OpeningBrace.setType(TT_DictLiteral);
913 if (Previous->Tok.getIdentifierInfo() ||
914 Previous->is(tok::string_literal)) {
915 Previous->setType(TT_SelectorName);
916 }
917 }
918 if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown))
919 OpeningBrace.setType(TT_DictLiteral);
920 else if (Style.isJavaScript())
921 OpeningBrace.overwriteFixedType(TT_DictLiteral);
922 }
923 if (CurrentToken->is(tok::comma)) {
924 if (Style.isJavaScript())
925 OpeningBrace.overwriteFixedType(TT_DictLiteral);
926 ++CommaCount;
927 }
928 if (!consumeToken())
929 return false;
930 }
931 return true;
932 }
933
updateParameterCount(FormatToken * Left,FormatToken * Current)934 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
935 // For ObjC methods, the number of parameters is calculated differently as
936 // method declarations have a different structure (the parameters are not
937 // inside a bracket scope).
938 if (Current->is(tok::l_brace) && Current->is(BK_Block))
939 ++Left->BlockParameterCount;
940 if (Current->is(tok::comma)) {
941 ++Left->ParameterCount;
942 if (!Left->Role)
943 Left->Role.reset(new CommaSeparatedList(Style));
944 Left->Role->CommaFound(Current);
945 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
946 Left->ParameterCount = 1;
947 }
948 }
949
parseConditional()950 bool parseConditional() {
951 while (CurrentToken) {
952 if (CurrentToken->is(tok::colon)) {
953 CurrentToken->setType(TT_ConditionalExpr);
954 next();
955 return true;
956 }
957 if (!consumeToken())
958 return false;
959 }
960 return false;
961 }
962
parseTemplateDeclaration()963 bool parseTemplateDeclaration() {
964 if (CurrentToken && CurrentToken->is(tok::less)) {
965 CurrentToken->setType(TT_TemplateOpener);
966 next();
967 if (!parseAngle())
968 return false;
969 if (CurrentToken)
970 CurrentToken->Previous->ClosesTemplateDeclaration = true;
971 return true;
972 }
973 return false;
974 }
975
consumeToken()976 bool consumeToken() {
977 if (Style.isCpp()) {
978 const auto *Prev = CurrentToken->getPreviousNonComment();
979 if (Prev && Prev->is(tok::r_square) && Prev->is(TT_AttributeSquare) &&
980 CurrentToken->isOneOf(tok::kw_if, tok::kw_switch, tok::kw_case,
981 tok::kw_default, tok::kw_for, tok::kw_while) &&
982 mustBreakAfterAttributes(*CurrentToken, Style)) {
983 CurrentToken->MustBreakBefore = true;
984 }
985 }
986 FormatToken *Tok = CurrentToken;
987 next();
988 // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
989 // operators.
990 if (Tok->is(TT_VerilogTableItem))
991 return true;
992 switch (Tok->Tok.getKind()) {
993 case tok::plus:
994 case tok::minus:
995 if (!Tok->Previous && Line.MustBeDeclaration)
996 Tok->setType(TT_ObjCMethodSpecifier);
997 break;
998 case tok::colon:
999 if (!Tok->Previous)
1000 return false;
1001 // Goto labels and case labels are already identified in
1002 // UnwrappedLineParser.
1003 if (Tok->isTypeFinalized())
1004 break;
1005 // Colons from ?: are handled in parseConditional().
1006 if (Style.isJavaScript()) {
1007 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
1008 (Contexts.size() == 1 && // switch/case labels
1009 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
1010 Contexts.back().ContextKind == tok::l_paren || // function params
1011 Contexts.back().ContextKind == tok::l_square || // array type
1012 (!Contexts.back().IsExpression &&
1013 Contexts.back().ContextKind == tok::l_brace) || // object type
1014 (Contexts.size() == 1 &&
1015 Line.MustBeDeclaration)) { // method/property declaration
1016 Contexts.back().IsExpression = false;
1017 Tok->setType(TT_JsTypeColon);
1018 break;
1019 }
1020 } else if (Style.isCSharp()) {
1021 if (Contexts.back().InCSharpAttributeSpecifier) {
1022 Tok->setType(TT_AttributeColon);
1023 break;
1024 }
1025 if (Contexts.back().ContextKind == tok::l_paren) {
1026 Tok->setType(TT_CSharpNamedArgumentColon);
1027 break;
1028 }
1029 } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1030 // The distribution weight operators are labeled
1031 // TT_BinaryOperator by the lexer.
1032 if (Keywords.isVerilogEnd(*Tok->Previous) ||
1033 Keywords.isVerilogBegin(*Tok->Previous)) {
1034 Tok->setType(TT_VerilogBlockLabelColon);
1035 } else if (Contexts.back().ContextKind == tok::l_square) {
1036 Tok->setType(TT_BitFieldColon);
1037 } else if (Contexts.back().ColonIsDictLiteral) {
1038 Tok->setType(TT_DictLiteral);
1039 } else if (Contexts.size() == 1) {
1040 // In Verilog a case label doesn't have the case keyword. We
1041 // assume a colon following an expression is a case label.
1042 // Colons from ?: are annotated in parseConditional().
1043 Tok->setType(TT_CaseLabelColon);
1044 if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1045 --Line.Level;
1046 }
1047 break;
1048 }
1049 if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1050 Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1051 Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1052 Tok->setType(TT_ModulePartitionColon);
1053 } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) {
1054 Tok->setType(TT_DictLiteral);
1055 if (Style.Language == FormatStyle::LK_TextProto) {
1056 if (FormatToken *Previous = Tok->getPreviousNonComment())
1057 Previous->setType(TT_SelectorName);
1058 }
1059 } else if (Contexts.back().ColonIsObjCMethodExpr ||
1060 Line.startsWith(TT_ObjCMethodSpecifier)) {
1061 Tok->setType(TT_ObjCMethodExpr);
1062 const FormatToken *BeforePrevious = Tok->Previous->Previous;
1063 // Ensure we tag all identifiers in method declarations as
1064 // TT_SelectorName.
1065 bool UnknownIdentifierInMethodDeclaration =
1066 Line.startsWith(TT_ObjCMethodSpecifier) &&
1067 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1068 if (!BeforePrevious ||
1069 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1070 !(BeforePrevious->is(TT_CastRParen) ||
1071 (BeforePrevious->is(TT_ObjCMethodExpr) &&
1072 BeforePrevious->is(tok::colon))) ||
1073 BeforePrevious->is(tok::r_square) ||
1074 Contexts.back().LongestObjCSelectorName == 0 ||
1075 UnknownIdentifierInMethodDeclaration) {
1076 Tok->Previous->setType(TT_SelectorName);
1077 if (!Contexts.back().FirstObjCSelectorName) {
1078 Contexts.back().FirstObjCSelectorName = Tok->Previous;
1079 } else if (Tok->Previous->ColumnWidth >
1080 Contexts.back().LongestObjCSelectorName) {
1081 Contexts.back().LongestObjCSelectorName =
1082 Tok->Previous->ColumnWidth;
1083 }
1084 Tok->Previous->ParameterIndex =
1085 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1086 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1087 }
1088 } else if (Contexts.back().ColonIsForRangeExpr) {
1089 Tok->setType(TT_RangeBasedForLoopColon);
1090 } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1091 Tok->setType(TT_GenericSelectionColon);
1092 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1093 Tok->setType(TT_BitFieldColon);
1094 } else if (Contexts.size() == 1 &&
1095 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1096 tok::kw_default)) {
1097 FormatToken *Prev = Tok->getPreviousNonComment();
1098 if (!Prev)
1099 break;
1100 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1101 Prev->ClosesRequiresClause) {
1102 Tok->setType(TT_CtorInitializerColon);
1103 } else if (Prev->is(tok::kw_try)) {
1104 // Member initializer list within function try block.
1105 FormatToken *PrevPrev = Prev->getPreviousNonComment();
1106 if (!PrevPrev)
1107 break;
1108 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1109 Tok->setType(TT_CtorInitializerColon);
1110 } else {
1111 Tok->setType(TT_InheritanceColon);
1112 }
1113 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1114 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1115 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1116 Tok->Next->Next->is(tok::colon)))) {
1117 // This handles a special macro in ObjC code where selectors including
1118 // the colon are passed as macro arguments.
1119 Tok->setType(TT_ObjCMethodExpr);
1120 } else if (Contexts.back().ContextKind == tok::l_paren &&
1121 !Line.InPragmaDirective) {
1122 Tok->setType(TT_InlineASMColon);
1123 }
1124 break;
1125 case tok::pipe:
1126 case tok::amp:
1127 // | and & in declarations/type expressions represent union and
1128 // intersection types, respectively.
1129 if (Style.isJavaScript() && !Contexts.back().IsExpression)
1130 Tok->setType(TT_JsTypeOperator);
1131 break;
1132 case tok::kw_if:
1133 if (CurrentToken &&
1134 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1135 next();
1136 }
1137 [[fallthrough]];
1138 case tok::kw_while:
1139 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1140 next();
1141 if (!parseParens(/*LookForDecls=*/true))
1142 return false;
1143 }
1144 break;
1145 case tok::kw_for:
1146 if (Style.isJavaScript()) {
1147 // x.for and {for: ...}
1148 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1149 (Tok->Next && Tok->Next->is(tok::colon))) {
1150 break;
1151 }
1152 // JS' for await ( ...
1153 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1154 next();
1155 }
1156 if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
1157 next();
1158 Contexts.back().ColonIsForRangeExpr = true;
1159 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1160 return false;
1161 next();
1162 if (!parseParens())
1163 return false;
1164 break;
1165 case tok::l_paren:
1166 // When faced with 'operator()()', the kw_operator handler incorrectly
1167 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1168 // the first two parens OverloadedOperators and the second l_paren an
1169 // OverloadedOperatorLParen.
1170 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1171 Tok->Previous->MatchingParen &&
1172 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1173 Tok->Previous->setType(TT_OverloadedOperator);
1174 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1175 Tok->setType(TT_OverloadedOperatorLParen);
1176 }
1177
1178 if (Style.isVerilog()) {
1179 // Identify the parameter list and port list in a module instantiation.
1180 // This is still needed when we already have
1181 // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1182 // function is only responsible for the definition, not the
1183 // instantiation.
1184 auto IsInstancePort = [&]() {
1185 const FormatToken *Prev = Tok->getPreviousNonComment();
1186 const FormatToken *PrevPrev;
1187 // In the following example all 4 left parentheses will be treated as
1188 // 'TT_VerilogInstancePortLParen'.
1189 //
1190 // module_x instance_1(port_1); // Case A.
1191 // module_x #(parameter_1) // Case B.
1192 // instance_2(port_1), // Case C.
1193 // instance_3(port_1); // Case D.
1194 if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1195 return false;
1196 // Case A.
1197 if (Keywords.isVerilogIdentifier(*Prev) &&
1198 Keywords.isVerilogIdentifier(*PrevPrev)) {
1199 return true;
1200 }
1201 // Case B.
1202 if (Prev->is(Keywords.kw_verilogHash) &&
1203 Keywords.isVerilogIdentifier(*PrevPrev)) {
1204 return true;
1205 }
1206 // Case C.
1207 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1208 return true;
1209 // Case D.
1210 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1211 const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1212 if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1213 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1214 return true;
1215 }
1216 }
1217 return false;
1218 };
1219
1220 if (IsInstancePort())
1221 Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1222 }
1223
1224 if (!parseParens())
1225 return false;
1226 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1227 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1228 !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
1229 if (const auto *Previous = Tok->Previous;
1230 !Previous ||
1231 (!Previous->isAttribute() &&
1232 !Previous->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation))) {
1233 Line.MightBeFunctionDecl = true;
1234 }
1235 }
1236 break;
1237 case tok::l_square:
1238 if (!parseSquare())
1239 return false;
1240 break;
1241 case tok::l_brace:
1242 if (Style.Language == FormatStyle::LK_TextProto) {
1243 FormatToken *Previous = Tok->getPreviousNonComment();
1244 if (Previous && Previous->getType() != TT_DictLiteral)
1245 Previous->setType(TT_SelectorName);
1246 }
1247 Scopes.push_back(getScopeType(*Tok));
1248 if (!parseBrace())
1249 return false;
1250 break;
1251 case tok::less:
1252 if (parseAngle()) {
1253 Tok->setType(TT_TemplateOpener);
1254 // In TT_Proto, we must distignuish between:
1255 // map<key, value>
1256 // msg < item: data >
1257 // msg: < item: data >
1258 // In TT_TextProto, map<key, value> does not occur.
1259 if (Style.Language == FormatStyle::LK_TextProto ||
1260 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1261 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1262 Tok->setType(TT_DictLiteral);
1263 FormatToken *Previous = Tok->getPreviousNonComment();
1264 if (Previous && Previous->getType() != TT_DictLiteral)
1265 Previous->setType(TT_SelectorName);
1266 }
1267 } else {
1268 Tok->setType(TT_BinaryOperator);
1269 NonTemplateLess.insert(Tok);
1270 CurrentToken = Tok;
1271 next();
1272 }
1273 break;
1274 case tok::r_paren:
1275 case tok::r_square:
1276 return false;
1277 case tok::r_brace:
1278 // Don't pop scope when encountering unbalanced r_brace.
1279 if (!Scopes.empty())
1280 Scopes.pop_back();
1281 // Lines can start with '}'.
1282 if (Tok->Previous)
1283 return false;
1284 break;
1285 case tok::greater:
1286 if (Style.Language != FormatStyle::LK_TextProto)
1287 Tok->setType(TT_BinaryOperator);
1288 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1289 Tok->SpacesRequiredBefore = 1;
1290 break;
1291 case tok::kw_operator:
1292 if (Style.isProto())
1293 break;
1294 while (CurrentToken &&
1295 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1296 if (CurrentToken->isOneOf(tok::star, tok::amp))
1297 CurrentToken->setType(TT_PointerOrReference);
1298 auto Next = CurrentToken->getNextNonComment();
1299 if (!Next)
1300 break;
1301 if (Next->is(tok::less))
1302 next();
1303 else
1304 consumeToken();
1305 if (!CurrentToken)
1306 break;
1307 auto Previous = CurrentToken->getPreviousNonComment();
1308 assert(Previous);
1309 if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1310 break;
1311 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1312 tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1313 // User defined literal.
1314 Previous->TokenText.starts_with("\"\"")) {
1315 Previous->setType(TT_OverloadedOperator);
1316 if (CurrentToken->isOneOf(tok::less, tok::greater))
1317 break;
1318 }
1319 }
1320 if (CurrentToken && CurrentToken->is(tok::l_paren))
1321 CurrentToken->setType(TT_OverloadedOperatorLParen);
1322 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1323 CurrentToken->Previous->setType(TT_OverloadedOperator);
1324 break;
1325 case tok::question:
1326 if (Style.isJavaScript() && Tok->Next &&
1327 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1328 tok::r_brace, tok::r_square)) {
1329 // Question marks before semicolons, colons, etc. indicate optional
1330 // types (fields, parameters), e.g.
1331 // function(x?: string, y?) {...}
1332 // class X { y?; }
1333 Tok->setType(TT_JsTypeOptionalQuestion);
1334 break;
1335 }
1336 // Declarations cannot be conditional expressions, this can only be part
1337 // of a type declaration.
1338 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1339 Style.isJavaScript()) {
1340 break;
1341 }
1342 if (Style.isCSharp()) {
1343 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1344 // nullable types.
1345
1346 // `Type?)`, `Type?>`, `Type? name;`
1347 if (Tok->Next &&
1348 (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1349 Tok->Next->startsSequence(tok::question, tok::greater) ||
1350 Tok->Next->startsSequence(tok::question, tok::identifier,
1351 tok::semi))) {
1352 Tok->setType(TT_CSharpNullable);
1353 break;
1354 }
1355
1356 // `Type? name =`
1357 if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1358 Tok->Next->Next->is(tok::equal)) {
1359 Tok->setType(TT_CSharpNullable);
1360 break;
1361 }
1362
1363 // Line.MustBeDeclaration will be true for `Type? name;`.
1364 // But not
1365 // cond ? "A" : "B";
1366 // cond ? id : "B";
1367 // cond ? cond2 ? "A" : "B" : "C";
1368 if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1369 (!Tok->Next ||
1370 !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1371 !Tok->Next->Next ||
1372 !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1373 Tok->setType(TT_CSharpNullable);
1374 break;
1375 }
1376 }
1377 parseConditional();
1378 break;
1379 case tok::kw_template:
1380 parseTemplateDeclaration();
1381 break;
1382 case tok::comma:
1383 switch (Contexts.back().ContextType) {
1384 case Context::CtorInitializer:
1385 Tok->setType(TT_CtorInitializerComma);
1386 break;
1387 case Context::InheritanceList:
1388 Tok->setType(TT_InheritanceComma);
1389 break;
1390 case Context::VerilogInstancePortList:
1391 Tok->setFinalizedType(TT_VerilogInstancePortComma);
1392 break;
1393 default:
1394 if (Style.isVerilog() && Contexts.size() == 1 &&
1395 Line.startsWith(Keywords.kw_assign)) {
1396 Tok->setFinalizedType(TT_VerilogAssignComma);
1397 } else if (Contexts.back().FirstStartOfName &&
1398 (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1399 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1400 Line.IsMultiVariableDeclStmt = true;
1401 }
1402 break;
1403 }
1404 if (Contexts.back().ContextType == Context::ForEachMacro)
1405 Contexts.back().IsExpression = true;
1406 break;
1407 case tok::kw_default:
1408 // Unindent case labels.
1409 if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1410 (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1411 --Line.Level;
1412 }
1413 break;
1414 case tok::identifier:
1415 if (Tok->isOneOf(Keywords.kw___has_include,
1416 Keywords.kw___has_include_next)) {
1417 parseHasInclude();
1418 }
1419 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1420 Tok->Next->isNot(tok::l_paren)) {
1421 Tok->setType(TT_CSharpGenericTypeConstraint);
1422 parseCSharpGenericTypeConstraint();
1423 if (!Tok->getPreviousNonComment())
1424 Line.IsContinuation = true;
1425 }
1426 break;
1427 case tok::arrow:
1428 if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept))
1429 Tok->setType(TT_TrailingReturnArrow);
1430 break;
1431 default:
1432 break;
1433 }
1434 return true;
1435 }
1436
parseCSharpGenericTypeConstraint()1437 void parseCSharpGenericTypeConstraint() {
1438 int OpenAngleBracketsCount = 0;
1439 while (CurrentToken) {
1440 if (CurrentToken->is(tok::less)) {
1441 // parseAngle is too greedy and will consume the whole line.
1442 CurrentToken->setType(TT_TemplateOpener);
1443 ++OpenAngleBracketsCount;
1444 next();
1445 } else if (CurrentToken->is(tok::greater)) {
1446 CurrentToken->setType(TT_TemplateCloser);
1447 --OpenAngleBracketsCount;
1448 next();
1449 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1450 // We allow line breaks after GenericTypeConstraintComma's
1451 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1452 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1453 next();
1454 } else if (CurrentToken->is(Keywords.kw_where)) {
1455 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1456 next();
1457 } else if (CurrentToken->is(tok::colon)) {
1458 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1459 next();
1460 } else {
1461 next();
1462 }
1463 }
1464 }
1465
parseIncludeDirective()1466 void parseIncludeDirective() {
1467 if (CurrentToken && CurrentToken->is(tok::less)) {
1468 next();
1469 while (CurrentToken) {
1470 // Mark tokens up to the trailing line comments as implicit string
1471 // literals.
1472 if (CurrentToken->isNot(tok::comment) &&
1473 !CurrentToken->TokenText.starts_with("//")) {
1474 CurrentToken->setType(TT_ImplicitStringLiteral);
1475 }
1476 next();
1477 }
1478 }
1479 }
1480
parseWarningOrError()1481 void parseWarningOrError() {
1482 next();
1483 // We still want to format the whitespace left of the first token of the
1484 // warning or error.
1485 next();
1486 while (CurrentToken) {
1487 CurrentToken->setType(TT_ImplicitStringLiteral);
1488 next();
1489 }
1490 }
1491
parsePragma()1492 void parsePragma() {
1493 next(); // Consume "pragma".
1494 if (CurrentToken &&
1495 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1496 Keywords.kw_region)) {
1497 bool IsMarkOrRegion =
1498 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1499 next();
1500 next(); // Consume first token (so we fix leading whitespace).
1501 while (CurrentToken) {
1502 if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1503 CurrentToken->setType(TT_ImplicitStringLiteral);
1504 next();
1505 }
1506 }
1507 }
1508
parseHasInclude()1509 void parseHasInclude() {
1510 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1511 return;
1512 next(); // '('
1513 parseIncludeDirective();
1514 next(); // ')'
1515 }
1516
parsePreprocessorDirective()1517 LineType parsePreprocessorDirective() {
1518 bool IsFirstToken = CurrentToken->IsFirst;
1519 LineType Type = LT_PreprocessorDirective;
1520 next();
1521 if (!CurrentToken)
1522 return Type;
1523
1524 if (Style.isJavaScript() && IsFirstToken) {
1525 // JavaScript files can contain shebang lines of the form:
1526 // #!/usr/bin/env node
1527 // Treat these like C++ #include directives.
1528 while (CurrentToken) {
1529 // Tokens cannot be comments here.
1530 CurrentToken->setType(TT_ImplicitStringLiteral);
1531 next();
1532 }
1533 return LT_ImportStatement;
1534 }
1535
1536 if (CurrentToken->is(tok::numeric_constant)) {
1537 CurrentToken->SpacesRequiredBefore = 1;
1538 return Type;
1539 }
1540 // Hashes in the middle of a line can lead to any strange token
1541 // sequence.
1542 if (!CurrentToken->Tok.getIdentifierInfo())
1543 return Type;
1544 // In Verilog macro expansions start with a backtick just like preprocessor
1545 // directives. Thus we stop if the word is not a preprocessor directive.
1546 if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1547 return LT_Invalid;
1548 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1549 case tok::pp_include:
1550 case tok::pp_include_next:
1551 case tok::pp_import:
1552 next();
1553 parseIncludeDirective();
1554 Type = LT_ImportStatement;
1555 break;
1556 case tok::pp_error:
1557 case tok::pp_warning:
1558 parseWarningOrError();
1559 break;
1560 case tok::pp_pragma:
1561 parsePragma();
1562 break;
1563 case tok::pp_if:
1564 case tok::pp_elif:
1565 Contexts.back().IsExpression = true;
1566 next();
1567 parseLine();
1568 break;
1569 default:
1570 break;
1571 }
1572 while (CurrentToken) {
1573 FormatToken *Tok = CurrentToken;
1574 next();
1575 if (Tok->is(tok::l_paren)) {
1576 parseParens();
1577 } else if (Tok->isOneOf(Keywords.kw___has_include,
1578 Keywords.kw___has_include_next)) {
1579 parseHasInclude();
1580 }
1581 }
1582 return Type;
1583 }
1584
1585 public:
parseLine()1586 LineType parseLine() {
1587 if (!CurrentToken)
1588 return LT_Invalid;
1589 NonTemplateLess.clear();
1590 if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1591 // We were not yet allowed to use C++17 optional when this was being
1592 // written. So we used LT_Invalid to mark that the line is not a
1593 // preprocessor directive.
1594 auto Type = parsePreprocessorDirective();
1595 if (Type != LT_Invalid)
1596 return Type;
1597 }
1598
1599 // Directly allow to 'import <string-literal>' to support protocol buffer
1600 // definitions (github.com/google/protobuf) or missing "#" (either way we
1601 // should not break the line).
1602 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1603 if ((Style.Language == FormatStyle::LK_Java &&
1604 CurrentToken->is(Keywords.kw_package)) ||
1605 (!Style.isVerilog() && Info &&
1606 Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1607 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1608 tok::kw_static))) {
1609 next();
1610 parseIncludeDirective();
1611 return LT_ImportStatement;
1612 }
1613
1614 // If this line starts and ends in '<' and '>', respectively, it is likely
1615 // part of "#define <a/b.h>".
1616 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1617 parseIncludeDirective();
1618 return LT_ImportStatement;
1619 }
1620
1621 // In .proto files, top-level options and package statements are very
1622 // similar to import statements and should not be line-wrapped.
1623 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1624 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1625 next();
1626 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1627 while (CurrentToken)
1628 next();
1629 return LT_ImportStatement;
1630 }
1631 }
1632
1633 bool KeywordVirtualFound = false;
1634 bool ImportStatement = false;
1635
1636 // import {...} from '...';
1637 if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1638 ImportStatement = true;
1639
1640 while (CurrentToken) {
1641 if (CurrentToken->is(tok::kw_virtual))
1642 KeywordVirtualFound = true;
1643 if (Style.isJavaScript()) {
1644 // export {...} from '...';
1645 // An export followed by "from 'some string';" is a re-export from
1646 // another module identified by a URI and is treated as a
1647 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1648 // Just "export {...};" or "export class ..." should not be treated as
1649 // an import in this sense.
1650 if (Line.First->is(tok::kw_export) &&
1651 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1652 CurrentToken->Next->isStringLiteral()) {
1653 ImportStatement = true;
1654 }
1655 if (isClosureImportStatement(*CurrentToken))
1656 ImportStatement = true;
1657 }
1658 if (!consumeToken())
1659 return LT_Invalid;
1660 }
1661 if (KeywordVirtualFound)
1662 return LT_VirtualFunctionDecl;
1663 if (ImportStatement)
1664 return LT_ImportStatement;
1665
1666 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1667 if (Contexts.back().FirstObjCSelectorName) {
1668 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1669 Contexts.back().LongestObjCSelectorName;
1670 }
1671 return LT_ObjCMethodDecl;
1672 }
1673
1674 for (const auto &ctx : Contexts)
1675 if (ctx.ContextType == Context::StructArrayInitializer)
1676 return LT_ArrayOfStructInitializer;
1677
1678 return LT_Other;
1679 }
1680
1681 private:
isClosureImportStatement(const FormatToken & Tok)1682 bool isClosureImportStatement(const FormatToken &Tok) {
1683 // FIXME: Closure-library specific stuff should not be hard-coded but be
1684 // configurable.
1685 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1686 Tok.Next->Next &&
1687 (Tok.Next->Next->TokenText == "module" ||
1688 Tok.Next->Next->TokenText == "provide" ||
1689 Tok.Next->Next->TokenText == "require" ||
1690 Tok.Next->Next->TokenText == "requireType" ||
1691 Tok.Next->Next->TokenText == "forwardDeclare") &&
1692 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1693 }
1694
resetTokenMetadata()1695 void resetTokenMetadata() {
1696 if (!CurrentToken)
1697 return;
1698
1699 // Reset token type in case we have already looked at it and then
1700 // recovered from an error (e.g. failure to find the matching >).
1701 if (!CurrentToken->isTypeFinalized() &&
1702 !CurrentToken->isOneOf(
1703 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1704 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1705 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1706 TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral,
1707 TT_TemplateString, TT_ObjCStringLiteral, TT_UntouchableMacroFunc,
1708 TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro,
1709 TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, TT_StructLBrace,
1710 TT_UnionLBrace, TT_RequiresClause,
1711 TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
1712 TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
1713 TT_BracedListLBrace)) {
1714 CurrentToken->setType(TT_Unknown);
1715 }
1716 CurrentToken->Role.reset();
1717 CurrentToken->MatchingParen = nullptr;
1718 CurrentToken->FakeLParens.clear();
1719 CurrentToken->FakeRParens = 0;
1720 }
1721
next()1722 void next() {
1723 if (!CurrentToken)
1724 return;
1725
1726 CurrentToken->NestingLevel = Contexts.size() - 1;
1727 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1728 modifyContext(*CurrentToken);
1729 determineTokenType(*CurrentToken);
1730 CurrentToken = CurrentToken->Next;
1731
1732 resetTokenMetadata();
1733 }
1734
/// A struct to hold information valid in a specific context, e.g.
/// a pair of parentheses.
///
/// Contexts live on the \c Contexts stack; a new one is pushed by
/// \c ScopedContextCreator and inherits \c IsExpression from its parent.
struct Context {
  /// Creates a context of kind \p ContextKind with the given
  /// \p BindingStrength and initial \p IsExpression state. All other members
  /// keep their in-class defaults.
  Context(tok::TokenKind ContextKind, unsigned BindingStrength,
          bool IsExpression)
      : ContextKind(ContextKind), BindingStrength(BindingStrength),
        IsExpression(IsExpression) {}

  // Token kind passed in by the creator of the context; modifyContext()
  // compares it against tok::less to detect template parameter lists.
  tok::TokenKind ContextKind;
  // Copied to FormatToken::BindingStrength of every token annotated while
  // this context is innermost (see next()).
  unsigned BindingStrength;
  // Whether tokens in this context are currently treated as part of an
  // expression; updated token by token in modifyContext().
  bool IsExpression;
  // Copied onto FirstObjCSelectorName->LongestObjCSelectorName once known.
  unsigned LongestObjCSelectorName = 0;
  // NOTE(review): the three ColonIs* flags are set and read outside this
  // excerpt; presumably they select how a `:` is annotated -- confirm.
  bool ColonIsForRangeExpr = false;
  bool ColonIsDictLiteral = false;
  bool ColonIsObjCMethodExpr = false;
  FormatToken *FirstObjCSelectorName = nullptr;
  // Set by determineTokenType() to the token it marks as TT_StartOfName and
  // cleared again when a `;` is seen.
  FormatToken *FirstStartOfName = nullptr;
  // Cleared by modifyContext() when a `new` keyword is seen; combined with
  // IsExpression when classifying `*` and `&`.
  bool CanBeExpression = true;
  // Set when a `^` was annotated as a unary operator in this context.
  bool CaretFound = false;
  bool InCpp11AttributeSpecifier = false;
  bool InCSharpAttributeSpecifier = false;
  // Set once an assignment-precedence operator was seen in Verilog code.
  bool VerilogAssignmentFound = false;
  // Whether the braces may mean concatenation instead of structure or array
  // literal.
  bool VerilogMayBeConcatenation = false;
  enum {
    Unknown,
    // Like the part after `:` in a constructor.
    //   Context(...) : IsExpression(IsExpression)
    CtorInitializer,
    // Like in the parentheses in a foreach.
    ForEachMacro,
    // Like the inheritance list in a class declaration.
    //   class Input : public IO
    InheritanceList,
    // Like in the braced list.
    //   int x[] = {};
    StructArrayInitializer,
    // Like in `static_cast<int>`.
    TemplateArgument,
    // C11 _Generic selection.
    C11GenericSelection,
    // Like in the outer parentheses in `ffnand ff1(.q());`.
    VerilogInstancePortList,
  } ContextType = Unknown;
};
1781
1782 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1783 /// of each instance.
1784 struct ScopedContextCreator {
1785 AnnotatingParser &P;
1786
ScopedContextCreatorclang::format::__anonf599c8510111::AnnotatingParser::ScopedContextCreator1787 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1788 unsigned Increase)
1789 : P(P) {
1790 P.Contexts.push_back(Context(ContextKind,
1791 P.Contexts.back().BindingStrength + Increase,
1792 P.Contexts.back().IsExpression));
1793 }
1794
~ScopedContextCreatorclang::format::__anonf599c8510111::AnnotatingParser::ScopedContextCreator1795 ~ScopedContextCreator() {
1796 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1797 if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
1798 P.Contexts.pop_back();
1799 P.Contexts.back().ContextType = Context::StructArrayInitializer;
1800 return;
1801 }
1802 }
1803 P.Contexts.pop_back();
1804 }
1805 };
1806
/// Updates the innermost context (\c Contexts.back()) for \p Current before
/// the token itself is annotated.
///
/// Depending on \p Current this sets or clears \c IsExpression, sets
/// \c ContextType or clears \c CanBeExpression. As a side effect, earlier
/// `*`/`&` tokens may be re-typed as \c TT_PointerOrReference. The branches
/// form one if/else-if chain, so only the first matching rule is applied.
void modifyContext(const FormatToken &Current) {
  // Returns true if \p Current is an assignment-precedence operator whose
  // right-hand side should be treated as an expression.
  auto AssignmentStartsExpression = [&]() {
    if (Current.getPrecedence() != prec::Assignment)
      return false;

    // Lines starting with `using` or `return` are excluded.
    if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
      return false;
    if (Line.First->is(tok::kw_template)) {
      assert(Current.Previous);
      if (Current.Previous->is(tok::kw_operator)) {
        // `template ... operator=` cannot be an expression.
        return false;
      }

      // `template` keyword can start a variable template.
      const FormatToken *Tok = Line.First->getNextNonComment();
      assert(Tok); // Current token is on the same line.
      if (Tok->isNot(TT_TemplateOpener)) {
        // Explicit template instantiations do not have `<>`.
        return false;
      }

      // This is the default value of a template parameter, determine if it's
      // type or non-type.
      if (Contexts.back().ContextKind == tok::less) {
        assert(Current.Previous->Previous);
        return !Current.Previous->Previous->isOneOf(tok::kw_typename,
                                                    tok::kw_class);
      }

      // Look at the first token after the template parameter list: a
      // class/enum/struct/using there means this is not an expression.
      Tok = Tok->MatchingParen;
      if (!Tok)
        return false;
      Tok = Tok->getNextNonComment();
      if (!Tok)
        return false;

      if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
                       tok::kw_using)) {
        return false;
      }

      return true;
    }

    // Type aliases use `type X = ...;` in TypeScript and can be exported
    // using `export type ...`.
    if (Style.isJavaScript() &&
        (Line.startsWith(Keywords.kw_type, tok::identifier) ||
         Line.startsWith(tok::kw_export, Keywords.kw_type,
                         tok::identifier))) {
      return false;
    }

    // An assignment operator directly after `operator` is part of an
    // operator name, not the start of an expression.
    return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
  };

  if (AssignmentStartsExpression()) {
    Contexts.back().IsExpression = true;
    if (!Line.startsWith(TT_UnaryOperator)) {
      // Walk backwards over the left-hand side, stopping at a `,` or `;`, and
      // re-type `*`/`&` tokens that were annotated as operators as
      // pointers/references.
      for (FormatToken *Previous = Current.Previous;
           Previous && Previous->Previous &&
           !Previous->Previous->isOneOf(tok::comma, tok::semi);
           Previous = Previous->Previous) {
        if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
          // Jump over a bracketed range to its opening token.
          Previous = Previous->MatchingParen;
          if (!Previous)
            break;
        }
        if (Previous->opensScope())
          break;
        if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
            Previous->isPointerOrReference() && Previous->Previous &&
            Previous->Previous->isNot(tok::equal)) {
          Previous->setType(TT_PointerOrReference);
        }
      }
    }
  } else if (Current.is(tok::lessless) &&
             (!Current.Previous ||
              Current.Previous->isNot(tok::kw_operator))) {
    // `<<` that is not part of `operator<<`.
    Contexts.back().IsExpression = true;
  } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
    Contexts.back().IsExpression = true;
  } else if (Current.is(TT_TrailingReturnArrow)) {
    Contexts.back().IsExpression = false;
  } else if (Current.is(Keywords.kw_assert)) {
    // `assert` only switches to expression mode when formatting Java.
    Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
  } else if (Current.Previous &&
             Current.Previous->is(TT_CtorInitializerColon)) {
    Contexts.back().IsExpression = true;
    Contexts.back().ContextType = Context::CtorInitializer;
  } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
    Contexts.back().ContextType = Context::InheritanceList;
  } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
    // A run of `*`/`&` directly before `)`, `>` or `,` is re-typed as
    // pointer/reference.
    for (FormatToken *Previous = Current.Previous;
         Previous && Previous->isOneOf(tok::star, tok::amp);
         Previous = Previous->Previous) {
      Previous->setType(TT_PointerOrReference);
    }
    if (Line.MustBeDeclaration &&
        Contexts.front().ContextType != Context::CtorInitializer) {
      Contexts.back().IsExpression = false;
    }
  } else if (Current.is(tok::kw_new)) {
    Contexts.back().CanBeExpression = false;
  } else if (Current.is(tok::semi) ||
             (Current.is(tok::exclaim) && Current.Previous &&
              Current.Previous->isNot(tok::kw_operator))) {
    // This should be the condition or increment in a for-loop.
    // But not operator !() (can't use TT_OverloadedOperator here as it's not
    // been annotated yet).
    Contexts.back().IsExpression = true;
  }
}
1922
untilMatchingParen(FormatToken * Current)1923 static FormatToken *untilMatchingParen(FormatToken *Current) {
1924 // Used when `MatchingParen` is not yet established.
1925 int ParenLevel = 0;
1926 while (Current) {
1927 if (Current->is(tok::l_paren))
1928 ++ParenLevel;
1929 if (Current->is(tok::r_paren))
1930 --ParenLevel;
1931 if (ParenLevel < 1)
1932 break;
1933 Current = Current->Next;
1934 }
1935 return Current;
1936 }
1937
isDeductionGuide(FormatToken & Current)1938 static bool isDeductionGuide(FormatToken &Current) {
1939 // Look for a deduction guide template<T> A(...) -> A<...>;
1940 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1941 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1942 // Find the TemplateCloser.
1943 FormatToken *TemplateCloser = Current.Next->Next;
1944 int NestingLevel = 0;
1945 while (TemplateCloser) {
1946 // Skip over an expressions in parens A<(3 < 2)>;
1947 if (TemplateCloser->is(tok::l_paren)) {
1948 // No Matching Paren yet so skip to matching paren
1949 TemplateCloser = untilMatchingParen(TemplateCloser);
1950 if (!TemplateCloser)
1951 break;
1952 }
1953 if (TemplateCloser->is(tok::less))
1954 ++NestingLevel;
1955 if (TemplateCloser->is(tok::greater))
1956 --NestingLevel;
1957 if (NestingLevel < 1)
1958 break;
1959 TemplateCloser = TemplateCloser->Next;
1960 }
1961 // Assuming we have found the end of the template ensure its followed
1962 // with a semi-colon.
1963 if (TemplateCloser && TemplateCloser->Next &&
1964 TemplateCloser->Next->is(tok::semi) &&
1965 Current.Previous->MatchingParen) {
1966 // Determine if the identifier `A` prior to the A<..>; is the same as
1967 // prior to the A(..)
1968 FormatToken *LeadingIdentifier =
1969 Current.Previous->MatchingParen->Previous;
1970
1971 return LeadingIdentifier &&
1972 LeadingIdentifier->TokenText == Current.Next->TokenText;
1973 }
1974 }
1975 return false;
1976 }
1977
/// Assigns a token type to \p Current unless its type is already known.
///
/// After the JavaScript/C# `!` special case, the rules form one long
/// if/else-if chain keyed mostly on the token kind, so only the first matching
/// rule is applied and the order of the branches matters. Besides typing the
/// token, some branches update \c Contexts.back() or \c AutoFound.
void determineTokenType(FormatToken &Current) {
  if (Current.isNot(TT_Unknown)) {
    // The token type is already known.
    return;
  }

  // In JavaScript and C#, a `!` may be a non-null assertion
  // (TT_NonNullAssertion) rather than a unary operator, depending on the
  // tokens around it.
  if ((Style.isJavaScript() || Style.isCSharp()) &&
      Current.is(tok::exclaim)) {
    if (Current.Previous) {
      bool IsIdentifier =
          Style.isJavaScript()
              ? Keywords.IsJavaScriptIdentifier(
                    *Current.Previous, /* AcceptIdentifierName= */ true)
              : Current.Previous->is(tok::identifier);
      if (IsIdentifier ||
          Current.Previous->isOneOf(
              tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
              tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
              Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
          Current.Previous->Tok.isLiteral()) {
        Current.setType(TT_NonNullAssertion);
        return;
      }
    }
    if (Current.Next &&
        Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
      Current.setType(TT_NonNullAssertion);
      return;
    }
  }

  if (Current.is(Keywords.kw_instanceof)) {
    Current.setType(TT_BinaryOperator);
  } else if (isStartOfName(Current) &&
             (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
    // Line.MightBeFunctionDecl can only be true after the parentheses of a
    // function declaration have been found. In this case, 'Current' is a
    // trailing token of this declaration and thus cannot be a name (unless it
    // is nested inside brackets).
    Contexts.back().FirstStartOfName = &Current;
    Current.setType(TT_StartOfName);
  } else if (Current.is(tok::semi)) {
    // Reset FirstStartOfName after finding a semicolon so that a for loop
    // with multiple increment statements is not confused with a for loop
    // having multiple variable declarations.
    Contexts.back().FirstStartOfName = nullptr;
  } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
    // Remembered so that a later top-level `->` can be recognized as a
    // trailing return arrow.
    AutoFound = true;
  } else if (Current.is(tok::arrow) &&
             Style.Language == FormatStyle::LK_Java) {
    Current.setType(TT_TrailingReturnArrow);
  } else if (Current.is(tok::arrow) && Style.isVerilog()) {
    // The implication operator.
    Current.setType(TT_BinaryOperator);
  } else if (Current.is(tok::arrow) && AutoFound &&
             Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
             !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
    // not auto operator->() -> xxx;
    Current.setType(TT_TrailingReturnArrow);
  } else if (Current.is(tok::arrow) && Current.Previous &&
             Current.Previous->is(tok::r_brace)) {
    // Concept implicit conversion constraint needs to be treated like
    // a trailing return type  ... } -> <type>.
    Current.setType(TT_TrailingReturnArrow);
  } else if (isDeductionGuide(Current)) {
    // Deduction guides trailing arrow " A(...) -> A<T>;".
    Current.setType(TT_TrailingReturnArrow);
  } else if (Current.isPointerOrReference()) {
    // `*`/`&` are classified from the surrounding tokens and from the
    // expression state of the innermost context.
    Current.setType(determineStarAmpUsage(
        Current,
        Contexts.back().CanBeExpression && Contexts.back().IsExpression,
        Contexts.back().ContextType == Context::TemplateArgument));
  } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
             (Style.isVerilog() && Current.is(tok::pipe))) {
    Current.setType(determinePlusMinusCaretUsage(Current));
    if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
      Contexts.back().CaretFound = true;
  } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
    Current.setType(determineIncrementUsage(Current));
  } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
    Current.setType(TT_UnaryOperator);
  } else if (Current.is(tok::question)) {
    if (Style.isJavaScript() && Line.MustBeDeclaration &&
        !Contexts.back().IsExpression) {
      // In JavaScript, `interface X { foo?(): bar; }` is an optional method
      // on the interface, not a ternary expression.
      Current.setType(TT_JsTypeOptionalQuestion);
    } else {
      Current.setType(TT_ConditionalExpr);
    }
  } else if (Current.isBinaryOperator() &&
             (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
             (Current.isNot(tok::greater) &&
              Style.Language != FormatStyle::LK_TextProto)) {
    if (Style.isVerilog()) {
      if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
          !Contexts.back().VerilogAssignmentFound) {
        // In Verilog `<=` is assignment if in its own statement. It is a
        // statement instead of an expression, that is it can not be chained.
        Current.ForcedPrecedence = prec::Assignment;
        Current.setFinalizedType(TT_BinaryOperator);
      }
      if (Current.getPrecedence() == prec::Assignment)
        Contexts.back().VerilogAssignmentFound = true;
    }
    Current.setType(TT_BinaryOperator);
  } else if (Current.is(tok::comment)) {
    if (Current.TokenText.starts_with("/*")) {
      if (Current.TokenText.ends_with("*/")) {
        Current.setType(TT_BlockComment);
      } else {
        // The lexer has for some reason determined a comment here. But we
        // cannot really handle it, if it isn't properly terminated.
        Current.Tok.setKind(tok::unknown);
      }
    } else {
      Current.setType(TT_LineComment);
    }
  } else if (Current.is(tok::string_literal)) {
    // In Verilog, a string that is a whole element of a possible
    // concatenation (between `,`/`{` and `,`/`}`) gets its own type.
    if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
        Current.getPreviousNonComment() &&
        Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
        Current.getNextNonComment() &&
        Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
      Current.setType(TT_StringInConcatenation);
    }
  } else if (Current.is(tok::l_paren)) {
    if (lParenStartsCppCast(Current))
      Current.setType(TT_CppCastLParen);
  } else if (Current.is(tok::r_paren)) {
    if (rParenEndsCast(Current))
      Current.setType(TT_CastRParen);
    // Independently of the cast check, a `)` that closes the argument list of
    // an all-uppercase identifier at the start of a declaration is marked as
    // TT_FunctionAnnotationRParen, provided the token after the `)` does not
    // continue an expression or statement.
    if (Current.MatchingParen && Current.Next &&
        !Current.Next->isBinaryOperator() &&
        !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
                               tok::comma, tok::period, tok::arrow,
                               tok::coloncolon, tok::kw_noexcept)) {
      if (FormatToken *AfterParen = Current.MatchingParen->Next;
          AfterParen && AfterParen->isNot(tok::caret)) {
        // Make sure this isn't the return type of an Obj-C block declaration.
        if (FormatToken *BeforeParen = Current.MatchingParen->Previous;
            BeforeParen && BeforeParen->is(tok::identifier) &&
            BeforeParen->isNot(TT_TypenameMacro) &&
            BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
            (!BeforeParen->Previous ||
             BeforeParen->Previous->ClosesTemplateDeclaration ||
             BeforeParen->Previous->ClosesRequiresClause)) {
          Current.setType(TT_FunctionAnnotationRParen);
        }
      }
    }
  } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
             Style.Language != FormatStyle::LK_Java) {
    // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
    // marks declarations and properties that need special formatting.
    switch (Current.Next->Tok.getObjCKeywordID()) {
    case tok::objc_interface:
    case tok::objc_implementation:
    case tok::objc_protocol:
      Current.setType(TT_ObjCDecl);
      break;
    case tok::objc_property:
      Current.setType(TT_ObjCProperty);
      break;
    default:
      break;
    }
  } else if (Current.is(tok::period)) {
    FormatToken *PreviousNoComment = Current.getPreviousNonComment();
    if (PreviousNoComment &&
        PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
      // A `.` right after `,` or `{` starts a designated initializer.
      Current.setType(TT_DesignatedInitializerPeriod);
    } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
               Current.Previous->isOneOf(TT_JavaAnnotation,
                                         TT_LeadingJavaAnnotation)) {
      // A `.` inside a Java annotation name takes the annotation's type.
      Current.setType(Current.Previous->getType());
    }
  } else if (canBeObjCSelectorComponent(Current) &&
             // FIXME(bug 36976): ObjC return types shouldn't use
             // TT_CastRParen.
             Current.Previous && Current.Previous->is(TT_CastRParen) &&
             Current.Previous->MatchingParen &&
             Current.Previous->MatchingParen->Previous &&
             Current.Previous->MatchingParen->Previous->is(
                 TT_ObjCMethodSpecifier)) {
    // This is the first part of an Objective-C selector name. (If there's no
    // colon after this, this is the only place which annotates the identifier
    // as a selector.)
    Current.setType(TT_SelectorName);
  } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
                             tok::kw_requires) &&
             Current.Previous &&
             !Current.Previous->isOneOf(tok::equal, tok::at,
                                        TT_CtorInitializerComma,
                                        TT_CtorInitializerColon) &&
             Line.MightBeFunctionDecl && Contexts.size() == 1) {
    // Line.MightBeFunctionDecl can only be true after the parentheses of a
    // function declaration have been found.
    Current.setType(TT_TrailingAnnotation);
  } else if ((Style.Language == FormatStyle::LK_Java ||
              Style.isJavaScript()) &&
             Current.Previous) {
    if (Current.Previous->is(tok::at) &&
        Current.isNot(Keywords.kw_interface)) {
      // The token after `@` is an annotation name. It is a leading annotation
      // when nothing, or only other leading annotations, precede the `@`.
      const FormatToken &AtToken = *Current.Previous;
      const FormatToken *Previous = AtToken.getPreviousNonComment();
      if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
        Current.setType(TT_LeadingJavaAnnotation);
      else
        Current.setType(TT_JavaAnnotation);
    } else if (Current.Previous->is(tok::period) &&
               Current.Previous->isOneOf(TT_JavaAnnotation,
                                         TT_LeadingJavaAnnotation)) {
      // Continue a dotted annotation name with the same type as the `.`.
      Current.setType(Current.Previous->getType());
    }
  }
}
2194
2195 /// Take a guess at whether \p Tok starts a name of a function or
2196 /// variable declaration.
2197 ///
2198 /// This is a heuristic based on whether \p Tok is an identifier following
2199 /// something that is likely a type.
isStartOfName(const FormatToken & Tok)2200 bool isStartOfName(const FormatToken &Tok) {
2201 // Handled in ExpressionParser for Verilog.
2202 if (Style.isVerilog())
2203 return false;
2204
2205 if (Tok.isNot(tok::identifier) || !Tok.Previous)
2206 return false;
2207
2208 if (const auto *NextNonComment = Tok.getNextNonComment();
2209 (!NextNonComment && !Line.InMacroBody) ||
2210 (NextNonComment &&
2211 (NextNonComment->isPointerOrReference() ||
2212 NextNonComment->is(tok::string_literal) ||
2213 (Line.InPragmaDirective && NextNonComment->is(tok::identifier))))) {
2214 return false;
2215 }
2216
2217 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2218 Keywords.kw_as)) {
2219 return false;
2220 }
2221 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2222 return false;
2223
2224 // Skip "const" as it does not have an influence on whether this is a name.
2225 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2226
2227 // For javascript const can be like "let" or "var"
2228 if (!Style.isJavaScript())
2229 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2230 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2231
2232 if (!PreviousNotConst)
2233 return false;
2234
2235 if (PreviousNotConst->ClosesRequiresClause)
2236 return false;
2237
2238 if (Style.isTableGen()) {
2239 // keywords such as let and def* defines names.
2240 if (Keywords.isTableGenDefinition(*PreviousNotConst))
2241 return true;
2242 }
2243
2244 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2245 PreviousNotConst->Previous &&
2246 PreviousNotConst->Previous->is(tok::hash);
2247
2248 if (PreviousNotConst->is(TT_TemplateCloser)) {
2249 return PreviousNotConst && PreviousNotConst->MatchingParen &&
2250 PreviousNotConst->MatchingParen->Previous &&
2251 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2252 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2253 }
2254
2255 if ((PreviousNotConst->is(tok::r_paren) &&
2256 PreviousNotConst->is(TT_TypeDeclarationParen)) ||
2257 PreviousNotConst->is(TT_AttributeRParen)) {
2258 return true;
2259 }
2260
2261 // If is a preprocess keyword like #define.
2262 if (IsPPKeyword)
2263 return false;
2264
2265 // int a or auto a.
2266 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
2267 return true;
2268
2269 // *a or &a or &&a.
2270 if (PreviousNotConst->is(TT_PointerOrReference))
2271 return true;
2272
2273 // MyClass a;
2274 if (PreviousNotConst->isSimpleTypeSpecifier())
2275 return true;
2276
2277 // type[] a in Java
2278 if (Style.Language == FormatStyle::LK_Java &&
2279 PreviousNotConst->is(tok::r_square)) {
2280 return true;
2281 }
2282
2283 // const a = in JavaScript.
2284 return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2285 }
2286
2287 /// Determine whether '(' is starting a C++ cast.
lParenStartsCppCast(const FormatToken & Tok)2288 bool lParenStartsCppCast(const FormatToken &Tok) {
2289 // C-style casts are only used in C++.
2290 if (!Style.isCpp())
2291 return false;
2292
2293 FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2294 if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2295 LeftOfParens->MatchingParen) {
2296 auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2297 if (Prev &&
2298 Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2299 tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2300 // FIXME: Maybe we should handle identifiers ending with "_cast",
2301 // e.g. any_cast?
2302 return true;
2303 }
2304 }
2305 return false;
2306 }
2307
/// Determine whether ')' is ending a cast.
///
/// This is a sequence of heuristics that look at the token before the
/// matching '(', the tokens inside the parentheses and the tokens after
/// \p Tok. The checks are ordered: the first one that reaches a verdict
/// returns.
///
/// \param Tok The ')' token to examine.
/// \returns \c true if the parentheses closed by \p Tok are likely a C-style
/// cast.
bool rParenEndsCast(const FormatToken &Tok) {
  // C-style casts are only used in C++, C# and Java.
  if (!Style.isCSharp() && !Style.isCpp() &&
      Style.Language != FormatStyle::LK_Java) {
    return false;
  }

  // Empty parens aren't casts and there are no casts at the end of the line.
  if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
    return false;

  if (Tok.MatchingParen->is(TT_OverloadedOperatorLParen))
    return false;

  // First look at what precedes the opening parenthesis.
  FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
  if (LeftOfParens) {
    // If there is a closing parenthesis left of the current
    // parentheses, look past it as these might be chained casts.
    if (LeftOfParens->is(tok::r_paren) &&
        LeftOfParens->isNot(TT_CastRParen)) {
      if (!LeftOfParens->MatchingParen ||
          !LeftOfParens->MatchingParen->Previous) {
        return false;
      }
      LeftOfParens = LeftOfParens->MatchingParen->Previous;
    }

    if (LeftOfParens->is(tok::r_square)) {
      //   delete[] (void *)ptr;
      // Returns the `delete` token if \p Tok is the `]` of `delete[]`,
      // otherwise nullptr.
      auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
        if (Tok->isNot(tok::r_square))
          return nullptr;

        Tok = Tok->getPreviousNonComment();
        if (!Tok || Tok->isNot(tok::l_square))
          return nullptr;

        Tok = Tok->getPreviousNonComment();
        if (!Tok || Tok->isNot(tok::kw_delete))
          return nullptr;
        return Tok;
      };
      if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
        LeftOfParens = MaybeDelete;
    }

    // The Condition directly below this one will see the operator arguments
    // as a (void *foo) cast.
    //   void operator delete(void *foo) ATTRIB;
    if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
        LeftOfParens->Previous->is(tok::kw_operator)) {
      return false;
    }

    // If there is an identifier (or with a few exceptions a keyword) right
    // before the parentheses, this is unlikely to be a cast.
    if (LeftOfParens->Tok.getIdentifierInfo() &&
        !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
                               tok::kw_delete, tok::kw_throw)) {
      return false;
    }

    // Certain other tokens right before the parentheses are also signals that
    // this cannot be a cast.
    if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
                              TT_TemplateCloser, tok::ellipsis)) {
      return false;
    }
  }

  // From here on the decision is mostly driven by what follows the ')'.
  if (Tok.Next->isOneOf(tok::question, tok::ampamp))
    return false;

  // `foreach((A a, B b) in someList)` should not be seen as a cast.
  if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
    return false;

  // Functions which end with decorations like volatile, noexcept are unlikely
  // to be casts.
  if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
                        tok::kw_requires, tok::kw_throw, tok::arrow,
                        Keywords.kw_override, Keywords.kw_final) ||
      isCppAttribute(Style.isCpp(), *Tok.Next)) {
    return false;
  }

  // As Java has no function types, a "(" after the ")" likely means that this
  // is a cast.
  if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
    return true;

  // If a (non-string) literal follows, this is likely a cast. The same goes
  // for `sizeof` and `alignof`.
  if (Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
      (Tok.Next->Tok.isLiteral() && Tok.Next->isNot(tok::string_literal))) {
    return true;
  }

  // Heuristically try to determine whether the parentheses contain a type.
  // Returns true if \p T, after walking back over trailing qualifiers and
  // attributes, is a token typed TT_PointerOrReference.
  auto IsQualifiedPointerOrReference = [](FormatToken *T) {
    // This is used to handle cases such as x = (foo *const)&y;
    assert(!T->isSimpleTypeSpecifier() && "Should have already been checked");
    // Strip trailing qualifiers such as const or volatile when checking
    // whether the parens could be a cast to a pointer/reference type.
    while (T) {
      if (T->is(TT_AttributeRParen)) {
        // Handle `x = (foo *__attribute__((foo)))&v;`:
        assert(T->is(tok::r_paren));
        assert(T->MatchingParen);
        assert(T->MatchingParen->is(tok::l_paren));
        assert(T->MatchingParen->is(TT_AttributeLParen));
        if (const auto *Tok = T->MatchingParen->Previous;
            Tok && Tok->isAttribute()) {
          T = Tok->Previous;
          continue;
        }
      } else if (T->is(TT_AttributeSquare)) {
        // Handle `x = (foo *[[clang::foo]])&v;`:
        if (T->MatchingParen && T->MatchingParen->Previous) {
          T = T->MatchingParen->Previous;
          continue;
        }
      } else if (T->canBePointerOrReferenceQualifier()) {
        T = T->Previous;
        continue;
      }
      break;
    }
    return T && T->is(TT_PointerOrReference);
  };
  // The parentheses hold a type if their last token closes a template
  // argument list or a type declaration paren, is a simple type specifier, or
  // is a (possibly qualified) pointer/reference.
  bool ParensAreType =
      !Tok.Previous ||
      Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
      Tok.Previous->isSimpleTypeSpecifier() ||
      IsQualifiedPointerOrReference(Tok.Previous);
  // `=`, `;`, `{` or `>` after the ')' suggests the end of a declaration
  // rather than a cast operand.
  bool ParensCouldEndDecl =
      Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
  if (ParensAreType && !ParensCouldEndDecl)
    return true;

  // At this point, we heuristically assume that there are no casts at the
  // start of the line. We assume that we have found most cases where there
  // are by the logic above, e.g. "(void)x;".
  if (!LeftOfParens)
    return false;

  // Certain token types inside the parentheses mean that this can't be a
  // cast.
  for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
       Token = Token->Next) {
    if (Token->is(TT_BinaryOperator))
      return false;
  }

  // If the following token is an identifier or 'this', this is a cast. All
  // cases where this can be something else are handled above.
  if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
    return true;

  // Look for a cast `( x ) (`.
  if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
    if (Tok.Previous->is(tok::identifier) &&
        Tok.Previous->Previous->is(tok::l_paren)) {
      return true;
    }
  }

  // The remaining checks need two tokens after the ')'.
  if (!Tok.Next->Next)
    return false;

  // If the next token after the parenthesis is a unary operator, assume
  // that this is cast, unless there are unexpected tokens inside the
  // parenthesis.
  const bool NextIsAmpOrStar = Tok.Next->isOneOf(tok::amp, tok::star);
  if (!(Tok.Next->isUnaryOperator() || NextIsAmpOrStar) ||
      Tok.Next->is(tok::plus) ||
      !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
    return false;
  }
  // `) * 2` / `) & 2` and any `*`/`&` inside a preprocessor directive are not
  // treated as casts.
  if (NextIsAmpOrStar &&
      (Tok.Next->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
    return false;
  }
  if (Line.InPPDirective && Tok.Next->is(tok::minus))
    return false;
  // Search for unexpected tokens: only `const`, identifiers and `::` are
  // allowed inside the parentheses.
  for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
       Prev = Prev->Previous) {
    if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
      return false;
  }
  return true;
}
2501
2502 /// Returns true if the token is used as a unary operator.
determineUnaryOperatorByUsage(const FormatToken & Tok)2503 bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2504 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2505 if (!PrevToken)
2506 return true;
2507
2508 // These keywords are deliberately not included here because they may
2509 // precede only one of unary star/amp and plus/minus but not both. They are
2510 // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2511 //
2512 // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2513 // know how they can be followed by a star or amp.
2514 if (PrevToken->isOneOf(
2515 TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2516 tok::equal, tok::question, tok::l_square, tok::l_brace,
2517 tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2518 tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2519 return true;
2520 }
2521
2522 // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2523 // where the unary `+` operator is overloaded, it is reasonable to write
2524 // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2525 if (PrevToken->is(tok::kw_sizeof))
2526 return true;
2527
2528 // A sequence of leading unary operators.
2529 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2530 return true;
2531
2532 // There can't be two consecutive binary operators.
2533 if (PrevToken->is(TT_BinaryOperator))
2534 return true;
2535
2536 return false;
2537 }
2538
2539 /// Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)2540 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2541 bool InTemplateArgument) {
2542 if (Style.isJavaScript())
2543 return TT_BinaryOperator;
2544
2545 // && in C# must be a binary operator.
2546 if (Style.isCSharp() && Tok.is(tok::ampamp))
2547 return TT_BinaryOperator;
2548
2549 if (Style.isVerilog()) {
2550 // In Verilog, `*` can only be a binary operator. `&` can be either unary
2551 // or binary. `*` also includes `*>` in module path declarations in
2552 // specify blocks because merged tokens take the type of the first one by
2553 // default.
2554 if (Tok.is(tok::star))
2555 return TT_BinaryOperator;
2556 return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
2557 : TT_BinaryOperator;
2558 }
2559
2560 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2561 if (!PrevToken)
2562 return TT_UnaryOperator;
2563 if (PrevToken->is(TT_TypeName))
2564 return TT_PointerOrReference;
2565
2566 const FormatToken *NextToken = Tok.getNextNonComment();
2567
2568 if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2569 return TT_BinaryOperator;
2570
2571 if (!NextToken ||
2572 NextToken->isOneOf(tok::arrow, tok::equal, tok::comma, tok::r_paren,
2573 TT_RequiresClause) ||
2574 (NextToken->is(tok::kw_noexcept) && !IsExpression) ||
2575 NextToken->canBePointerOrReferenceQualifier() ||
2576 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2577 return TT_PointerOrReference;
2578 }
2579
2580 if (PrevToken->is(tok::coloncolon))
2581 return TT_PointerOrReference;
2582
2583 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2584 return TT_PointerOrReference;
2585
2586 if (determineUnaryOperatorByUsage(Tok))
2587 return TT_UnaryOperator;
2588
2589 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2590 return TT_PointerOrReference;
2591 if (NextToken->is(tok::kw_operator) && !IsExpression)
2592 return TT_PointerOrReference;
2593 if (NextToken->isOneOf(tok::comma, tok::semi))
2594 return TT_PointerOrReference;
2595
2596 // After right braces, star tokens are likely to be pointers to struct,
2597 // union, or class.
2598 // struct {} *ptr;
2599 // This by itself is not sufficient to distinguish from multiplication
2600 // following a brace-initialized expression, as in:
2601 // int i = int{42} * 2;
2602 // In the struct case, the part of the struct declaration until the `{` and
2603 // the `}` are put on separate unwrapped lines; in the brace-initialized
2604 // case, the matching `{` is on the same unwrapped line, so check for the
2605 // presence of the matching brace to distinguish between those.
2606 if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
2607 !PrevToken->MatchingParen) {
2608 return TT_PointerOrReference;
2609 }
2610
2611 if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
2612 return TT_UnaryOperator;
2613
2614 if (PrevToken->Tok.isLiteral() ||
2615 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2616 tok::kw_false, tok::r_brace)) {
2617 return TT_BinaryOperator;
2618 }
2619
2620 const FormatToken *NextNonParen = NextToken;
2621 while (NextNonParen && NextNonParen->is(tok::l_paren))
2622 NextNonParen = NextNonParen->getNextNonComment();
2623 if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
2624 NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
2625 NextNonParen->isUnaryOperator())) {
2626 return TT_BinaryOperator;
2627 }
2628
2629 // If we know we're in a template argument, there are no named declarations.
2630 // Thus, having an identifier on the right-hand side indicates a binary
2631 // operator.
2632 if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
2633 return TT_BinaryOperator;
2634
2635 // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
2636 // unary "&".
2637 if (Tok.is(tok::ampamp) &&
2638 NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
2639 return TT_BinaryOperator;
2640 }
2641
2642 // This catches some cases where evaluation order is used as control flow:
2643 // aaa && aaa->f();
2644 if (NextToken->Tok.isAnyIdentifier()) {
2645 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2646 if (NextNextToken && NextNextToken->is(tok::arrow))
2647 return TT_BinaryOperator;
2648 }
2649
2650 // It is very unlikely that we are going to find a pointer or reference type
2651 // definition on the RHS of an assignment.
2652 if (IsExpression && !Contexts.back().CaretFound)
2653 return TT_BinaryOperator;
2654
2655 // Opeartors at class scope are likely pointer or reference members.
2656 if (!Scopes.empty() && Scopes.back() == ST_Class)
2657 return TT_PointerOrReference;
2658
2659 // Tokens that indicate member access or chained operator& use.
2660 auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
2661 return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
2662 tok::arrowstar, tok::periodstar);
2663 };
2664
2665 // It's more likely that & represents operator& than an uninitialized
2666 // reference.
2667 if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
2668 IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
2669 NextToken && NextToken->Tok.isAnyIdentifier()) {
2670 if (auto NextNext = NextToken->getNextNonComment();
2671 NextNext &&
2672 (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
2673 return TT_BinaryOperator;
2674 }
2675 }
2676
2677 return TT_PointerOrReference;
2678 }
2679
determinePlusMinusCaretUsage(const FormatToken & Tok)2680 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2681 if (determineUnaryOperatorByUsage(Tok))
2682 return TT_UnaryOperator;
2683
2684 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2685 if (!PrevToken)
2686 return TT_UnaryOperator;
2687
2688 if (PrevToken->is(tok::at))
2689 return TT_UnaryOperator;
2690
2691 // Fall back to marking the token as binary operator.
2692 return TT_BinaryOperator;
2693 }
2694
2695 /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)2696 TokenType determineIncrementUsage(const FormatToken &Tok) {
2697 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2698 if (!PrevToken || PrevToken->is(TT_CastRParen))
2699 return TT_UnaryOperator;
2700 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2701 return TT_TrailingUnaryOperator;
2702
2703 return TT_UnaryOperator;
2704 }
2705
2706 SmallVector<Context, 8> Contexts;
2707
2708 const FormatStyle &Style;
2709 AnnotatedLine &Line;
2710 FormatToken *CurrentToken;
2711 bool AutoFound;
2712 const AdditionalKeywords &Keywords;
2713
2714 SmallVector<ScopeType> &Scopes;
2715
// Set of "<" tokens that do not open a template parameter list. If parseAngle
// determines that a specific token can't be a template opener, it will make
// the same decision irrespective of the decisions for tokens leading up to
// it. Store this information to prevent this from causing exponential runtime.
2720 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2721 };
2722
// Pseudo precedence levels used by ExpressionParser, defined relative to
// prec::PointerToMember.
//
// Level at which ExpressionParser::parse() hands over to parseUnaryOperator().
static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
// Level that ExpressionParser::getCurrentPrecedence() reports for `.` and
// `->`. parse() returns immediately for any precedence above this one.
static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2725
2726 /// Parses binary expressions by inserting fake parenthesis based on
2727 /// operator precedence.
2728 class ExpressionParser {
2729 public:
/// Creates a parser for \p Line whose cursor starts at the line's first
/// token. \p Style and \p Keywords are stored by reference.
ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
                 AnnotatedLine &Line)
    : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
2733
/// Parse expressions with the given operator precedence.
///
/// Starting at \c Current, consumes tokens that belong to an expression of
/// precedence \p Precedence, recursing with \c Precedence + 1 for tighter
/// binding sub-expressions. When at least one operator of exactly this
/// precedence was seen, the consumed range is wrapped in fake parentheses
/// via addFakeParenthesis(). Operators of the same precedence are chained
/// through \c NextOperator and numbered through \c OperatorIndex.
///
/// \param Precedence The precedence level to parse; values above
/// PrecedenceArrowAndPeriod make the call a no-op.
void parse(int Precedence = 0) {
  // Skip 'return' and ObjC selector colons as they are not part of a binary
  // expression.
  while (Current && (Current->is(tok::kw_return) ||
                     (Current->is(tok::colon) &&
                      Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
    next();
  }

  // Recursion bottoms out at the end of the line or above the highest pseudo
  // precedence level.
  if (!Current || Precedence > PrecedenceArrowAndPeriod)
    return;

  // Conditional expressions need to be parsed separately for proper nesting.
  if (Precedence == prec::Conditional) {
    parseConditionalExpr();
    return;
  }

  // Parse unary operators, which all have a higher precedence than binary
  // operators.
  if (Precedence == PrecedenceUnaryOperator) {
    parseUnaryOperator();
    return;
  }

  // First token of the expression at this precedence level.
  FormatToken *Start = Current;
  // Most recent operator of exactly this precedence, if any.
  FormatToken *LatestOperator = nullptr;
  // Running index assigned to operators of this precedence.
  unsigned OperatorIndex = 0;
  // The first name of the current type in a port list.
  FormatToken *VerilogFirstOfType = nullptr;

  while (Current) {
    // In Verilog ports in a module header that don't have a type take the
    // type of the previous one. For example,
    //   module a(output b,
    //            c,
    //            output d);
    // In this case there need to be fake parentheses around b and c.
    if (Style.isVerilog() && Precedence == prec::Comma) {
      VerilogFirstOfType =
          verilogGroupDecl(VerilogFirstOfType, LatestOperator);
    }

    // Consume operators with higher precedence.
    parse(Precedence + 1);

    int CurrentPrecedence = getCurrentPrecedence();

    // A selector name at this precedence starts a new group: close the one
    // collected so far (if it contains an operator) and restart from here.
    if (Precedence == CurrentPrecedence && Current &&
        Current->is(TT_SelectorName)) {
      if (LatestOperator)
        addFakeParenthesis(Start, prec::Level(Precedence));
      Start = Current;
    }

    if ((Style.isCSharp() || Style.isJavaScript() ||
         Style.Language == FormatStyle::LK_Java) &&
        Precedence == prec::Additive && Current) {
      // A string can be broken without parentheses around it when it is
      // already in a sequence of strings joined by `+` signs.
      FormatToken *Prev = Current->getPreviousNonComment();
      if (Prev && Prev->is(tok::string_literal) &&
          (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
                                               TT_StringInConcatenation))) {
        Prev->setType(TT_StringInConcatenation);
      }
    }

    // At the end of the line or when an operator with lower precedence is
    // found, insert fake parenthesis and return.
    if (!Current ||
        (Current->closesScope() &&
         (Current->MatchingParen || Current->is(TT_TemplateString))) ||
        (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
        (CurrentPrecedence == prec::Conditional &&
         Precedence == prec::Assignment && Current->is(tok::colon))) {
      break;
    }

    // Consume scopes: (), [], <> and {}
    // In addition to that we handle requires clauses as scopes, so that the
    // constraints in them are correctly indented.
    if (Current->opensScope() ||
        Current->isOneOf(TT_RequiresClause,
                         TT_RequiresClauseInARequiresExpression)) {
      // A fragment of a JavaScript template string can look like '}..${' and
      // thus close a scope and open a new one at the same time.
      while (Current && (!Current->closesScope() || Current->opensScope())) {
        next();
        parse();
      }
      // Step over the token that closes the scope.
      next();
    } else {
      // Operator found.
      if (CurrentPrecedence == Precedence) {
        if (LatestOperator)
          LatestOperator->NextOperator = Current;
        LatestOperator = Current;
        Current->OperatorIndex = OperatorIndex;
        ++OperatorIndex;
      }
      next(/*SkipPastLeadingComments=*/Precedence > 0);
    }
  }

  // Group variables of the same type.
  if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
    addFakeParenthesis(VerilogFirstOfType, prec::Comma);

  // Only wrap when an operator of this precedence was found. At precedence 0
  // this additionally requires that the end of the line was not reached.
  if (LatestOperator && (Current || Precedence > 0)) {
    // Requires clauses do not necessarily end in a semicolon or a brace, but
    // just go over to a struct/class or a function declaration, so we need to
    // intervene so that the fake right paren is inserted correctly.
    //
    // If the expression directly follows a requires-clause token, walk
    // backwards from Current (or from the last token of the line) to the
    // token flagged ClosesRequiresClause, stopping at the first token if no
    // token is flagged. Otherwise let addFakeParenthesis pick the end.
    auto End =
        (Start->Previous &&
         Start->Previous->isOneOf(TT_RequiresClause,
                                  TT_RequiresClauseInARequiresExpression))
            ? [this]() {
                auto Ret = Current ? Current : Line.Last;
                while (!Ret->ClosesRequiresClause && Ret->Previous)
                  Ret = Ret->Previous;
                return Ret;
              }()
            : nullptr;

    if (Precedence == PrecedenceArrowAndPeriod) {
      // Call expressions don't have a binary operator precedence.
      addFakeParenthesis(Start, prec::Unknown, End);
    } else {
      addFakeParenthesis(Start, prec::Level(Precedence), End);
    }
  }
}
2868
2869 private:
2870 /// Gets the precedence (+1) of the given token for binary operators
2871 /// and other tokens that we treat like binary operators.
getCurrentPrecedence()2872 int getCurrentPrecedence() {
2873 if (Current) {
2874 const FormatToken *NextNonComment = Current->getNextNonComment();
2875 if (Current->is(TT_ConditionalExpr))
2876 return prec::Conditional;
2877 if (NextNonComment && Current->is(TT_SelectorName) &&
2878 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2879 (Style.isProto() && NextNonComment->is(tok::less)))) {
2880 return prec::Assignment;
2881 }
2882 if (Current->is(TT_JsComputedPropertyName))
2883 return prec::Assignment;
2884 if (Current->is(TT_TrailingReturnArrow))
2885 return prec::Comma;
2886 if (Current->is(TT_FatArrow))
2887 return prec::Assignment;
2888 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2889 (Current->is(tok::comment) && NextNonComment &&
2890 NextNonComment->is(TT_SelectorName))) {
2891 return 0;
2892 }
2893 if (Current->is(TT_RangeBasedForLoopColon))
2894 return prec::Comma;
2895 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2896 Current->is(Keywords.kw_instanceof)) {
2897 return prec::Relational;
2898 }
2899 if (Style.isJavaScript() &&
2900 Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
2901 return prec::Relational;
2902 }
2903 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2904 return Current->getPrecedence();
2905 if (Current->isOneOf(tok::period, tok::arrow) &&
2906 Current->isNot(TT_TrailingReturnArrow)) {
2907 return PrecedenceArrowAndPeriod;
2908 }
2909 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2910 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2911 Keywords.kw_throws)) {
2912 return 0;
2913 }
2914 // In Verilog case labels are not on separate lines straight out of
2915 // UnwrappedLineParser. The colon is not part of an expression.
2916 if (Style.isVerilog() && Current->is(tok::colon))
2917 return 0;
2918 }
2919 return -1;
2920 }
2921
addFakeParenthesis(FormatToken * Start,prec::Level Precedence,FormatToken * End=nullptr)2922 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
2923 FormatToken *End = nullptr) {
2924 // Do not assign fake parenthesis to tokens that are part of an
2925 // unexpanded macro call. The line within the macro call contains
2926 // the parenthesis and commas, and we will not find operators within
2927 // that structure.
2928 if (Start->MacroParent)
2929 return;
2930
2931 Start->FakeLParens.push_back(Precedence);
2932 if (Precedence > prec::Unknown)
2933 Start->StartsBinaryExpression = true;
2934 if (!End && Current)
2935 End = Current->getPreviousNonComment();
2936 if (End) {
2937 ++End->FakeRParens;
2938 if (Precedence > prec::Unknown)
2939 End->EndsBinaryExpression = true;
2940 }
2941 }
2942
2943 /// Parse unary operator expressions and surround them with fake
2944 /// parentheses if appropriate.
parseUnaryOperator()2945 void parseUnaryOperator() {
2946 llvm::SmallVector<FormatToken *, 2> Tokens;
2947 while (Current && Current->is(TT_UnaryOperator)) {
2948 Tokens.push_back(Current);
2949 next();
2950 }
2951 parse(PrecedenceArrowAndPeriod);
2952 for (FormatToken *Token : llvm::reverse(Tokens)) {
2953 // The actual precedence doesn't matter.
2954 addFakeParenthesis(Token, prec::Unknown);
2955 }
2956 }
2957
parseConditionalExpr()2958 void parseConditionalExpr() {
2959 while (Current && Current->isTrailingComment())
2960 next();
2961 FormatToken *Start = Current;
2962 parse(prec::LogicalOr);
2963 if (!Current || Current->isNot(tok::question))
2964 return;
2965 next();
2966 parse(prec::Assignment);
2967 if (!Current || Current->isNot(TT_ConditionalExpr))
2968 return;
2969 next();
2970 parse(prec::Assignment);
2971 addFakeParenthesis(Start, prec::Conditional);
2972 }
2973
next(bool SkipPastLeadingComments=true)2974 void next(bool SkipPastLeadingComments = true) {
2975 if (Current)
2976 Current = Current->Next;
2977 while (Current &&
2978 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2979 Current->isTrailingComment()) {
2980 Current = Current->Next;
2981 }
2982 }
2983
2984 // Add fake parenthesis around declarations of the same type for example in a
2985 // module prototype. Return the first port / variable of the current type.
verilogGroupDecl(FormatToken * FirstOfType,FormatToken * PreviousComma)2986 FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
2987 FormatToken *PreviousComma) {
2988 if (!Current)
2989 return nullptr;
2990
2991 FormatToken *Start = Current;
2992
2993 // Skip attributes.
2994 while (Start->startsSequence(tok::l_paren, tok::star)) {
2995 if (!(Start = Start->MatchingParen) ||
2996 !(Start = Start->getNextNonComment())) {
2997 return nullptr;
2998 }
2999 }
3000
3001 FormatToken *Tok = Start;
3002
3003 if (Tok->is(Keywords.kw_assign))
3004 Tok = Tok->getNextNonComment();
3005
3006 // Skip any type qualifiers to find the first identifier. It may be either a
3007 // new type name or a variable name. There can be several type qualifiers
3008 // preceding a variable name, and we can not tell them apart by looking at
3009 // the word alone since a macro can be defined as either a type qualifier or
3010 // a variable name. Thus we use the last word before the dimensions instead
3011 // of the first word as the candidate for the variable or type name.
3012 FormatToken *First = nullptr;
3013 while (Tok) {
3014 FormatToken *Next = Tok->getNextNonComment();
3015
3016 if (Tok->is(tok::hash)) {
3017 // Start of a macro expansion.
3018 First = Tok;
3019 Tok = Next;
3020 if (Tok)
3021 Tok = Tok->getNextNonComment();
3022 } else if (Tok->is(tok::hashhash)) {
3023 // Concatenation. Skip.
3024 Tok = Next;
3025 if (Tok)
3026 Tok = Tok->getNextNonComment();
3027 } else if (Keywords.isVerilogQualifier(*Tok) ||
3028 Keywords.isVerilogIdentifier(*Tok)) {
3029 First = Tok;
3030 Tok = Next;
3031 // The name may have dots like `interface_foo.modport_foo`.
3032 while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
3033 (Tok = Tok->getNextNonComment())) {
3034 if (Keywords.isVerilogIdentifier(*Tok))
3035 Tok = Tok->getNextNonComment();
3036 }
3037 } else if (!Next) {
3038 Tok = nullptr;
3039 } else if (Tok->is(tok::l_paren)) {
3040 // Make sure the parenthesized list is a drive strength. Otherwise the
3041 // statement may be a module instantiation in which case we have already
3042 // found the instance name.
3043 if (Next->isOneOf(
3044 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
3045 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
3046 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
3047 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
3048 Keywords.kw_weak1)) {
3049 Tok->setType(TT_VerilogStrength);
3050 Tok = Tok->MatchingParen;
3051 if (Tok) {
3052 Tok->setType(TT_VerilogStrength);
3053 Tok = Tok->getNextNonComment();
3054 }
3055 } else {
3056 break;
3057 }
3058 } else if (Tok->is(tok::hash)) {
3059 if (Next->is(tok::l_paren))
3060 Next = Next->MatchingParen;
3061 if (Next)
3062 Tok = Next->getNextNonComment();
3063 } else {
3064 break;
3065 }
3066 }
3067
3068 // Find the second identifier. If it exists it will be the name.
3069 FormatToken *Second = nullptr;
3070 // Dimensions.
3071 while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
3072 Tok = Tok->getNextNonComment();
3073 if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3074 Second = Tok;
3075
3076 // If the second identifier doesn't exist and there are qualifiers, the type
3077 // is implied.
3078 FormatToken *TypedName = nullptr;
3079 if (Second) {
3080 TypedName = Second;
3081 if (First && First->is(TT_Unknown))
3082 First->setType(TT_VerilogDimensionedTypeName);
3083 } else if (First != Start) {
3084 // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3085 // to null as intended.
3086 TypedName = First;
3087 }
3088
3089 if (TypedName) {
3090 // This is a declaration with a new type.
3091 if (TypedName->is(TT_Unknown))
3092 TypedName->setType(TT_StartOfName);
3093 // Group variables of the previous type.
3094 if (FirstOfType && PreviousComma) {
3095 PreviousComma->setType(TT_VerilogTypeComma);
3096 addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3097 }
3098
3099 FirstOfType = TypedName;
3100
3101 // Don't let higher precedence handle the qualifiers. For example if we
3102 // have:
3103 // parameter x = 0
3104 // We skip `parameter` here. This way the fake parentheses for the
3105 // assignment will be around `x = 0`.
3106 while (Current && Current != FirstOfType) {
3107 if (Current->opensScope()) {
3108 next();
3109 parse();
3110 }
3111 next();
3112 }
3113 }
3114
3115 return FirstOfType;
3116 }
3117
3118 const FormatStyle &Style;
3119 const AdditionalKeywords &Keywords;
3120 const AnnotatedLine &Line;
3121 FormatToken *Current;
3122 };
3123
3124 } // end anonymous namespace
3125
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines) const3126 void TokenAnnotator::setCommentLineLevels(
3127 SmallVectorImpl<AnnotatedLine *> &Lines) const {
3128 const AnnotatedLine *NextNonCommentLine = nullptr;
3129 for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3130 assert(Line->First);
3131
3132 // If the comment is currently aligned with the line immediately following
3133 // it, that's probably intentional and we should keep it.
3134 if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
3135 Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
3136 NextNonCommentLine->First->OriginalColumn ==
3137 Line->First->OriginalColumn) {
3138 const bool PPDirectiveOrImportStmt =
3139 NextNonCommentLine->Type == LT_PreprocessorDirective ||
3140 NextNonCommentLine->Type == LT_ImportStatement;
3141 if (PPDirectiveOrImportStmt)
3142 Line->Type = LT_CommentAbovePPDirective;
3143 // Align comments for preprocessor lines with the # in column 0 if
3144 // preprocessor lines are not indented. Otherwise, align with the next
3145 // line.
3146 Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3147 PPDirectiveOrImportStmt
3148 ? 0
3149 : NextNonCommentLine->Level;
3150 } else {
3151 NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3152 }
3153
3154 setCommentLineLevels(Line->Children);
3155 }
3156 }
3157
maxNestingDepth(const AnnotatedLine & Line)3158 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3159 unsigned Result = 0;
3160 for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3161 Result = std::max(Result, Tok->NestingLevel);
3162 return Result;
3163 }
3164
3165 // Returns the name of a function with no return type, e.g. a constructor or
3166 // destructor.
getFunctionName(const AnnotatedLine & Line)3167 static FormatToken *getFunctionName(const AnnotatedLine &Line) {
3168 for (FormatToken *Tok = Line.getFirstNonComment(), *Name = nullptr; Tok;
3169 Tok = Tok->getNextNonComment()) {
3170 // Skip C++11 attributes both before and after the function name.
3171 if (Tok->is(tok::l_square) && Tok->is(TT_AttributeSquare)) {
3172 Tok = Tok->MatchingParen;
3173 if (!Tok)
3174 break;
3175 continue;
3176 }
3177
3178 // Make sure the name is followed by a pair of parentheses.
3179 if (Name) {
3180 return Tok->is(tok::l_paren) && Tok->isNot(TT_FunctionTypeLParen) &&
3181 Tok->MatchingParen
3182 ? Name
3183 : nullptr;
3184 }
3185
3186 // Skip keywords that may precede the constructor/destructor name.
3187 if (Tok->isOneOf(tok::kw_friend, tok::kw_inline, tok::kw_virtual,
3188 tok::kw_constexpr, tok::kw_consteval, tok::kw_explicit)) {
3189 continue;
3190 }
3191
3192 // A qualified name may start from the global namespace.
3193 if (Tok->is(tok::coloncolon)) {
3194 Tok = Tok->Next;
3195 if (!Tok)
3196 break;
3197 }
3198
3199 // Skip to the unqualified part of the name.
3200 while (Tok->startsSequence(tok::identifier, tok::coloncolon)) {
3201 assert(Tok->Next);
3202 Tok = Tok->Next->Next;
3203 if (!Tok)
3204 return nullptr;
3205 }
3206
3207 // Skip the `~` if a destructor name.
3208 if (Tok->is(tok::tilde)) {
3209 Tok = Tok->Next;
3210 if (!Tok)
3211 break;
3212 }
3213
3214 // Make sure the name is not already annotated, e.g. as NamespaceMacro.
3215 if (Tok->isNot(tok::identifier) || Tok->isNot(TT_Unknown))
3216 break;
3217
3218 Name = Tok;
3219 }
3220
3221 return nullptr;
3222 }
3223
3224 // Checks if Tok is a constructor/destructor name qualified by its class name.
isCtorOrDtorName(const FormatToken * Tok)3225 static bool isCtorOrDtorName(const FormatToken *Tok) {
3226 assert(Tok && Tok->is(tok::identifier));
3227 const auto *Prev = Tok->Previous;
3228
3229 if (Prev && Prev->is(tok::tilde))
3230 Prev = Prev->Previous;
3231
3232 if (!Prev || !Prev->endsSequence(tok::coloncolon, tok::identifier))
3233 return false;
3234
3235 assert(Prev->Previous);
3236 return Prev->Previous->TokenText == Tok->TokenText;
3237 }
3238
annotate(AnnotatedLine & Line)3239 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3240 AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3241 Line.Type = Parser.parseLine();
3242
3243 for (auto &Child : Line.Children)
3244 annotate(*Child);
3245
3246 // With very deep nesting, ExpressionParser uses lots of stack and the
3247 // formatting algorithm is very slow. We're not going to do a good job here
3248 // anyway - it's probably generated code being formatted by mistake.
3249 // Just skip the whole line.
3250 if (maxNestingDepth(Line) > 50)
3251 Line.Type = LT_Invalid;
3252
3253 if (Line.Type == LT_Invalid)
3254 return;
3255
3256 ExpressionParser ExprParser(Style, Keywords, Line);
3257 ExprParser.parse();
3258
3259 if (Style.isCpp()) {
3260 auto *Tok = getFunctionName(Line);
3261 if (Tok && ((!Scopes.empty() && Scopes.back() == ST_Class) ||
3262 Line.endsWith(TT_FunctionLBrace) || isCtorOrDtorName(Tok))) {
3263 Tok->setFinalizedType(TT_CtorDtorDeclName);
3264 }
3265 }
3266
3267 if (Line.startsWith(TT_ObjCMethodSpecifier))
3268 Line.Type = LT_ObjCMethodDecl;
3269 else if (Line.startsWith(TT_ObjCDecl))
3270 Line.Type = LT_ObjCDecl;
3271 else if (Line.startsWith(TT_ObjCProperty))
3272 Line.Type = LT_ObjCProperty;
3273
3274 auto *First = Line.First;
3275 First->SpacesRequiredBefore = 1;
3276 First->CanBreakBefore = First->MustBreakBefore;
3277
3278 if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
3279 Style.InsertNewlineAtEOF) {
3280 First->NewlinesBefore = 1;
3281 }
3282 }
3283
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
//
// 'Current' must have a previous token. 'IsCpp' is forwarded to
// isCppAttribute and additionally gates the K&R / unnamed-parameter check
// below. 'ClosingParen' is an output: it is set to the `)` matching the
// parameter list's `(` once such a pair has been found, and is left untouched
// on every path that returns before that point (including the early `true`
// results for `operator`).
static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
                                      const AnnotatedLine &Line,
                                      FormatToken *&ClosingParen) {
  assert(Current.Previous);

  // Already annotated as such.
  if (Current.is(TT_FunctionDeclarationName))
    return true;

  // Only identifiers and keywords can name a function.
  if (!Current.Tok.getIdentifierInfo())
    return false;

  // Starting at the token after `operator`, steps over the tokens that make
  // up the operator's name. Returns the TT_OverloadedOperatorLParen token
  // that opens the parameter list, or null if none is reached.
  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_OverloadedOperatorLParen))
        return Next;
      if (Next->is(TT_OverloadedOperator))
        continue;
      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
        // For 'new[]' and 'delete[]'.
        if (Next->Next &&
            Next->Next->startsSequence(tok::l_square, tok::r_square)) {
          Next = Next->Next->Next;
        }
        continue;
      }
      if (Next->startsSequence(tok::l_square, tok::r_square)) {
        // For operator[]().
        Next = Next->Next;
        continue;
      }
      if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
          Next->Next && Next->Next->isPointerOrReference()) {
        // For operator void*(), operator char*(), operator Foo*().
        Next = Next->Next;
        continue;
      }
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        // Jump to the closing `>`; the loop increment steps past it.
        Next = Next->MatchingParen;
        continue;
      }

      break;
    }
    return nullptr;
  };

  // Find parentheses of parameter list.
  const FormatToken *Next = Current.Next;
  if (Current.is(tok::kw_operator)) {
    const auto *Previous = Current.Previous;
    // `operator` after an identifier or keyword other than
    // `return`/`co_return` is taken to be a declaration.
    if (Previous->Tok.getIdentifierInfo() &&
        !Previous->isOneOf(tok::kw_return, tok::kw_co_return)) {
      return true;
    }
    // `operator` after the closing paren of a type declaration paren pair.
    if (Previous->is(tok::r_paren) && Previous->is(TT_TypeDeclarationParen)) {
      assert(Previous->MatchingParen);
      assert(Previous->MatchingParen->is(tok::l_paren));
      assert(Previous->MatchingParen->is(TT_TypeDeclarationParen));
      return true;
    }
    // Otherwise `operator` has to follow a pointer/reference token or a
    // template closer.
    if (!Previous->isPointerOrReference() && Previous->isNot(TT_TemplateCloser))
      return false;
    Next = skipOperatorName(Next);
  } else {
    // A regular name has to be a TT_StartOfName outside of any nesting.
    if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0)
      return false;
    // Walk over template argument lists, `::` qualifications and attributes
    // until the opening paren. Any other token means this is not a function
    // declaration name.
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        Next = Next->MatchingParen;
      } else if (Next->is(tok::coloncolon)) {
        Next = Next->Next;
        if (!Next)
          return false;
        if (Next->is(tok::kw_operator)) {
          Next = skipOperatorName(Next->Next);
          break;
        }
        if (Next->isNot(tok::identifier))
          return false;
      } else if (isCppAttribute(IsCpp, *Next)) {
        Next = Next->MatchingParen;
        if (!Next)
          return false;
      } else if (Next->is(tok::l_paren)) {
        break;
      } else {
        return false;
      }
    }
  }

  // Check whether parameter list can belong to a function declaration.
  if (!Next || Next->isNot(tok::l_paren) || !Next->MatchingParen)
    return false;
  ClosingParen = Next->MatchingParen;
  assert(ClosingParen->is(tok::r_paren));
  // If the line ends with "{", this is likely a function definition.
  if (Line.Last->is(tok::l_brace))
    return true;
  if (Next->Next == ClosingParen)
    return true; // Empty parentheses.
  // If there is an &/&& after the r_paren, this is likely a function.
  if (ClosingParen->Next && ClosingParen->Next->is(TT_PointerOrReference))
    return true;

  // Check for K&R C function definitions (and C++ function definitions with
  // unnamed parameters), e.g.:
  //   int f(i)
  //   {
  //     return i + 1;
  //   }
  //   bool g(size_t = 0, bool b = false)
  //   {
  //     return !b;
  //   }
  if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
      !Line.endsWith(tok::semi)) {
    return true;
  }

  // Scan the tokens between the parentheses, skipping nested paren and
  // template pairs. The first token that looks like part of a type decides
  // for a declaration; a brace, an ObjC method expression or a literal
  // decides against it.
  for (const FormatToken *Tok = Next->Next; Tok && Tok != ClosingParen;
       Tok = Tok->Next) {
    if (Tok->is(TT_TypeDeclarationParen))
      return true;
    if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
      Tok = Tok->MatchingParen;
      continue;
    }
    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis,
                     TT_TypeName)) {
      return true;
    }
    if (Tok->isOneOf(tok::l_brace, TT_ObjCMethodExpr) || Tok->Tok.isLiteral())
      return false;
  }
  return false;
}
3424
mustBreakForReturnType(const AnnotatedLine & Line) const3425 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3426 assert(Line.MightBeFunctionDecl);
3427
3428 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3429 Style.AlwaysBreakAfterReturnType ==
3430 FormatStyle::RTBS_TopLevelDefinitions) &&
3431 Line.Level > 0) {
3432 return false;
3433 }
3434
3435 switch (Style.AlwaysBreakAfterReturnType) {
3436 case FormatStyle::RTBS_None:
3437 return false;
3438 case FormatStyle::RTBS_All:
3439 case FormatStyle::RTBS_TopLevel:
3440 return true;
3441 case FormatStyle::RTBS_AllDefinitions:
3442 case FormatStyle::RTBS_TopLevelDefinitions:
3443 return Line.mightBeFunctionDefinition();
3444 }
3445
3446 return false;
3447 }
3448
calculateFormattingInformation(AnnotatedLine & Line) const3449 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
3450 for (AnnotatedLine *ChildLine : Line.Children)
3451 calculateFormattingInformation(*ChildLine);
3452
3453 auto *First = Line.First;
3454 First->TotalLength = First->IsMultiline
3455 ? Style.ColumnLimit
3456 : Line.FirstStartColumn + First->ColumnWidth;
3457 FormatToken *Current = First->Next;
3458 bool InFunctionDecl = Line.MightBeFunctionDecl;
3459 bool AlignArrayOfStructures =
3460 (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
3461 Line.Type == LT_ArrayOfStructInitializer);
3462 if (AlignArrayOfStructures)
3463 calculateArrayInitializerColumnList(Line);
3464
3465 const bool IsCpp = Style.isCpp();
3466 bool SeenName = false;
3467 bool LineIsFunctionDeclaration = false;
3468 FormatToken *ClosingParen = nullptr;
3469 FormatToken *AfterLastAttribute = nullptr;
3470
3471 for (auto *Tok = Current; Tok; Tok = Tok->Next) {
3472 if (Tok->is(TT_StartOfName))
3473 SeenName = true;
3474 if (Tok->Previous->EndsCppAttributeGroup)
3475 AfterLastAttribute = Tok;
3476 if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
3477 IsCtorOrDtor ||
3478 isFunctionDeclarationName(Style.isCpp(), *Tok, Line, ClosingParen)) {
3479 if (!IsCtorOrDtor)
3480 Tok->setFinalizedType(TT_FunctionDeclarationName);
3481 LineIsFunctionDeclaration = true;
3482 SeenName = true;
3483 break;
3484 }
3485 }
3486
3487 if (IsCpp && (LineIsFunctionDeclaration || First->is(TT_CtorDtorDeclName)) &&
3488 Line.endsWith(tok::semi, tok::r_brace)) {
3489 auto *Tok = Line.Last->Previous;
3490 while (Tok->isNot(tok::r_brace))
3491 Tok = Tok->Previous;
3492 if (auto *LBrace = Tok->MatchingParen; LBrace) {
3493 assert(LBrace->is(tok::l_brace));
3494 Tok->setBlockKind(BK_Block);
3495 LBrace->setBlockKind(BK_Block);
3496 LBrace->setFinalizedType(TT_FunctionLBrace);
3497 }
3498 }
3499
3500 if (IsCpp && SeenName && AfterLastAttribute &&
3501 mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
3502 AfterLastAttribute->MustBreakBefore = true;
3503 if (LineIsFunctionDeclaration)
3504 Line.ReturnTypeWrapped = true;
3505 }
3506
3507 if (IsCpp) {
3508 if (!LineIsFunctionDeclaration) {
3509 // Annotate */&/&& in `operator` function calls as binary operators.
3510 for (const auto *Tok = First; Tok; Tok = Tok->Next) {
3511 if (Tok->isNot(tok::kw_operator))
3512 continue;
3513 do {
3514 Tok = Tok->Next;
3515 } while (Tok && Tok->isNot(TT_OverloadedOperatorLParen));
3516 if (!Tok)
3517 break;
3518 const auto *LeftParen = Tok;
3519 for (Tok = Tok->Next; Tok && Tok != LeftParen->MatchingParen;
3520 Tok = Tok->Next) {
3521 if (Tok->isNot(tok::identifier))
3522 continue;
3523 auto *Next = Tok->Next;
3524 const bool NextIsBinaryOperator =
3525 Next && Next->isPointerOrReference() && Next->Next &&
3526 Next->Next->is(tok::identifier);
3527 if (!NextIsBinaryOperator)
3528 continue;
3529 Next->setType(TT_BinaryOperator);
3530 Tok = Next;
3531 }
3532 }
3533 } else if (ClosingParen) {
3534 for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) {
3535 if (Tok->is(TT_CtorInitializerColon))
3536 break;
3537 if (Tok->is(tok::arrow)) {
3538 Tok->setType(TT_TrailingReturnArrow);
3539 break;
3540 }
3541 if (Tok->isNot(TT_TrailingAnnotation))
3542 continue;
3543 const auto *Next = Tok->Next;
3544 if (!Next || Next->isNot(tok::l_paren))
3545 continue;
3546 Tok = Next->MatchingParen;
3547 if (!Tok)
3548 break;
3549 }
3550 }
3551 }
3552
3553 while (Current) {
3554 const FormatToken *Prev = Current->Previous;
3555 if (Current->is(TT_LineComment)) {
3556 if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
3557 Current->SpacesRequiredBefore =
3558 (Style.Cpp11BracedListStyle && !Style.SpacesInParensOptions.Other)
3559 ? 0
3560 : 1;
3561 } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
3562 Current->SpacesRequiredBefore = 0;
3563 } else {
3564 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
3565 }
3566
3567 // If we find a trailing comment, iterate backwards to determine whether
3568 // it seems to relate to a specific parameter. If so, break before that
3569 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
3570 // to the previous line in:
3571 // SomeFunction(a,
3572 // b, // comment
3573 // c);
3574 if (!Current->HasUnescapedNewline) {
3575 for (FormatToken *Parameter = Current->Previous; Parameter;
3576 Parameter = Parameter->Previous) {
3577 if (Parameter->isOneOf(tok::comment, tok::r_brace))
3578 break;
3579 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
3580 if (Parameter->Previous->isNot(TT_CtorInitializerComma) &&
3581 Parameter->HasUnescapedNewline) {
3582 Parameter->MustBreakBefore = true;
3583 }
3584 break;
3585 }
3586 }
3587 }
3588 } else if (!Current->Finalized && Current->SpacesRequiredBefore == 0 &&
3589 spaceRequiredBefore(Line, *Current)) {
3590 Current->SpacesRequiredBefore = 1;
3591 }
3592
3593 const auto &Children = Prev->Children;
3594 if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
3595 Current->MustBreakBefore = true;
3596 } else {
3597 Current->MustBreakBefore =
3598 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
3599 if (!Current->MustBreakBefore && InFunctionDecl &&
3600 Current->is(TT_FunctionDeclarationName)) {
3601 Current->MustBreakBefore = mustBreakForReturnType(Line);
3602 }
3603 }
3604
3605 Current->CanBreakBefore =
3606 Current->MustBreakBefore || canBreakBefore(Line, *Current);
3607 unsigned ChildSize = 0;
3608 if (Prev->Children.size() == 1) {
3609 FormatToken &LastOfChild = *Prev->Children[0]->Last;
3610 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
3611 : LastOfChild.TotalLength + 1;
3612 }
3613 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
3614 (Prev->Children.size() == 1 &&
3615 Prev->Children[0]->First->MustBreakBefore) ||
3616 Current->IsMultiline) {
3617 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
3618 } else {
3619 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
3620 ChildSize + Current->SpacesRequiredBefore;
3621 }
3622
3623 if (Current->is(TT_CtorInitializerColon))
3624 InFunctionDecl = false;
3625
3626 // FIXME: Only calculate this if CanBreakBefore is true once static
3627 // initializers etc. are sorted out.
3628 // FIXME: Move magic numbers to a better place.
3629
3630 // Reduce penalty for aligning ObjC method arguments using the colon
3631 // alignment as this is the canonical way (still prefer fitting everything
3632 // into one line if possible). Trying to fit a whole expression into one
3633 // line should not force other line breaks (e.g. when ObjC method
3634 // expression is a part of other expression).
3635 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
3636 if (Style.Language == FormatStyle::LK_ObjC &&
3637 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
3638 if (Current->ParameterIndex == 1)
3639 Current->SplitPenalty += 5 * Current->BindingStrength;
3640 } else {
3641 Current->SplitPenalty += 20 * Current->BindingStrength;
3642 }
3643
3644 Current = Current->Next;
3645 }
3646
3647 calculateUnbreakableTailLengths(Line);
3648 unsigned IndentLevel = Line.Level;
3649 for (Current = First; Current; Current = Current->Next) {
3650 if (Current->Role)
3651 Current->Role->precomputeFormattingInfos(Current);
3652 if (Current->MatchingParen &&
3653 Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
3654 IndentLevel > 0) {
3655 --IndentLevel;
3656 }
3657 Current->IndentLevel = IndentLevel;
3658 if (Current->opensBlockOrBlockTypeList(Style))
3659 ++IndentLevel;
3660 }
3661
3662 LLVM_DEBUG({ printDebugInfo(Line); });
3663 }
3664
calculateUnbreakableTailLengths(AnnotatedLine & Line) const3665 void TokenAnnotator::calculateUnbreakableTailLengths(
3666 AnnotatedLine &Line) const {
3667 unsigned UnbreakableTailLength = 0;
3668 FormatToken *Current = Line.Last;
3669 while (Current) {
3670 Current->UnbreakableTailLength = UnbreakableTailLength;
3671 if (Current->CanBreakBefore ||
3672 Current->isOneOf(tok::comment, tok::string_literal)) {
3673 UnbreakableTailLength = 0;
3674 } else {
3675 UnbreakableTailLength +=
3676 Current->ColumnWidth + Current->SpacesRequiredBefore;
3677 }
3678 Current = Current->Previous;
3679 }
3680 }
3681
calculateArrayInitializerColumnList(AnnotatedLine & Line) const3682 void TokenAnnotator::calculateArrayInitializerColumnList(
3683 AnnotatedLine &Line) const {
3684 if (Line.First == Line.Last)
3685 return;
3686 auto *CurrentToken = Line.First;
3687 CurrentToken->ArrayInitializerLineStart = true;
3688 unsigned Depth = 0;
3689 while (CurrentToken && CurrentToken != Line.Last) {
3690 if (CurrentToken->is(tok::l_brace)) {
3691 CurrentToken->IsArrayInitializer = true;
3692 if (CurrentToken->Next)
3693 CurrentToken->Next->MustBreakBefore = true;
3694 CurrentToken =
3695 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
3696 } else {
3697 CurrentToken = CurrentToken->Next;
3698 }
3699 }
3700 }
3701
calculateInitializerColumnList(AnnotatedLine & Line,FormatToken * CurrentToken,unsigned Depth) const3702 FormatToken *TokenAnnotator::calculateInitializerColumnList(
3703 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
3704 while (CurrentToken && CurrentToken != Line.Last) {
3705 if (CurrentToken->is(tok::l_brace))
3706 ++Depth;
3707 else if (CurrentToken->is(tok::r_brace))
3708 --Depth;
3709 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
3710 CurrentToken = CurrentToken->Next;
3711 if (!CurrentToken)
3712 break;
3713 CurrentToken->StartsColumn = true;
3714 CurrentToken = CurrentToken->Previous;
3715 }
3716 CurrentToken = CurrentToken->Next;
3717 }
3718 return CurrentToken;
3719 }
3720
/// Returns the penalty for splitting \p Line directly before \p Tok, i.e.
/// between \c Tok.Previous (called \c Left below) and \p Tok (\c Right).
///
/// The checks form a chain of early returns, so the first matching rule wins
/// and their order is significant. \p InFunctionDecl tells whether the token is
/// still considered part of a function declaration; it selects some of the
/// declaration-specific penalties.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) const {
  // NOTE(review): Tok.Previous is dereferenced unconditionally; callers are
  // expected never to pass the first token of a line.
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;

  // Language specific handling.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.isJavaScript()) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    // After the "${" that opens, or before the "}" that closes, a template
    // string substitution.
    if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
      return 100;
    }
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  } else if (Style.Language == FormatStyle::LK_Proto) {
    if (Right.is(tok::l_square))
      return 1;
    if (Right.is(tok::period))
      return 500;
  }

  // An identifier directly followed by a dictionary literal token.
  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    // Any other "[" that is not one of the listed special kinds.
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
      return 500;
    }
  }

  if (Left.is(tok::coloncolon))
    return Style.PenaltyBreakScopeResolution;
  // Breaking before a declared name or the `operator` keyword.
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_TrailingReturnArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon)) {
    return 2;
  }

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if it is not a call, as that might lead to a
    // wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  // Trailing annotation that is not followed by "(".
  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // The base penalty is 100 directly after ")" and 120 otherwise, so that
    // annotations like "const override" are kept together; annotations whose
    // text is shorter than 10 characters cost an additional 50.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  // In Objective-C type declarations, avoid breaking after the category's
  // open paren (we'll prefer breaking after the protocol list's opening
  // angle bracket, if present).
  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
      Left.Previous->isOneOf(tok::identifier, tok::greater)) {
    return 500;
  }

  // A non-zero PenaltyBreakOpenParenthesis overrides the remaining "(" rules.
  if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
    return Style.PenaltyBreakOpenParenthesis;
  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
    return 100;
  }
  // Directly after the "(" of `for`, `_Generic` or an if-like keyword.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
       Left.Previous->isIf())) {
    return 1000;
  }
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  if (Left.opensScope()) {
    // If we aren't aligning after opening parens/braces we can always break
    // here unless the style does not want us to place all arguments on the
    // next line.
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
        (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
      return 0;
    }
    if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
      return 19;
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  if (Left.is(TT_UnaryOperator))
    return 60;
  // After a "+" or "," that follows a label string, unless it is the only
  // operator of its chain.
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0)) {
    return 50;
  }
  // Before a "+" that follows a label string.
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0)) {
    return 25;
  }
  if (Left.is(tok::comma))
    return 1;
  // Before a "<<" that follows a label string.
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1)) {
    return 25;
  }
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (Left.isNot(tok::r_paren) || Right.OperatorIndex > 0) {
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    }
    return 1;
  }
  if (Left.ClosesTemplateDeclaration)
    return Style.PenaltyBreakTemplateDeclaration;
  if (Left.ClosesRequiresClause)
    return 0;
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;
  // Fall back to the operator precedence of Left or, if Left has none, of
  // Right; the precedence level itself is used as the penalty, except that
  // assignments use the configurable PenaltyBreakAssignment.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  // Default penalty when no rule above applies.
  return 3;
}
3935
spaceRequiredBeforeParens(const FormatToken & Right) const3936 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
3937 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
3938 return true;
3939 if (Right.is(TT_OverloadedOperatorLParen) &&
3940 Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
3941 return true;
3942 }
3943 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
3944 Right.ParameterCount > 0) {
3945 return true;
3946 }
3947 return false;
3948 }
3949
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right) const3950 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
3951 const FormatToken &Left,
3952 const FormatToken &Right) const {
3953 if (Left.is(tok::kw_return) &&
3954 !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
3955 return true;
3956 }
3957 if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
3958 Right.MatchingParen->is(TT_CastRParen)) {
3959 return true;
3960 }
3961 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
3962 return true;
3963 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
3964 Left.Tok.getObjCKeywordID() == tok::objc_property) {
3965 return true;
3966 }
3967 if (Right.is(tok::hashhash))
3968 return Left.is(tok::hash);
3969 if (Left.isOneOf(tok::hashhash, tok::hash))
3970 return Right.is(tok::hash);
3971 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
3972 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
3973 Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
3974 return Style.SpacesInParensOptions.InEmptyParentheses;
3975 }
3976 if (Style.SpacesInParensOptions.InConditionalStatements) {
3977 const FormatToken *LeftParen = nullptr;
3978 if (Left.is(tok::l_paren))
3979 LeftParen = &Left;
3980 else if (Right.is(tok::r_paren) && Right.MatchingParen)
3981 LeftParen = Right.MatchingParen;
3982 if (LeftParen) {
3983 if (LeftParen->is(TT_ConditionLParen))
3984 return true;
3985 if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
3986 return true;
3987 }
3988 }
3989
3990 // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
3991 if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
3992 // function return type 'auto'
3993 TT_FunctionTypeLParen)) {
3994 return true;
3995 }
3996
3997 // auto{x} auto(x)
3998 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
3999 return false;
4000
4001 // operator co_await(x)
4002 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
4003 Left.Previous->is(tok::kw_operator)) {
4004 return false;
4005 }
4006 // co_await (x), co_yield (x), co_return (x)
4007 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
4008 !Right.isOneOf(tok::semi, tok::r_paren)) {
4009 return true;
4010 }
4011
4012 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
4013 return (Right.is(TT_CastRParen) ||
4014 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
4015 ? Style.SpacesInParensOptions.InCStyleCasts
4016 : Style.SpacesInParensOptions.Other;
4017 }
4018 if (Right.isOneOf(tok::semi, tok::comma))
4019 return false;
4020 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
4021 bool IsLightweightGeneric = Right.MatchingParen &&
4022 Right.MatchingParen->Next &&
4023 Right.MatchingParen->Next->is(tok::colon);
4024 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
4025 }
4026 if (Right.is(tok::less) && Left.is(tok::kw_template))
4027 return Style.SpaceAfterTemplateKeyword;
4028 if (Left.isOneOf(tok::exclaim, tok::tilde))
4029 return false;
4030 if (Left.is(tok::at) &&
4031 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
4032 tok::numeric_constant, tok::l_paren, tok::l_brace,
4033 tok::kw_true, tok::kw_false)) {
4034 return false;
4035 }
4036 if (Left.is(tok::colon))
4037 return Left.isNot(TT_ObjCMethodExpr);
4038 if (Left.is(tok::coloncolon))
4039 return false;
4040 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
4041 if (Style.Language == FormatStyle::LK_TextProto ||
4042 (Style.Language == FormatStyle::LK_Proto &&
4043 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
4044 // Format empty list as `<>`.
4045 if (Left.is(tok::less) && Right.is(tok::greater))
4046 return false;
4047 return !Style.Cpp11BracedListStyle;
4048 }
4049 // Don't attempt to format operator<(), as it is handled later.
4050 if (Right.isNot(TT_OverloadedOperatorLParen))
4051 return false;
4052 }
4053 if (Right.is(tok::ellipsis)) {
4054 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
4055 Left.Previous->is(tok::kw_case));
4056 }
4057 if (Left.is(tok::l_square) && Right.is(tok::amp))
4058 return Style.SpacesInSquareBrackets;
4059 if (Right.is(TT_PointerOrReference)) {
4060 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
4061 if (!Left.MatchingParen)
4062 return true;
4063 FormatToken *TokenBeforeMatchingParen =
4064 Left.MatchingParen->getPreviousNonComment();
4065 if (!TokenBeforeMatchingParen || Left.isNot(TT_TypeDeclarationParen))
4066 return true;
4067 }
4068 // Add a space if the previous token is a pointer qualifier or the closing
4069 // parenthesis of __attribute__(()) expression and the style requires spaces
4070 // after pointer qualifiers.
4071 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
4072 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4073 (Left.is(TT_AttributeRParen) ||
4074 Left.canBePointerOrReferenceQualifier())) {
4075 return true;
4076 }
4077 if (Left.Tok.isLiteral())
4078 return true;
4079 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
4080 if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
4081 Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
4082 return getTokenPointerOrReferenceAlignment(Right) !=
4083 FormatStyle::PAS_Left;
4084 }
4085 return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
4086 (getTokenPointerOrReferenceAlignment(Right) !=
4087 FormatStyle::PAS_Left ||
4088 (Line.IsMultiVariableDeclStmt &&
4089 (Left.NestingLevel == 0 ||
4090 (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
4091 }
4092 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
4093 (Left.isNot(TT_PointerOrReference) ||
4094 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
4095 !Line.IsMultiVariableDeclStmt))) {
4096 return true;
4097 }
4098 if (Left.is(TT_PointerOrReference)) {
4099 // Add a space if the next token is a pointer qualifier and the style
4100 // requires spaces before pointer qualifiers.
4101 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
4102 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4103 Right.canBePointerOrReferenceQualifier()) {
4104 return true;
4105 }
4106 // & 1
4107 if (Right.Tok.isLiteral())
4108 return true;
4109 // & /* comment
4110 if (Right.is(TT_BlockComment))
4111 return true;
4112 // foo() -> const Bar * override/final
4113 // S::foo() & noexcept/requires
4114 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
4115 TT_RequiresClause) &&
4116 Right.isNot(TT_StartOfName)) {
4117 return true;
4118 }
4119 // & {
4120 if (Right.is(tok::l_brace) && Right.is(BK_Block))
4121 return true;
4122 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
4123 if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
4124 Right.Next->is(TT_RangeBasedForLoopColon)) {
4125 return getTokenPointerOrReferenceAlignment(Left) !=
4126 FormatStyle::PAS_Right;
4127 }
4128 if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
4129 tok::l_paren)) {
4130 return false;
4131 }
4132 if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
4133 return false;
4134 // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
4135 // because it does not take into account nested scopes like lambdas.
4136 // In multi-variable declaration statements, attach */& to the variable
4137 // independently of the style. However, avoid doing it if we are in a nested
4138 // scope, e.g. lambda. We still need to special-case statements with
4139 // initializers.
4140 if (Line.IsMultiVariableDeclStmt &&
4141 (Left.NestingLevel == Line.First->NestingLevel ||
4142 ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
4143 startsWithInitStatement(Line)))) {
4144 return false;
4145 }
4146 return Left.Previous && !Left.Previous->isOneOf(
4147 tok::l_paren, tok::coloncolon, tok::l_square);
4148 }
4149 // Ensure right pointer alignment with ellipsis e.g. int *...P
4150 if (Left.is(tok::ellipsis) && Left.Previous &&
4151 Left.Previous->isPointerOrReference()) {
4152 return Style.PointerAlignment != FormatStyle::PAS_Right;
4153 }
4154
4155 if (Right.is(tok::star) && Left.is(tok::l_paren))
4156 return false;
4157 if (Left.is(tok::star) && Right.isPointerOrReference())
4158 return false;
4159 if (Right.isPointerOrReference()) {
4160 const FormatToken *Previous = &Left;
4161 while (Previous && Previous->isNot(tok::kw_operator)) {
4162 if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
4163 Previous = Previous->getPreviousNonComment();
4164 continue;
4165 }
4166 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
4167 Previous = Previous->MatchingParen->getPreviousNonComment();
4168 continue;
4169 }
4170 if (Previous->is(tok::coloncolon)) {
4171 Previous = Previous->getPreviousNonComment();
4172 continue;
4173 }
4174 break;
4175 }
4176 // Space between the type and the * in:
4177 // operator void*()
4178 // operator char*()
4179 // operator void const*()
4180 // operator void volatile*()
4181 // operator /*comment*/ const char*()
4182 // operator volatile /*comment*/ char*()
4183 // operator Foo*()
4184 // operator C<T>*()
4185 // operator std::Foo*()
4186 // operator C<T>::D<U>*()
4187 // dependent on PointerAlignment style.
4188 if (Previous) {
4189 if (Previous->endsSequence(tok::kw_operator))
4190 return Style.PointerAlignment != FormatStyle::PAS_Left;
4191 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
4192 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
4193 (Style.SpaceAroundPointerQualifiers ==
4194 FormatStyle::SAPQ_After) ||
4195 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
4196 }
4197 }
4198 }
4199 if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
4200 return true;
4201 const auto SpaceRequiredForArrayInitializerLSquare =
4202 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
4203 return Style.SpacesInContainerLiterals ||
4204 (Style.isProto() && !Style.Cpp11BracedListStyle &&
4205 LSquareTok.endsSequence(tok::l_square, tok::colon,
4206 TT_SelectorName));
4207 };
4208 if (Left.is(tok::l_square)) {
4209 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
4210 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
4211 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
4212 TT_LambdaLSquare) &&
4213 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
4214 }
4215 if (Right.is(tok::r_square)) {
4216 return Right.MatchingParen &&
4217 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
4218 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
4219 Style)) ||
4220 (Style.SpacesInSquareBrackets &&
4221 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
4222 TT_StructuredBindingLSquare,
4223 TT_LambdaLSquare)));
4224 }
4225 if (Right.is(tok::l_square) &&
4226 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4227 TT_DesignatedInitializerLSquare,
4228 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
4229 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
4230 !(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
4231 Right.is(TT_ArraySubscriptLSquare))) {
4232 return false;
4233 }
4234 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
4235 return !Left.Children.empty(); // No spaces in "{}".
4236 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
4237 (Right.is(tok::r_brace) && Right.MatchingParen &&
4238 Right.MatchingParen->isNot(BK_Block))) {
4239 return !Style.Cpp11BracedListStyle || Style.SpacesInParensOptions.Other;
4240 }
4241 if (Left.is(TT_BlockComment)) {
4242 // No whitespace in x(/*foo=*/1), except for JavaScript.
4243 return Style.isJavaScript() || !Left.TokenText.ends_with("=*/");
4244 }
4245
4246 // Space between template and attribute.
4247 // e.g. template <typename T> [[nodiscard]] ...
4248 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4249 return true;
4250 // Space before parentheses common for all languages
4251 if (Right.is(tok::l_paren)) {
4252 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4253 return spaceRequiredBeforeParens(Right);
4254 if (Left.isOneOf(TT_RequiresClause,
4255 TT_RequiresClauseInARequiresExpression)) {
4256 return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4257 spaceRequiredBeforeParens(Right);
4258 }
4259 if (Left.is(TT_RequiresExpression)) {
4260 return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4261 spaceRequiredBeforeParens(Right);
4262 }
4263 if (Left.is(TT_AttributeRParen) ||
4264 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4265 return true;
4266 }
4267 if (Left.is(TT_ForEachMacro)) {
4268 return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4269 spaceRequiredBeforeParens(Right);
4270 }
4271 if (Left.is(TT_IfMacro)) {
4272 return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4273 spaceRequiredBeforeParens(Right);
4274 }
4275 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
4276 Left.isOneOf(tok::kw_new, tok::kw_delete) &&
4277 Right.isNot(TT_OverloadedOperatorLParen) &&
4278 !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
4279 return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
4280 }
4281 if (Line.Type == LT_ObjCDecl)
4282 return true;
4283 if (Left.is(tok::semi))
4284 return true;
4285 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4286 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4287 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4288 Right.is(TT_ConditionLParen)) {
4289 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4290 spaceRequiredBeforeParens(Right);
4291 }
4292
4293 // TODO add Operator overloading specific Options to
4294 // SpaceBeforeParensOptions
4295 if (Right.is(TT_OverloadedOperatorLParen))
4296 return spaceRequiredBeforeParens(Right);
4297 // Function declaration or definition
4298 if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
4299 if (Line.mightBeFunctionDefinition()) {
4300 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4301 spaceRequiredBeforeParens(Right);
4302 } else {
4303 return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
4304 spaceRequiredBeforeParens(Right);
4305 }
4306 }
4307 // Lambda
4308 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4309 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4310 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4311 spaceRequiredBeforeParens(Right);
4312 }
4313 if (!Left.Previous || Left.Previous->isNot(tok::period)) {
4314 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4315 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4316 spaceRequiredBeforeParens(Right);
4317 }
4318 if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4319 return ((!Line.MightBeFunctionDecl || !Left.Previous) &&
4320 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4321 spaceRequiredBeforeParens(Right);
4322 }
4323
4324 if (Left.is(tok::r_square) && Left.MatchingParen &&
4325 Left.MatchingParen->Previous &&
4326 Left.MatchingParen->Previous->is(tok::kw_delete)) {
4327 return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4328 spaceRequiredBeforeParens(Right);
4329 }
4330 }
4331 // Handle builtins like identifiers.
4332 if (Line.Type != LT_PreprocessorDirective &&
4333 (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4334 return spaceRequiredBeforeParens(Right);
4335 }
4336 return false;
4337 }
4338 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4339 return false;
4340 if (Right.is(TT_UnaryOperator)) {
4341 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4342 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4343 }
4344 // No space between the variable name and the initializer list.
4345 // A a1{1};
4346 // Verilog doesn't have such syntax, but it has word operators that are C++
4347 // identifiers like `a inside {b, c}`. So the rule is not applicable.
4348 if (!Style.isVerilog() &&
4349 (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4350 tok::r_paren) ||
4351 Left.isSimpleTypeSpecifier()) &&
4352 Right.is(tok::l_brace) && Right.getNextNonComment() &&
4353 Right.isNot(BK_Block)) {
4354 return false;
4355 }
4356 if (Left.is(tok::period) || Right.is(tok::period))
4357 return false;
4358 // u#str, U#str, L#str, u8#str
4359 // uR#str, UR#str, LR#str, u8R#str
4360 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4361 (Left.TokenText == "L" || Left.TokenText == "u" ||
4362 Left.TokenText == "U" || Left.TokenText == "u8" ||
4363 Left.TokenText == "LR" || Left.TokenText == "uR" ||
4364 Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4365 return false;
4366 }
4367 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4368 Left.MatchingParen->Previous &&
4369 (Left.MatchingParen->Previous->is(tok::period) ||
4370 Left.MatchingParen->Previous->is(tok::coloncolon))) {
4371 // Java call to generic function with explicit type:
4372 // A.<B<C<...>>>DoSomething();
4373 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
4374 return false;
4375 }
4376 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4377 return false;
4378 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4379 // Objective-C dictionary literal -> no space after opening brace.
4380 return false;
4381 }
4382 if (Right.is(tok::r_brace) && Right.MatchingParen &&
4383 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4384 // Objective-C dictionary literal -> no space before closing brace.
4385 return false;
4386 }
4387 if (Right.getType() == TT_TrailingAnnotation &&
4388 Right.isOneOf(tok::amp, tok::ampamp) &&
4389 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4390 (!Right.Next || Right.Next->is(tok::semi))) {
4391 // Match const and volatile ref-qualifiers without any additional
4392 // qualifiers such as
4393 // void Fn() const &;
4394 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4395 }
4396
4397 return true;
4398 }
4399
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right) const4400 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4401 const FormatToken &Right) const {
4402 const FormatToken &Left = *Right.Previous;
4403
4404 // If the token is finalized don't touch it (as it could be in a
4405 // clang-format-off section).
4406 if (Left.Finalized)
4407 return Right.hasWhitespaceBefore();
4408
4409 // Never ever merge two words.
4410 if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
4411 return true;
4412
4413 // Leave a space between * and /* to avoid C4138 `comment end` found outside
4414 // of comment.
4415 if (Left.is(tok::star) && Right.is(tok::comment))
4416 return true;
4417
4418 if (Style.isCpp()) {
4419 if (Left.is(TT_OverloadedOperator) &&
4420 Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4421 return true;
4422 }
4423 // Space between UDL and dot: auto b = 4s .count();
4424 if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4425 return true;
4426 // Space between import <iostream>.
4427 // or import .....;
4428 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4429 return true;
4430 // Space between `module :` and `import :`.
4431 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4432 Right.is(TT_ModulePartitionColon)) {
4433 return true;
4434 }
4435 // No space between import foo:bar but keep a space between import :bar;
4436 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4437 return false;
4438 // No space between :bar;
4439 if (Left.is(TT_ModulePartitionColon) &&
4440 Right.isOneOf(tok::identifier, tok::kw_private)) {
4441 return false;
4442 }
4443 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4444 Line.First->is(Keywords.kw_import)) {
4445 return false;
4446 }
4447 // Space in __attribute__((attr)) ::type.
4448 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
4449 Right.is(tok::coloncolon)) {
4450 return true;
4451 }
4452
4453 if (Left.is(tok::kw_operator))
4454 return Right.is(tok::coloncolon);
4455 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4456 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4457 return true;
4458 }
4459 if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4460 Right.is(TT_TemplateOpener)) {
4461 return true;
4462 }
4463 } else if (Style.isProto()) {
4464 if (Right.is(tok::period) &&
4465 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4466 Keywords.kw_repeated, Keywords.kw_extend)) {
4467 return true;
4468 }
4469 if (Right.is(tok::l_paren) &&
4470 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4471 return true;
4472 }
4473 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4474 return true;
4475 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4476 if (Left.is(tok::slash) || Right.is(tok::slash))
4477 return false;
4478 if (Left.MatchingParen &&
4479 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4480 Right.isOneOf(tok::l_brace, tok::less)) {
4481 return !Style.Cpp11BracedListStyle;
4482 }
4483 // A percent is probably part of a formatting specification, such as %lld.
4484 if (Left.is(tok::percent))
4485 return false;
4486 // Preserve the existence of a space before a percent for cases like 0x%04x
4487 // and "%d %d"
4488 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4489 return Right.hasWhitespaceBefore();
4490 } else if (Style.isJson()) {
4491 if (Right.is(tok::colon) && Left.is(tok::string_literal))
4492 return Style.SpaceBeforeJsonColon;
4493 } else if (Style.isCSharp()) {
4494 // Require spaces around '{' and before '}' unless they appear in
4495 // interpolated strings. Interpolated strings are merged into a single token
4496 // so cannot have spaces inserted by this function.
4497
4498 // No space between 'this' and '['
4499 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4500 return false;
4501
4502 // No space between 'new' and '('
4503 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4504 return false;
4505
4506 // Space before { (including space within '{ {').
4507 if (Right.is(tok::l_brace))
4508 return true;
4509
4510 // Spaces inside braces.
4511 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4512 return true;
4513
4514 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4515 return true;
4516
4517 // Spaces around '=>'.
4518 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4519 return true;
4520
4521 // No spaces around attribute target colons
4522 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4523 return false;
4524
4525 // space between type and variable e.g. Dictionary<string,string> foo;
4526 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4527 return true;
4528
4529 // spaces inside square brackets.
4530 if (Left.is(tok::l_square) || Right.is(tok::r_square))
4531 return Style.SpacesInSquareBrackets;
4532
4533 // No space before ? in nullable types.
4534 if (Right.is(TT_CSharpNullable))
4535 return false;
4536
4537 // No space before null forgiving '!'.
4538 if (Right.is(TT_NonNullAssertion))
4539 return false;
4540
4541 // No space between consecutive commas '[,,]'.
4542 if (Left.is(tok::comma) && Right.is(tok::comma))
4543 return false;
4544
4545 // space after var in `var (key, value)`
4546 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
4547 return true;
4548
4549 // space between keywords and paren e.g. "using ("
4550 if (Right.is(tok::l_paren)) {
4551 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
4552 Keywords.kw_lock)) {
4553 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4554 spaceRequiredBeforeParens(Right);
4555 }
4556 }
4557
4558 // space between method modifier and opening parenthesis of a tuple return
4559 // type
4560 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
4561 tok::kw_virtual, tok::kw_extern, tok::kw_static,
4562 Keywords.kw_internal, Keywords.kw_abstract,
4563 Keywords.kw_sealed, Keywords.kw_override,
4564 Keywords.kw_async, Keywords.kw_unsafe) &&
4565 Right.is(tok::l_paren)) {
4566 return true;
4567 }
4568 } else if (Style.isJavaScript()) {
4569 if (Left.is(TT_FatArrow))
4570 return true;
4571 // for await ( ...
4572 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
4573 Left.Previous->is(tok::kw_for)) {
4574 return true;
4575 }
4576 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
4577 Right.MatchingParen) {
4578 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
4579 // An async arrow function, for example: `x = async () => foo();`,
4580 // as opposed to calling a function called async: `x = async();`
4581 if (Next && Next->is(TT_FatArrow))
4582 return true;
4583 }
4584 if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
4585 (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
4586 return false;
4587 }
4588 // In tagged template literals ("html`bar baz`"), there is no space between
4589 // the tag identifier and the template string.
4590 if (Keywords.IsJavaScriptIdentifier(Left,
4591 /* AcceptIdentifierName= */ false) &&
4592 Right.is(TT_TemplateString)) {
4593 return false;
4594 }
4595 if (Right.is(tok::star) &&
4596 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
4597 return false;
4598 }
4599 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
4600 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
4601 Keywords.kw_extends, Keywords.kw_implements)) {
4602 return true;
4603 }
4604 if (Right.is(tok::l_paren)) {
4605 // JS methods can use some keywords as names (e.g. `delete()`).
4606 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
4607 return false;
4608 // Valid JS method names can include keywords, e.g. `foo.delete()` or
4609 // `bar.instanceof()`. Recognize call positions by preceding period.
4610 if (Left.Previous && Left.Previous->is(tok::period) &&
4611 Left.Tok.getIdentifierInfo()) {
4612 return false;
4613 }
4614 // Additional unary JavaScript operators that need a space after.
4615 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
4616 tok::kw_void)) {
4617 return true;
4618 }
4619 }
4620 // `foo as const;` casts into a const type.
4621 if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
4622 return false;
4623 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
4624 tok::kw_const) ||
4625 // "of" is only a keyword if it appears after another identifier
4626 // (e.g. as "const x of y" in a for loop), or after a destructuring
4627 // operation (const [x, y] of z, const {a, b} of c).
4628 (Left.is(Keywords.kw_of) && Left.Previous &&
4629 (Left.Previous->is(tok::identifier) ||
4630 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
4631 (!Left.Previous || Left.Previous->isNot(tok::period))) {
4632 return true;
4633 }
4634 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
4635 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
4636 return false;
4637 }
4638 if (Left.is(Keywords.kw_as) &&
4639 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
4640 return true;
4641 }
4642 if (Left.is(tok::kw_default) && Left.Previous &&
4643 Left.Previous->is(tok::kw_export)) {
4644 return true;
4645 }
4646 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
4647 return true;
4648 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
4649 return false;
4650 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
4651 return false;
4652 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
4653 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
4654 return false;
4655 }
4656 if (Left.is(tok::ellipsis))
4657 return false;
4658 if (Left.is(TT_TemplateCloser) &&
4659 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
4660 Keywords.kw_implements, Keywords.kw_extends)) {
4661 // Type assertions ('<type>expr') are not followed by whitespace. Other
4662 // locations that should have whitespace following are identified by the
4663 // above set of follower tokens.
4664 return false;
4665 }
4666 if (Right.is(TT_NonNullAssertion))
4667 return false;
4668 if (Left.is(TT_NonNullAssertion) &&
4669 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
4670 return true; // "x! as string", "x! in y"
4671 }
4672 } else if (Style.Language == FormatStyle::LK_Java) {
4673 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
4674 return true;
4675 // spaces inside square brackets.
4676 if (Left.is(tok::l_square) || Right.is(tok::r_square))
4677 return Style.SpacesInSquareBrackets;
4678
4679 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
4680 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4681 spaceRequiredBeforeParens(Right);
4682 }
4683 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
4684 tok::kw_protected) ||
4685 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
4686 Keywords.kw_native)) &&
4687 Right.is(TT_TemplateOpener)) {
4688 return true;
4689 }
4690 } else if (Style.isVerilog()) {
4691 // An escaped identifier ends with whitespace.
4692 if (Style.isVerilog() && Left.is(tok::identifier) &&
4693 Left.TokenText[0] == '\\') {
4694 return true;
4695 }
4696 // Add space between things in a primitive's state table unless in a
4697 // transition like `(0?)`.
4698 if ((Left.is(TT_VerilogTableItem) &&
4699 !Right.isOneOf(tok::r_paren, tok::semi)) ||
4700 (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
4701 const FormatToken *Next = Right.getNextNonComment();
4702 return !(Next && Next->is(tok::r_paren));
4703 }
4704 // Don't add space within a delay like `#0`.
4705 if (Left.isNot(TT_BinaryOperator) &&
4706 Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
4707 return false;
4708 }
4709 // Add space after a delay.
4710 if (Right.isNot(tok::semi) &&
4711 (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
4712 Left.endsSequence(tok::numeric_constant,
4713 Keywords.kw_verilogHashHash) ||
4714 (Left.is(tok::r_paren) && Left.MatchingParen &&
4715 Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
4716 return true;
4717 }
4718 // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
4719 // literal like `'{}`.
4720 if (Left.is(Keywords.kw_apostrophe) ||
4721 (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
4722 return false;
4723 }
4724 // Add spaces around the implication operator `->`.
4725 if (Left.is(tok::arrow) || Right.is(tok::arrow))
4726 return true;
4727 // Don't add spaces between two at signs. Like in a coverage event.
4728 // Don't add spaces between at and a sensitivity list like
4729 // `@(posedge clk)`.
4730 if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
4731 return false;
4732 // Add space between the type name and dimension like `logic [1:0]`.
4733 if (Right.is(tok::l_square) &&
4734 Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
4735 return true;
4736 }
4737 // In a tagged union expression, there should be a space after the tag.
4738 if (Right.isOneOf(tok::period, Keywords.kw_apostrophe) &&
4739 Keywords.isVerilogIdentifier(Left) && Left.getPreviousNonComment() &&
4740 Left.getPreviousNonComment()->is(Keywords.kw_tagged)) {
4741 return true;
4742 }
4743 // Don't add spaces between a casting type and the quote or repetition count
4744 // and the brace. The case of tagged union expressions is handled by the
4745 // previous rule.
4746 if ((Right.is(Keywords.kw_apostrophe) ||
4747 (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
4748 !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
4749 Keywords.isVerilogWordOperator(Left)) &&
4750 (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
4751 tok::numeric_constant) ||
4752 Keywords.isWordLike(Left))) {
4753 return false;
4754 }
4755 // Don't add spaces in imports like `import foo::*;`.
4756 if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
4757 (Left.is(tok::star) && Right.is(tok::semi))) {
4758 return false;
4759 }
4760 // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
4761 if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
4762 return true;
4763 // Add space before drive strength like in `wire (strong1, pull0)`.
4764 if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
4765 return true;
4766 // Don't add space in a streaming concatenation like `{>>{j}}`.
4767 if ((Left.is(tok::l_brace) &&
4768 Right.isOneOf(tok::lessless, tok::greatergreater)) ||
4769 (Left.endsSequence(tok::lessless, tok::l_brace) ||
4770 Left.endsSequence(tok::greatergreater, tok::l_brace))) {
4771 return false;
4772 }
4773 }
4774 if (Left.is(TT_ImplicitStringLiteral))
4775 return Right.hasWhitespaceBefore();
4776 if (Line.Type == LT_ObjCMethodDecl) {
4777 if (Left.is(TT_ObjCMethodSpecifier))
4778 return true;
4779 if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) &&
4780 canBeObjCSelectorComponent(Right)) {
4781 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
4782 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
4783 // method declaration.
4784 return false;
4785 }
4786 }
4787 if (Line.Type == LT_ObjCProperty &&
4788 (Right.is(tok::equal) || Left.is(tok::equal))) {
4789 return false;
4790 }
4791
4792 if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow))
4793 return true;
4794
4795 if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
4796 // In an unexpanded macro call we only find the parentheses and commas
4797 // in a line; the commas and closing parenthesis do not require a space.
4798 (Left.Children.empty() || !Left.MacroParent)) {
4799 return true;
4800 }
4801 if (Right.is(tok::comma))
4802 return false;
4803 if (Right.is(TT_ObjCBlockLParen))
4804 return true;
4805 if (Right.is(TT_CtorInitializerColon))
4806 return Style.SpaceBeforeCtorInitializerColon;
4807 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
4808 return false;
4809 if (Right.is(TT_RangeBasedForLoopColon) &&
4810 !Style.SpaceBeforeRangeBasedForLoopColon) {
4811 return false;
4812 }
4813 if (Left.is(TT_BitFieldColon)) {
4814 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4815 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
4816 }
4817 if (Right.is(tok::colon)) {
4818 if (Right.is(TT_CaseLabelColon))
4819 return Style.SpaceBeforeCaseColon;
4820 if (Right.is(TT_GotoLabelColon))
4821 return false;
4822 // `private:` and `public:`.
4823 if (!Right.getNextNonComment())
4824 return false;
4825 if (Right.is(TT_ObjCMethodExpr))
4826 return false;
4827 if (Left.is(tok::question))
4828 return false;
4829 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
4830 return false;
4831 if (Right.is(TT_DictLiteral))
4832 return Style.SpacesInContainerLiterals;
4833 if (Right.is(TT_AttributeColon))
4834 return false;
4835 if (Right.is(TT_CSharpNamedArgumentColon))
4836 return false;
4837 if (Right.is(TT_GenericSelectionColon))
4838 return false;
4839 if (Right.is(TT_BitFieldColon)) {
4840 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4841 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
4842 }
4843 return true;
4844 }
4845 // Do not merge "- -" into "--".
4846 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
4847 Right.isOneOf(tok::minus, tok::minusminus)) ||
4848 (Left.isOneOf(tok::plus, tok::plusplus) &&
4849 Right.isOneOf(tok::plus, tok::plusplus))) {
4850 return true;
4851 }
4852 if (Left.is(TT_UnaryOperator)) {
4853 if (Right.isNot(tok::l_paren)) {
4854 // The alternative operators for ~ and ! are "compl" and "not".
4855 // If they are used instead, we do not want to combine them with
4856 // the token to the right, unless that is a left paren.
4857 if (Left.is(tok::exclaim) && Left.TokenText == "not")
4858 return true;
4859 if (Left.is(tok::tilde) && Left.TokenText == "compl")
4860 return true;
4861 // Lambda captures allow for a lone &, so "&]" needs to be properly
4862 // handled.
4863 if (Left.is(tok::amp) && Right.is(tok::r_square))
4864 return Style.SpacesInSquareBrackets;
4865 }
4866 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
4867 Right.is(TT_BinaryOperator);
4868 }
4869
4870 // If the next token is a binary operator or a selector name, we have
4871 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
4872 if (Left.is(TT_CastRParen)) {
4873 return Style.SpaceAfterCStyleCast ||
4874 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
4875 }
4876
4877 auto ShouldAddSpacesInAngles = [this, &Right]() {
4878 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
4879 return true;
4880 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
4881 return Right.hasWhitespaceBefore();
4882 return false;
4883 };
4884
4885 if (Left.is(tok::greater) && Right.is(tok::greater)) {
4886 if (Style.Language == FormatStyle::LK_TextProto ||
4887 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
4888 return !Style.Cpp11BracedListStyle;
4889 }
4890 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
4891 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4892 ShouldAddSpacesInAngles());
4893 }
4894 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
4895 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
4896 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
4897 return false;
4898 }
4899 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
4900 Right.getPrecedence() == prec::Assignment) {
4901 return false;
4902 }
4903 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
4904 (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
4905 return false;
4906 }
4907 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
4908 // Generally don't remove existing spaces between an identifier and "::".
4909 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
4910 // this turns out to be too lenient, add analysis of the identifier itself.
4911 return Right.hasWhitespaceBefore();
4912 }
4913 if (Right.is(tok::coloncolon) &&
4914 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
4915 // Put a space between < and :: in vector< ::std::string >
4916 return (Left.is(TT_TemplateOpener) &&
4917 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4918 ShouldAddSpacesInAngles())) ||
4919 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
4920 tok::kw___super, TT_TemplateOpener,
4921 TT_TemplateCloser)) ||
4922 (Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
4923 }
4924 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
4925 return ShouldAddSpacesInAngles();
4926 // Space before TT_StructuredBindingLSquare.
4927 if (Right.is(TT_StructuredBindingLSquare)) {
4928 return !Left.isOneOf(tok::amp, tok::ampamp) ||
4929 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
4930 }
4931 // Space before & or && following a TT_StructuredBindingLSquare.
4932 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
4933 Right.isOneOf(tok::amp, tok::ampamp)) {
4934 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4935 }
4936 if ((Right.is(TT_BinaryOperator) && Left.isNot(tok::l_paren)) ||
4937 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
4938 Right.isNot(tok::r_paren))) {
4939 return true;
4940 }
4941 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
4942 Left.MatchingParen &&
4943 Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
4944 return false;
4945 }
4946 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
4947 Line.Type == LT_ImportStatement) {
4948 return true;
4949 }
4950 if (Right.is(TT_TrailingUnaryOperator))
4951 return false;
4952 if (Left.is(TT_RegexLiteral))
4953 return false;
4954 return spaceRequiredBetween(Line, Left, Right);
4955 }
4956
4957 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)4958 static bool isAllmanBrace(const FormatToken &Tok) {
4959 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4960 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
4961 }
4962
4963 // Returns 'true' if 'Tok' is a function argument.
IsFunctionArgument(const FormatToken & Tok)4964 static bool IsFunctionArgument(const FormatToken &Tok) {
4965 return Tok.MatchingParen && Tok.MatchingParen->Next &&
4966 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
4967 }
4968
4969 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)4970 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
4971 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
4972 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
4973 }
4974
isAllmanLambdaBrace(const FormatToken & Tok)4975 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
4976 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4977 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
4978 }
4979
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const4980 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
4981 const FormatToken &Right) const {
4982 const FormatToken &Left = *Right.Previous;
4983 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
4984 return true;
4985
4986 if (Style.isCSharp()) {
4987 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
4988 Style.BraceWrapping.AfterFunction) {
4989 return true;
4990 }
4991 if (Right.is(TT_CSharpNamedArgumentColon) ||
4992 Left.is(TT_CSharpNamedArgumentColon)) {
4993 return false;
4994 }
4995 if (Right.is(TT_CSharpGenericTypeConstraint))
4996 return true;
4997 if (Right.Next && Right.Next->is(TT_FatArrow) &&
4998 (Right.is(tok::numeric_constant) ||
4999 (Right.is(tok::identifier) && Right.TokenText == "_"))) {
5000 return true;
5001 }
5002
5003 // Break after C# [...] and before public/protected/private/internal.
5004 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
5005 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
5006 Right.is(Keywords.kw_internal))) {
5007 return true;
5008 }
5009 // Break between ] and [ but only when there are really 2 attributes.
5010 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
5011 Left.is(tok::r_square) && Right.is(tok::l_square)) {
5012 return true;
5013 }
5014
5015 } else if (Style.isJavaScript()) {
5016 // FIXME: This might apply to other languages and token kinds.
5017 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
5018 Left.Previous->is(tok::string_literal)) {
5019 return true;
5020 }
5021 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
5022 Left.Previous && Left.Previous->is(tok::equal) &&
5023 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
5024 tok::kw_const) &&
5025 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
5026 // above.
5027 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
5028 // Object literals on the top level of a file are treated as "enum-style".
5029 // Each key/value pair is put on a separate line, instead of bin-packing.
5030 return true;
5031 }
5032 if (Left.is(tok::l_brace) && Line.Level == 0 &&
5033 (Line.startsWith(tok::kw_enum) ||
5034 Line.startsWith(tok::kw_const, tok::kw_enum) ||
5035 Line.startsWith(tok::kw_export, tok::kw_enum) ||
5036 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
5037 // JavaScript top-level enum key/value pairs are put on separate lines
5038 // instead of bin-packing.
5039 return true;
5040 }
5041 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
5042 Left.Previous->is(TT_FatArrow)) {
5043 // JS arrow function (=> {...}).
5044 switch (Style.AllowShortLambdasOnASingleLine) {
5045 case FormatStyle::SLS_All:
5046 return false;
5047 case FormatStyle::SLS_None:
5048 return true;
5049 case FormatStyle::SLS_Empty:
5050 return !Left.Children.empty();
5051 case FormatStyle::SLS_Inline:
5052 // allow one-lining inline (e.g. in function call args) and empty arrow
5053 // functions.
5054 return (Left.NestingLevel == 0 && Line.Level == 0) &&
5055 !Left.Children.empty();
5056 }
5057 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
5058 }
5059
5060 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
5061 !Left.Children.empty()) {
5062 // Support AllowShortFunctionsOnASingleLine for JavaScript.
5063 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
5064 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
5065 (Left.NestingLevel == 0 && Line.Level == 0 &&
5066 Style.AllowShortFunctionsOnASingleLine &
5067 FormatStyle::SFS_InlineOnly);
5068 }
5069 } else if (Style.Language == FormatStyle::LK_Java) {
5070 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
5071 Right.Next->is(tok::string_literal)) {
5072 return true;
5073 }
5074 } else if (Style.isVerilog()) {
5075 // Break between assignments.
5076 if (Left.is(TT_VerilogAssignComma))
5077 return true;
5078 // Break between ports of different types.
5079 if (Left.is(TT_VerilogTypeComma))
5080 return true;
5081 // Break between ports in a module instantiation and after the parameter
5082 // list.
5083 if (Style.VerilogBreakBetweenInstancePorts &&
5084 (Left.is(TT_VerilogInstancePortComma) ||
5085 (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
5086 Left.MatchingParen &&
5087 Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
5088 return true;
5089 }
5090 // Break after labels. In Verilog labels don't have the 'case' keyword, so
5091 // it is hard to identify them in UnwrappedLineParser.
5092 if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
5093 return true;
5094 } else if (Style.BreakAdjacentStringLiterals &&
5095 (Style.isCpp() || Style.isProto() ||
5096 Style.Language == FormatStyle::LK_TableGen)) {
5097 if (Left.isStringLiteral() && Right.isStringLiteral())
5098 return true;
5099 }
5100
5101 // Basic JSON newline processing.
5102 if (Style.isJson()) {
5103 // Always break after a JSON record opener.
5104 // {
5105 // }
5106 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
5107 return true;
5108 // Always break after a JSON array opener based on BreakArrays.
5109 if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
5110 Right.isNot(tok::r_square)) ||
5111 Left.is(tok::comma)) {
5112 if (Right.is(tok::l_brace))
5113 return true;
      // Scan to the right: if we see an object or an array inside, then
      // break.
5116 for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
5117 if (Tok->isOneOf(tok::l_brace, tok::l_square))
5118 return true;
5119 if (Tok->isOneOf(tok::r_brace, tok::r_square))
5120 break;
5121 }
5122 return Style.BreakArrays;
5123 }
5124 }
5125
5126 if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
5127 Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
5128 return true;
5129 }
5130
5131 // If the last token before a '}', ']', or ')' is a comma or a trailing
5132 // comment, the intention is to insert a line break after it in order to make
5133 // shuffling around entries easier. Import statements, especially in
5134 // JavaScript, can be an exception to this rule.
5135 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
5136 const FormatToken *BeforeClosingBrace = nullptr;
5137 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
5138 (Style.isJavaScript() && Left.is(tok::l_paren))) &&
5139 Left.isNot(BK_Block) && Left.MatchingParen) {
5140 BeforeClosingBrace = Left.MatchingParen->Previous;
5141 } else if (Right.MatchingParen &&
5142 (Right.MatchingParen->isOneOf(tok::l_brace,
5143 TT_ArrayInitializerLSquare) ||
5144 (Style.isJavaScript() &&
5145 Right.MatchingParen->is(tok::l_paren)))) {
5146 BeforeClosingBrace = &Left;
5147 }
5148 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
5149 BeforeClosingBrace->isTrailingComment())) {
5150 return true;
5151 }
5152 }
5153
5154 if (Right.is(tok::comment)) {
5155 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
5156 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
5157 }
5158 if (Left.isTrailingComment())
5159 return true;
5160 if (Left.IsUnterminatedLiteral)
5161 return true;
5162 if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
5163 Right.Next->is(tok::string_literal)) {
5164 return true;
5165 }
5166 if (Right.is(TT_RequiresClause)) {
5167 switch (Style.RequiresClausePosition) {
5168 case FormatStyle::RCPS_OwnLine:
5169 case FormatStyle::RCPS_WithFollowing:
5170 return true;
5171 default:
5172 break;
5173 }
5174 }
5175 // Can break after template<> declaration
5176 if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
5177 Left.MatchingParen->NestingLevel == 0) {
5178 // Put concepts on the next line e.g.
5179 // template<typename T>
5180 // concept ...
5181 if (Right.is(tok::kw_concept))
5182 return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
5183 return Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes;
5184 }
5185 if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
5186 switch (Style.RequiresClausePosition) {
5187 case FormatStyle::RCPS_OwnLine:
5188 case FormatStyle::RCPS_WithPreceding:
5189 return true;
5190 default:
5191 break;
5192 }
5193 }
5194 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
5195 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
5196 (Left.is(TT_CtorInitializerComma) ||
5197 Right.is(TT_CtorInitializerColon))) {
5198 return true;
5199 }
5200
5201 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5202 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
5203 return true;
5204 }
5205 }
5206 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
5207 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
5208 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
5209 return true;
5210 }
5211 if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
5212 if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
5213 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
5214 Right.is(TT_CtorInitializerColon)) {
5215 return true;
5216 }
5217
5218 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5219 Left.is(TT_CtorInitializerColon)) {
5220 return true;
5221 }
5222 }
5223 // Break only if we have multiple inheritance.
5224 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
5225 Right.is(TT_InheritanceComma)) {
5226 return true;
5227 }
5228 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
5229 Left.is(TT_InheritanceComma)) {
5230 return true;
5231 }
5232 if (Right.is(tok::string_literal) && Right.TokenText.starts_with("R\"")) {
5233 // Multiline raw string literals are special wrt. line breaks. The author
5234 // has made a deliberate choice and might have aligned the contents of the
    // string literal accordingly. Thus, we try to keep existing line breaks.
5236 return Right.IsMultiline && Right.NewlinesBefore > 0;
5237 }
5238 if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
5239 Left.Previous->is(tok::equal))) &&
5240 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
5241 // Don't put enums or option definitions onto single lines in protocol
5242 // buffers.
5243 return true;
5244 }
5245 if (Right.is(TT_InlineASMBrace))
5246 return Right.HasUnescapedNewline;
5247
5248 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
5249 auto *FirstNonComment = Line.getFirstNonComment();
5250 bool AccessSpecifier =
5251 FirstNonComment &&
5252 FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
5253 tok::kw_private, tok::kw_protected);
5254
5255 if (Style.BraceWrapping.AfterEnum) {
5256 if (Line.startsWith(tok::kw_enum) ||
5257 Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5258 return true;
5259 }
5260 // Ensure BraceWrapping for `public enum A {`.
5261 if (AccessSpecifier && FirstNonComment->Next &&
5262 FirstNonComment->Next->is(tok::kw_enum)) {
5263 return true;
5264 }
5265 }
5266
5267 // Ensure BraceWrapping for `public interface A {`.
5268 if (Style.BraceWrapping.AfterClass &&
5269 ((AccessSpecifier && FirstNonComment->Next &&
5270 FirstNonComment->Next->is(Keywords.kw_interface)) ||
5271 Line.startsWith(Keywords.kw_interface))) {
5272 return true;
5273 }
5274
5275 // Don't attempt to interpret struct return types as structs.
5276 if (Right.isNot(TT_FunctionLBrace)) {
5277 return (Line.startsWith(tok::kw_class) &&
5278 Style.BraceWrapping.AfterClass) ||
5279 (Line.startsWith(tok::kw_struct) &&
5280 Style.BraceWrapping.AfterStruct);
5281 }
5282 }
5283
5284 if (Left.is(TT_ObjCBlockLBrace) &&
5285 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5286 return true;
5287 }
5288
5289 // Ensure wrapping after __attribute__((XX)) and @interface etc.
5290 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
5291 Right.is(TT_ObjCDecl)) {
5292 return true;
5293 }
5294
5295 if (Left.is(TT_LambdaLBrace)) {
5296 if (IsFunctionArgument(Left) &&
5297 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5298 return false;
5299 }
5300
5301 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5302 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5303 (!Left.Children.empty() &&
5304 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5305 return true;
5306 }
5307 }
5308
5309 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5310 (Left.isPointerOrReference() || Left.is(TT_TemplateCloser))) {
5311 return true;
5312 }
5313
5314 // Put multiple Java annotation on a new line.
5315 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5316 Left.is(TT_LeadingJavaAnnotation) &&
5317 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5318 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5319 return true;
5320 }
5321
5322 if (Right.is(TT_ProtoExtensionLSquare))
5323 return true;
5324
5325 // In text proto instances if a submessage contains at least 2 entries and at
5326 // least one of them is a submessage, like A { ... B { ... } ... },
5327 // put all of the entries of A on separate lines by forcing the selector of
5328 // the submessage B to be put on a newline.
5329 //
5330 // Example: these can stay on one line:
5331 // a { scalar_1: 1 scalar_2: 2 }
5332 // a { b { key: value } }
5333 //
5334 // and these entries need to be on a new line even if putting them all in one
5335 // line is under the column limit:
5336 // a {
5337 // scalar: 1
5338 // b { key: value }
5339 // }
5340 //
5341 // We enforce this by breaking before a submessage field that has previous
5342 // siblings, *and* breaking before a field that follows a submessage field.
5343 //
5344 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
5345 // the TT_SelectorName there, but we don't want to break inside the brackets.
5346 //
5347 // Another edge case is @submessage { key: value }, which is a common
5348 // substitution placeholder. In this case we want to keep `@` and `submessage`
5349 // together.
5350 //
5351 // We ensure elsewhere that extensions are always on their own line.
5352 if (Style.isProto() && Right.is(TT_SelectorName) &&
5353 Right.isNot(tok::r_square) && Right.Next) {
5354 // Keep `@submessage` together in:
5355 // @submessage { key: value }
5356 if (Left.is(tok::at))
5357 return false;
5358 // Look for the scope opener after selector in cases like:
5359 // selector { ...
5360 // selector: { ...
5361 // selector: @base { ...
5362 FormatToken *LBrace = Right.Next;
5363 if (LBrace && LBrace->is(tok::colon)) {
5364 LBrace = LBrace->Next;
5365 if (LBrace && LBrace->is(tok::at)) {
5366 LBrace = LBrace->Next;
5367 if (LBrace)
5368 LBrace = LBrace->Next;
5369 }
5370 }
5371 if (LBrace &&
5372 // The scope opener is one of {, [, <:
5373 // selector { ... }
5374 // selector [ ... ]
5375 // selector < ... >
5376 //
5377 // In case of selector { ... }, the l_brace is TT_DictLiteral.
5378 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5379 // so we check for immediately following r_brace.
5380 ((LBrace->is(tok::l_brace) &&
5381 (LBrace->is(TT_DictLiteral) ||
5382 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5383 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5384 // If Left.ParameterCount is 0, then this submessage entry is not the
5385 // first in its parent submessage, and we want to break before this entry.
5386 // If Left.ParameterCount is greater than 0, then its parent submessage
5387 // might contain 1 or more entries and we want to break before this entry
5388 // if it contains at least 2 entries. We deal with this case later by
5389 // detecting and breaking before the next entry in the parent submessage.
5390 if (Left.ParameterCount == 0)
5391 return true;
5392 // However, if this submessage is the first entry in its parent
5393 // submessage, Left.ParameterCount might be 1 in some cases.
5394 // We deal with this case later by detecting an entry
5395 // following a closing paren of this submessage.
5396 }
5397
5398 // If this is an entry immediately following a submessage, it will be
5399 // preceded by a closing paren of that submessage, like in:
5400 // left---. .---right
5401 // v v
5402 // sub: { ... } key: value
    // If there was a comment between `}` and `key` above, then `key` would be
    // put on a new line anyways.
5405 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5406 return true;
5407 }
5408
5409 return false;
5410 }
5411
/// Decides whether a line break may be inserted between \p Right and the
/// token preceding it (called \c Left below).
///
/// The body is an ordered list of rules. Language-specific checks for C#,
/// Java and JavaScript come first, followed by rules keyed on token kinds and
/// token types. Rules are evaluated top to bottom and return as soon as one
/// applies, so their order is significant. If none applies, the final
/// \c return statement decides based on a fixed set of token kinds.
///
/// \param Line the annotated line that contains \p Right.
/// \param Right the token before which a break is considered. Its \c Previous
/// pointer is dereferenced unconditionally, so it must be non-null.
/// \returns \c true if a break before \p Right is permitted.
bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const FormatToken &Right) const {
  const FormatToken &Left = *Right.Previous;
  // Language-specific stuff.
  if (Style.isCSharp()) {
    // Never break on either side of a named-argument colon or attribute colon.
    if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
        Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
      return false;
    }
    // Only break after commas for generic type constraints.
    if (Line.First->is(TT_CSharpGenericTypeConstraint))
      return Left.is(TT_CSharpGenericTypeConstraintComma);
    // Keep nullable operators attached to their identifiers.
    if (Right.is(TT_CSharpNullable))
      return false;
  } else if (Style.Language == FormatStyle::LK_Java) {
    // Do not break directly after throws/extends/implements, but allow a break
    // directly before them.
    if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                     Keywords.kw_implements)) {
      return false;
    }
    if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                      Keywords.kw_implements)) {
      return true;
    }
  } else if (Style.isJavaScript()) {
    // The closest preceding token that is not a comment (may be null).
    const FormatToken *NonComment = Right.getPreviousNonComment();
    if (NonComment &&
        NonComment->isOneOf(
            tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
            tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
            tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
            Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
            Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
            Keywords.kw_await)) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    if (Right.NestingLevel == 0 &&
        (Left.Tok.getIdentifierInfo() ||
         Left.isOneOf(tok::r_square, tok::r_paren)) &&
        Right.isOneOf(tok::l_square, tok::l_paren)) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    // Do not break after an identifier spelled "asserts".
    if (NonComment && NonComment->is(tok::identifier) &&
        NonComment->TokenText == "asserts") {
      return false;
    }
    // Keep the opening brace of an arrow function body next to the `=>`.
    if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
      return false;
    if (Left.is(TT_JsTypeColon))
      return true;
    // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
    if (Left.is(tok::exclaim) && Right.is(tok::colon))
      return false;
    // Look for is type annotations like:
    //   function f(): a is B { ... }
    // Do not break before is in these cases.
    if (Right.is(Keywords.kw_is)) {
      const FormatToken *Next = Right.getNextNonComment();
      // If `is` is followed by a colon, it's likely that it's a dict key, so
      // ignore it for this check.
      // For example this is common in Polymer:
      //   Polymer({
      //     is: 'name',
      //     ...
      //   });
      if (!Next || Next->isNot(tok::colon))
        return false;
    }
    // `in` follows the BreakBeforeBinaryOperators setting: break after it only
    // when that option is BOS_None, and before it otherwise.
    if (Left.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_as))
      return false; // must not break before as in 'x as type' casts
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
      // extends and infer can appear as keywords in conditional types:
      //   https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
      // do not break before them, as the expressions are subject to ASI.
      return false;
    }
    if (Left.is(Keywords.kw_as))
      return true;
    if (Left.is(TT_NonNullAssertion))
      return true;
    if (Left.is(Keywords.kw_declare) &&
        Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
                      Keywords.kw_function, tok::kw_class, tok::kw_enum,
                      Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
                      Keywords.kw_let, tok::kw_const)) {
      // See grammar for 'declare' statements at:
      // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
      return false;
    }
    if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
        Right.isOneOf(tok::identifier, tok::string_literal)) {
      return false; // must not break in "module foo { ...}"
    }
    // Do not break before a template-string token that closes a scope.
    if (Right.is(TT_TemplateString) && Right.closesScope())
      return false;
    // Don't split tagged template literal so there is a break between the tag
    // identifier and template string.
    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
      return false;
    // A break is allowed after a template-string token that opens a scope.
    if (Left.is(TT_TemplateString) && Left.opensScope())
      return true;
  }

  // Rules below apply to every language, including the ones handled above
  // when none of their specific rules returned.
  if (Left.is(tok::at))
    return false;
  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
    return false;
  // After an annotation a break is fine unless its argument list follows.
  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
    return Right.isNot(tok::l_paren);
  // Before a pointer/reference token: allowed in multi-variable declarations,
  // or when the token is right-aligned and not directly followed by a function
  // declaration name.
  if (Right.is(TT_PointerOrReference)) {
    return Line.IsMultiVariableDeclStmt ||
           (getTokenPointerOrReferenceAlignment(Right) ==
                FormatStyle::PAS_Right &&
            (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
  }
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    return true;
  }
  if (Left.is(TT_PointerOrReference))
    return false;
  if (Right.isTrailingComment()) {
    // We rely on MustBreakBefore being set correctly here as we should not
    // change the "binding" behavior of a comment.
    // The first comment in a braced lists is always interpreted as belonging to
    // the first list element. Otherwise, it should be placed outside of the
    // list.
    return Left.is(BK_BracedInit) ||
           (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
            Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
  }
  // Ternary operators: `?` directly followed by `:` stays together; otherwise
  // the break goes before or after the operator per BreakBeforeTernaryOperators.
  if (Left.is(tok::question) && Right.is(tok::colon))
    return false;
  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
    return Style.BreakBeforeTernaryOperators;
  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
    return !Style.BreakBeforeTernaryOperators;
  // Inheritance colon: break after it only for BILS_AfterColon, before it for
  // every other BreakInheritanceList value.
  if (Left.is(TT_InheritanceColon))
    return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
  if (Right.is(TT_InheritanceColon))
    return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
  if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
      Left.isNot(TT_SelectorName)) {
    return true;
  }

  // Any colon that reaches this point and is neither a constructor-initializer
  // colon nor an inline-asm colon must stay attached to the previous token.
  if (Right.is(tok::colon) &&
      !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
    return false;
  }
  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
    if (Style.isProto()) {
      if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
        return false;
      // Prevent cases like:
      //
      // submessage:
      //     { key: valueeeeeeeeeeee }
      //
      // when the snippet does not fit into one line.
      // Prefer:
      //
      // submessage: {
      //   key: valueeeeeeeeeeee
      // }
      //
      // instead, even if it is longer by one line.
      //
      // Note that this allows the "{" to go over the column limit
      // when the column limit is just between ":" and "{", but that does
      // not happen too often and alternative formattings in this case are
      // not much better.
      //
      // The code covers the cases:
      //
      // submessage: { ... }
      // submessage: < ... >
      // repeated: [ ... ]
      if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
           Right.is(TT_DictLiteral)) ||
          Right.is(TT_ArrayInitializerLSquare)) {
        return false;
      }
    }
    return true;
  }
  // Do not break before the `]` whose opening bracket is a proto extension
  // bracket.
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
    return false;
  }
  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
                                    Right.Next->is(TT_ObjCMethodExpr))) {
    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
  }
  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
    return true;
  if (Right.is(tok::kw_concept))
    return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
  if (Right.is(TT_RequiresClause))
    return true;
  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
    return true;
  if (Left.ClosesRequiresClause)
    return true;
  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
                    TT_OverloadedOperator)) {
    return false;
  }
  if (Left.is(TT_RangeBasedForLoopColon))
    return true;
  // NOTE(review): TT_RangeBasedForLoopColon on the right already returned
  // false a few lines above, so this check looks unreachable - confirm.
  if (Right.is(TT_RangeBasedForLoopColon))
    return false;
  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
    return true;
  // Keep adjacent `> >` and `< <` tokens on the same line.
  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
      (Left.is(tok::less) && Right.is(tok::less))) {
    return false;
  }
  // Breaking before a binary operator requires BreakBeforeBinaryOperators to
  // be enabled; assignment operators additionally require BOS_All.
  if (Right.is(TT_BinaryOperator) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
       Right.getPrecedence() != prec::Assignment)) {
    return true;
  }
  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
      Left.is(tok::kw_operator)) {
    return false;
  }
  // In a virtual function declaration line, do not break after a top-level
  // `=` unless it is followed by `default` or `delete`.
  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
      Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
    return false;
  }
  if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
      !Style.Cpp11BracedListStyle) {
    return false;
  }
  if (Left.is(TT_AttributeLParen) ||
      (Left.is(tok::l_paren) && Left.is(TT_TypeDeclarationParen))) {
    return false;
  }
  // Do not break after a `(` that directly follows a binary operator or the
  // closing paren of a cast.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
    return false;
  }
  if (Right.is(TT_ImplicitStringLiteral))
    return false;

  if (Right.is(TT_TemplateCloser))
    return false;
  // Do not break before the `]` that closes a lambda introducer.
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_LambdaLSquare)) {
    return false;
  }

  // We only break before r_brace if there was a corresponding break before
  // the l_brace, which is tracked by BreakBeforeClosingBrace.
  if (Right.is(tok::r_brace)) {
    return Right.MatchingParen && (Right.MatchingParen->is(BK_Block) ||
                                   (Right.isBlockIndentedInitRBrace(Style)));
  }

  // We only break before r_paren if we're in a block indented context.
  if (Right.is(tok::r_paren)) {
    if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
        !Right.MatchingParen) {
      return false;
    }
    // Skip at most one further `)`; if a `(` follows, do not break here.
    auto Next = Right.Next;
    if (Next && Next->is(tok::r_paren))
      Next = Next->Next;
    if (Next && Next->is(tok::l_paren))
      return false;
    // No break before the `)` whose `(` directly follows `for` or an `if`.
    const FormatToken *Previous = Right.MatchingParen->Previous;
    return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
  }

  // Allow breaking after a trailing annotation, e.g. after a method
  // declaration.
  if (Left.is(TT_TrailingAnnotation)) {
    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
                          tok::less, tok::coloncolon);
  }

  if (Right.isAttribute())
    return true;

  // Before an attribute `[`: allowed unless the previous token is itself an
  // attribute square bracket.
  if (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))
    return Left.isNot(TT_AttributeSquare);

  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
    return true;

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return true;

  if (Left.is(TT_CtorInitializerColon)) {
    // NOTE(review): a trailing comment on the right already returned earlier
    // in this function, so the second operand looks redundant - confirm.
    return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
           (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
  }
  if (Right.is(TT_CtorInitializerColon))
    return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
  // With BCIS_BeforeComma the break goes before the initializer comma, never
  // after it.
  if (Left.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return true;
  }
  // Same placement rule for inheritance commas under BILS_BeforeComma.
  if (Left.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return true;
  }
  if (Left.is(TT_ArrayInitializerLSquare))
    return true;
  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
    return true;
  // Breaking after a binary operator (other than `->*` and `<<`): allowed when
  // BreakBeforeBinaryOperators is BOS_None, or for assignment operators when
  // it is anything but BOS_All.
  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
       Left.getPrecedence() == prec::Assignment)) {
    return true;
  }
  // Keep the two-token attribute brackets `[[` and `]]` together.
  if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
      (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
    return false;
  }

  auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
  if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
    if (isAllmanLambdaBrace(Left))
      return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
    if (isAllmanLambdaBrace(Right))
      return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
  }

  // `noexcept` used as a trailing annotation follows its dedicated option.
  if (Right.is(tok::kw_noexcept) && Right.is(TT_TrailingAnnotation)) {
    switch (Style.AllowBreakBeforeNoexceptSpecifier) {
    case FormatStyle::BBNSS_Never:
      return false;
    case FormatStyle::BBNSS_Always:
      return true;
    case FormatStyle::BBNSS_OnlyWithParen:
      return Right.Next && Right.Next->is(tok::l_paren);
    }
  }

  // Fallback when no rule above applied: a break is allowed only after or
  // before the token kinds listed here.
  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
                      tok::kw_class, tok::kw_struct, tok::comment) ||
         Right.isMemberAccess() ||
         Right.isOneOf(TT_TrailingReturnArrow, tok::lessless, tok::colon,
                       tok::l_square, tok::at) ||
         (Left.is(tok::r_paren) &&
          Right.isOneOf(tok::identifier, tok::kw_const)) ||
         (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
         (Left.is(TT_TemplateOpener) && Right.isNot(TT_TemplateCloser));
}
5778
printDebugInfo(const AnnotatedLine & Line) const5779 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
5780 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
5781 << ", T=" << Line.Type << ", C=" << Line.IsContinuation
5782 << "):\n";
5783 const FormatToken *Tok = Line.First;
5784 while (Tok) {
5785 llvm::errs() << " M=" << Tok->MustBreakBefore
5786 << " C=" << Tok->CanBreakBefore
5787 << " T=" << getTokenTypeName(Tok->getType())
5788 << " S=" << Tok->SpacesRequiredBefore
5789 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
5790 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
5791 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
5792 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
5793 for (prec::Level LParen : Tok->FakeLParens)
5794 llvm::errs() << LParen << "/";
5795 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
5796 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
5797 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
5798 if (!Tok->Next)
5799 assert(Tok == Line.Last);
5800 Tok = Tok->Next;
5801 }
5802 llvm::errs() << "----\n";
5803 }
5804
5805 FormatStyle::PointerAlignmentStyle
getTokenReferenceAlignment(const FormatToken & Reference) const5806 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
5807 assert(Reference.isOneOf(tok::amp, tok::ampamp));
5808 switch (Style.ReferenceAlignment) {
5809 case FormatStyle::RAS_Pointer:
5810 return Style.PointerAlignment;
5811 case FormatStyle::RAS_Left:
5812 return FormatStyle::PAS_Left;
5813 case FormatStyle::RAS_Right:
5814 return FormatStyle::PAS_Right;
5815 case FormatStyle::RAS_Middle:
5816 return FormatStyle::PAS_Middle;
5817 }
5818 assert(0); //"Unhandled value of ReferenceAlignment"
5819 return Style.PointerAlignment;
5820 }
5821
5822 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(const FormatToken & PointerOrReference) const5823 TokenAnnotator::getTokenPointerOrReferenceAlignment(
5824 const FormatToken &PointerOrReference) const {
5825 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
5826 switch (Style.ReferenceAlignment) {
5827 case FormatStyle::RAS_Pointer:
5828 return Style.PointerAlignment;
5829 case FormatStyle::RAS_Left:
5830 return FormatStyle::PAS_Left;
5831 case FormatStyle::RAS_Right:
5832 return FormatStyle::PAS_Right;
5833 case FormatStyle::RAS_Middle:
5834 return FormatStyle::PAS_Middle;
5835 }
5836 }
5837 assert(PointerOrReference.is(tok::star));
5838 return Style.PointerAlignment;
5839 }
5840
5841 } // namespace format
5842 } // namespace clang
5843