1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "format-token-annotator"
23 
24 namespace clang {
25 namespace format {
26 
mustBreakAfterAttributes(const FormatToken & Tok,const FormatStyle & Style)27 static bool mustBreakAfterAttributes(const FormatToken &Tok,
28                                      const FormatStyle &Style) {
29   switch (Style.BreakAfterAttributes) {
30   case FormatStyle::ABS_Always:
31     return true;
32   case FormatStyle::ABS_Leave:
33     return Tok.NewlinesBefore > 0;
34   default:
35     return false;
36   }
37 }
38 
39 namespace {
40 
41 /// Returns \c true if the line starts with a token that can start a statement
42 /// with an initializer.
startsWithInitStatement(const AnnotatedLine & Line)43 static bool startsWithInitStatement(const AnnotatedLine &Line) {
44   return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
45          Line.startsWith(tok::kw_switch);
46 }
47 
48 /// Returns \c true if the token can be used as an identifier in
49 /// an Objective-C \c \@selector, \c false otherwise.
50 ///
51 /// Because getFormattingLangOpts() always lexes source code as
52 /// Objective-C++, C++ keywords like \c new and \c delete are
53 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
54 ///
55 /// For Objective-C and Objective-C++, both identifiers and keywords
56 /// are valid inside @selector(...) (or a macro which
57 /// invokes @selector(...)). So, we allow treat any identifier or
58 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)59 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
60   return Tok.Tok.getIdentifierInfo();
61 }
62 
63 /// With `Left` being '(', check if we're at either `[...](` or
64 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)65 static bool isLambdaParameterList(const FormatToken *Left) {
66   // Skip <...> if present.
67   if (Left->Previous && Left->Previous->is(tok::greater) &&
68       Left->Previous->MatchingParen &&
69       Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
70     Left = Left->Previous->MatchingParen;
71   }
72 
73   // Check for `[...]`.
74   return Left->Previous && Left->Previous->is(tok::r_square) &&
75          Left->Previous->MatchingParen &&
76          Left->Previous->MatchingParen->is(TT_LambdaLSquare);
77 }
78 
79 /// Returns \c true if the token is followed by a boolean condition, \c false
80 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)81 static bool isKeywordWithCondition(const FormatToken &Tok) {
82   return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
83                      tok::kw_constexpr, tok::kw_catch);
84 }
85 
86 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
isCppAttribute(bool IsCpp,const FormatToken & Tok)87 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
88   if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
89     return false;
90   // The first square bracket is part of an ObjC array literal
91   if (Tok.Previous && Tok.Previous->is(tok::at))
92     return false;
93   const FormatToken *AttrTok = Tok.Next->Next;
94   if (!AttrTok)
95     return false;
96   // C++17 '[[using ns: foo, bar(baz, blech)]]'
97   // We assume nobody will name an ObjC variable 'using'.
98   if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
99     return true;
100   if (AttrTok->isNot(tok::identifier))
101     return false;
102   while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
103     // ObjC message send. We assume nobody will use : in a C++11 attribute
104     // specifier parameter, although this is technically valid:
105     // [[foo(:)]].
106     if (AttrTok->is(tok::colon) ||
107         AttrTok->startsSequence(tok::identifier, tok::identifier) ||
108         AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
109       return false;
110     }
111     if (AttrTok->is(tok::ellipsis))
112       return true;
113     AttrTok = AttrTok->Next;
114   }
115   return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
116 }
117 
118 /// A parser that gathers additional information about tokens.
119 ///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// store the parenthesis levels. It also tries to resolve matching "<" and ">"
122 /// into template parameter lists.
123 class AnnotatingParser {
124 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords,SmallVector<ScopeType> & Scopes)125   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
126                    const AdditionalKeywords &Keywords,
127                    SmallVector<ScopeType> &Scopes)
128       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
129         Keywords(Keywords), Scopes(Scopes) {
130     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
131     resetTokenMetadata();
132   }
133 
134 private:
getScopeType(const FormatToken & Token) const135   ScopeType getScopeType(const FormatToken &Token) const {
136     switch (Token.getType()) {
137     case TT_FunctionLBrace:
138     case TT_LambdaLBrace:
139       return ST_Function;
140     case TT_ClassLBrace:
141     case TT_StructLBrace:
142     case TT_UnionLBrace:
143       return ST_Class;
144     default:
145       return ST_Other;
146     }
147   }
148 
parseAngle()149   bool parseAngle() {
150     if (!CurrentToken || !CurrentToken->Previous)
151       return false;
152     if (NonTemplateLess.count(CurrentToken->Previous) > 0)
153       return false;
154 
155     const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
156     if (Previous.Previous) {
157       if (Previous.Previous->Tok.isLiteral())
158         return false;
159       if (Previous.Previous->is(tok::r_brace))
160         return false;
161       if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
162           (!Previous.Previous->MatchingParen ||
163            Previous.Previous->MatchingParen->isNot(
164                TT_OverloadedOperatorLParen))) {
165         return false;
166       }
167       if (Previous.Previous->is(tok::kw_operator) &&
168           CurrentToken->is(tok::l_paren)) {
169         return false;
170       }
171     }
172 
173     FormatToken *Left = CurrentToken->Previous;
174     Left->ParentBracket = Contexts.back().ContextKind;
175     ScopedContextCreator ContextCreator(*this, tok::less, 12);
176 
177     // If this angle is in the context of an expression, we need to be more
178     // hesitant to detect it as opening template parameters.
179     bool InExprContext = Contexts.back().IsExpression;
180 
181     Contexts.back().IsExpression = false;
182     // If there's a template keyword before the opening angle bracket, this is a
183     // template parameter, not an argument.
184     if (Left->Previous && Left->Previous->isNot(tok::kw_template))
185       Contexts.back().ContextType = Context::TemplateArgument;
186 
187     if (Style.Language == FormatStyle::LK_Java &&
188         CurrentToken->is(tok::question)) {
189       next();
190     }
191 
192     while (CurrentToken) {
193       if (CurrentToken->is(tok::greater)) {
194         // Try to do a better job at looking for ">>" within the condition of
195         // a statement. Conservatively insert spaces between consecutive ">"
196         // tokens to prevent splitting right bitshift operators and potentially
197         // altering program semantics. This check is overly conservative and
198         // will prevent spaces from being inserted in select nested template
199         // parameter cases, but should not alter program semantics.
200         if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
201             Left->ParentBracket != tok::less &&
202             CurrentToken->getStartOfNonWhitespace() ==
203                 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
204                     -1)) {
205           return false;
206         }
207         Left->MatchingParen = CurrentToken;
208         CurrentToken->MatchingParen = Left;
209         // In TT_Proto, we must distignuish between:
210         //   map<key, value>
211         //   msg < item: data >
212         //   msg: < item: data >
213         // In TT_TextProto, map<key, value> does not occur.
214         if (Style.Language == FormatStyle::LK_TextProto ||
215             (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
216              Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
217           CurrentToken->setType(TT_DictLiteral);
218         } else {
219           CurrentToken->setType(TT_TemplateCloser);
220           CurrentToken->Tok.setLength(1);
221         }
222         if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
223           return false;
224         next();
225         return true;
226       }
227       if (CurrentToken->is(tok::question) &&
228           Style.Language == FormatStyle::LK_Java) {
229         next();
230         continue;
231       }
232       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
233           (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
234            !Style.isCSharp() && !Style.isProto())) {
235         return false;
236       }
237       // If a && or || is found and interpreted as a binary operator, this set
238       // of angles is likely part of something like "a < b && c > d". If the
239       // angles are inside an expression, the ||/&& might also be a binary
240       // operator that was misinterpreted because we are parsing template
241       // parameters.
242       // FIXME: This is getting out of hand, write a decent parser.
243       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
244           CurrentToken->Previous->is(TT_BinaryOperator) &&
245           Contexts[Contexts.size() - 2].IsExpression &&
246           !Line.startsWith(tok::kw_template)) {
247         return false;
248       }
249       updateParameterCount(Left, CurrentToken);
250       if (Style.Language == FormatStyle::LK_Proto) {
251         if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
252           if (CurrentToken->is(tok::colon) ||
253               (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
254                Previous->isNot(tok::colon))) {
255             Previous->setType(TT_SelectorName);
256           }
257         }
258       }
259       if (!consumeToken())
260         return false;
261     }
262     return false;
263   }
264 
parseUntouchableParens()265   bool parseUntouchableParens() {
266     while (CurrentToken) {
267       CurrentToken->Finalized = true;
268       switch (CurrentToken->Tok.getKind()) {
269       case tok::l_paren:
270         next();
271         if (!parseUntouchableParens())
272           return false;
273         continue;
274       case tok::r_paren:
275         next();
276         return true;
277       default:
278         // no-op
279         break;
280       }
281       next();
282     }
283     return false;
284   }
285 
  /// Parses the contents of a parenthesized group up to its matching ')'.
  ///
  /// Must be called with \c CurrentToken pointing at the token right after the
  /// '(' (asserted below). While walking the group it decides whether the new
  /// context is an expression, assigns a type to the parens where one can be
  /// inferred, and records a packing kind on the '('.
  ///
  /// \param LookForDecls when set, an `identifier <ptr-or-ref> identifier`
  ///        sequence that is not followed by '=' gets its middle token retyped
  ///        as a binary operator (done at most once per call).
  /// \returns \c true once the matching ')' has been linked and consumed (or
  ///          the result of \c parseUntouchableParens() for untouchable macro
  ///          calls); \c false if there is no current token, a ']' or '}' is
  ///          hit first, \c consumeToken() fails, or the tokens run out.
  bool parseParens(bool LookForDecls = false) {
    if (!CurrentToken)
      return false;
    assert(CurrentToken->Previous && "Unknown previous token");
    FormatToken &OpeningParen = *CurrentToken->Previous;
    assert(OpeningParen.is(tok::l_paren));
    FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
    OpeningParen.ParentBracket = Contexts.back().ContextKind;
    ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);

    // FIXME: This is a bit of a hack. Do better.
    Contexts.back().ColonIsForRangeExpr =
        Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;

    // Arguments of an untouchable macro are only marked as finalized; none of
    // the analysis below is applied to them.
    if (OpeningParen.Previous &&
        OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
      OpeningParen.Finalized = true;
      return parseUntouchableParens();
    }

    bool StartsObjCMethodExpr = false;
    if (!Style.isVerilog()) {
      if (FormatToken *MaybeSel = OpeningParen.Previous) {
        // @selector( starts a selector.
        if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
            MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
          StartsObjCMethodExpr = true;
        }
      }
    }

    // Decide whether the contents of the parens form an expression. The
    // branches are tried in order and the first match wins; if none matches,
    // the context's IsExpression flag is left as it was.
    if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
      // Find the previous kw_operator token.
      FormatToken *Prev = &OpeningParen;
      while (Prev->isNot(tok::kw_operator)) {
        Prev = Prev->Previous;
        assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
      }

      // If faced with "a.operator*(argument)" or "a->operator*(argument)",
      // i.e. the operator is called as a member function,
      // then the argument must be an expression.
      bool OperatorCalledAsMemberFunction =
          Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
      Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
    } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
      Contexts.back().IsExpression = true;
      Contexts.back().ContextType = Context::VerilogInstancePortList;
    } else if (Style.isJavaScript() &&
               (Line.startsWith(Keywords.kw_type, tok::identifier) ||
                Line.startsWith(tok::kw_export, Keywords.kw_type,
                                tok::identifier))) {
      // type X = (...);
      // export type X = (...);
      Contexts.back().IsExpression = false;
    } else if (OpeningParen.Previous &&
               (OpeningParen.Previous->isOneOf(
                    tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
                    tok::kw_while, tok::l_paren, tok::comma,
                    TT_BinaryOperator) ||
                OpeningParen.Previous->isIf())) {
      // static_assert, if and while usually contain expressions.
      Contexts.back().IsExpression = true;
    } else if (Style.isJavaScript() && OpeningParen.Previous &&
               (OpeningParen.Previous->is(Keywords.kw_function) ||
                (OpeningParen.Previous->endsSequence(tok::identifier,
                                                     Keywords.kw_function)))) {
      // function(...) or function f(...)
      Contexts.back().IsExpression = false;
    } else if (Style.isJavaScript() && OpeningParen.Previous &&
               OpeningParen.Previous->is(TT_JsTypeColon)) {
      // let x: (SomeType);
      Contexts.back().IsExpression = false;
    } else if (isLambdaParameterList(&OpeningParen)) {
      // This is a parameter list of a lambda expression.
      Contexts.back().IsExpression = false;
    } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
      Contexts.back().IsExpression = false;
    } else if (OpeningParen.Previous &&
               OpeningParen.Previous->is(tok::kw__Generic)) {
      Contexts.back().ContextType = Context::C11GenericSelection;
      Contexts.back().IsExpression = true;
    } else if (Line.InPPDirective &&
               (!OpeningParen.Previous ||
                OpeningParen.Previous->isNot(tok::identifier))) {
      Contexts.back().IsExpression = true;
    } else if (Contexts[Contexts.size() - 2].CaretFound) {
      // This is the parameter list of an ObjC block.
      Contexts.back().IsExpression = false;
    } else if (OpeningParen.Previous &&
               OpeningParen.Previous->is(TT_ForEachMacro)) {
      // The first argument to a foreach macro is a declaration.
      Contexts.back().ContextType = Context::ForEachMacro;
      Contexts.back().IsExpression = false;
    } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
               OpeningParen.Previous->MatchingParen->isOneOf(
                   TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
      Contexts.back().IsExpression = false;
    } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
      // Fallback outside declarations and preprocessor directives: treat the
      // contents as an expression unless the paren follows 'for' or 'catch'.
      bool IsForOrCatch =
          OpeningParen.Previous &&
          OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
      Contexts.back().IsExpression = !IsForOrCatch;
    }

    // Infer the role of the l_paren based on the previous token if we haven't
    // detected one yet.
    if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
      if (PrevNonComment->isAttribute()) {
        OpeningParen.setType(TT_AttributeLParen);
      } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
                                         tok::kw_typeof,
#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
#include "clang/Basic/TransformTypeTraits.def"
                                         tok::kw__Atomic)) {
        OpeningParen.setType(TT_TypeDeclarationParen);
        // decltype() and typeof() usually contain expressions.
        if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
          Contexts.back().IsExpression = true;
      }
    }

    if (StartsObjCMethodExpr) {
      Contexts.back().ColonIsObjCMethodExpr = true;
      OpeningParen.setType(TT_ObjCMethodExpr);
    }

    // MightBeFunctionType and ProbablyFunctionType are used for
    // function pointer and reference types as well as Objective-C
    // block types:
    //
    // void (*FunctionPointer)(void);
    // void (&FunctionReference)(void);
    // void (&&FunctionReference)(void);
    // void (^ObjCBlock)(void);
    bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
    bool ProbablyFunctionType =
        CurrentToken->isPointerOrReference() || CurrentToken->is(tok::caret);
    // Inputs for the packing kind that is recorded on the '(' once the
    // matching ')' is found.
    bool HasMultipleLines = false;
    bool HasMultipleParametersOnALine = false;
    bool MightBeObjCForRangeLoop =
        OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
    // An 'in' that was tentatively typed TT_ObjCForIn; reverted if a ';' or
    // ':' shows up later inside the same parens.
    FormatToken *PossibleObjCForInToken = nullptr;
    while (CurrentToken) {
      // LookForDecls is set when "if (" has been seen. Check for
      // 'identifier' '*' 'identifier' followed by not '=' -- this
      // '*' has to be a binary operator but determineStarAmpUsage() will
      // categorize it as an unary operator, so set the right type here.
      if (LookForDecls && CurrentToken->Next) {
        FormatToken *Prev = CurrentToken->getPreviousNonComment();
        if (Prev) {
          FormatToken *PrevPrev = Prev->getPreviousNonComment();
          FormatToken *Next = CurrentToken->Next;
          if (PrevPrev && PrevPrev->is(tok::identifier) &&
              PrevPrev->isNot(TT_TypeName) && Prev->isPointerOrReference() &&
              CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
            Prev->setType(TT_BinaryOperator);
            LookForDecls = false;
          }
        }
      }

      // A pointer/reference token directly after '(' or '::' also hints at a
      // function type.
      if (CurrentToken->Previous->is(TT_PointerOrReference) &&
          CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
                                                    tok::coloncolon)) {
        ProbablyFunctionType = true;
      }
      if (CurrentToken->is(tok::comma))
        MightBeFunctionType = false;
      if (CurrentToken->Previous->is(TT_BinaryOperator))
        Contexts.back().IsExpression = true;
      if (CurrentToken->is(tok::r_paren)) {
        // Found the matching ')': finish annotating the pair and return.
        if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
            ProbablyFunctionType && CurrentToken->Next &&
            (CurrentToken->Next->is(tok::l_paren) ||
             (CurrentToken->Next->is(tok::l_square) &&
              Line.MustBeDeclaration))) {
          OpeningParen.setType(OpeningParen.Next->is(tok::caret)
                                   ? TT_ObjCBlockLParen
                                   : TT_FunctionTypeLParen);
        }
        OpeningParen.MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = &OpeningParen;

        if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
            OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
          // Detect the case where macros are used to generate lambdas or
          // function bodies, e.g.:
          //   auto my_lambda = MACRO((Type *type, int i) { .. body .. });
          for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
               Tok = Tok->Next) {
            if (Tok->is(TT_BinaryOperator) && Tok->isPointerOrReference())
              Tok->setType(TT_PointerOrReference);
          }
        }

        if (StartsObjCMethodExpr) {
          CurrentToken->setType(TT_ObjCMethodExpr);
          if (Contexts.back().FirstObjCSelectorName) {
            Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
                Contexts.back().LongestObjCSelectorName;
          }
        }

        // Give the ')' a type that corresponds to what was determined for the
        // '(' or for the token in front of it.
        if (OpeningParen.is(TT_AttributeLParen))
          CurrentToken->setType(TT_AttributeRParen);
        if (OpeningParen.is(TT_TypeDeclarationParen))
          CurrentToken->setType(TT_TypeDeclarationParen);
        if (OpeningParen.Previous &&
            OpeningParen.Previous->is(TT_JavaAnnotation)) {
          CurrentToken->setType(TT_JavaAnnotation);
        }
        if (OpeningParen.Previous &&
            OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
          CurrentToken->setType(TT_LeadingJavaAnnotation);
        }
        if (OpeningParen.Previous &&
            OpeningParen.Previous->is(TT_AttributeSquare)) {
          CurrentToken->setType(TT_AttributeSquare);
        }

        // Without any line break inside the parens nothing can be concluded
        // about how the parameters were packed.
        if (!HasMultipleLines)
          OpeningParen.setPackingKind(PPK_Inconclusive);
        else if (HasMultipleParametersOnALine)
          OpeningParen.setPackingKind(PPK_BinPacked);
        else
          OpeningParen.setPackingKind(PPK_OnePerLine);

        next();
        return true;
      }
      // A closing bracket of a different kind: the parens are unbalanced.
      if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
        return false;

      if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
        OpeningParen.setType(TT_Unknown);
      // A comma whose next token stays on the same line (and is not a trailing
      // comment) means at least two parameters share a line.
      if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
          !CurrentToken->Next->HasUnescapedNewline &&
          !CurrentToken->Next->isTrailingComment()) {
        HasMultipleParametersOnALine = true;
      }
      bool ProbablyFunctionTypeLParen =
          (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
           CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
      // After 'const', 'auto' or a simple type specifier we are in a
      // declaration rather than an expression, unless what follows is a '{'
      // or a '(' that does not look like the start of a function type.
      if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
           CurrentToken->Previous->isSimpleTypeSpecifier()) &&
          !(CurrentToken->is(tok::l_brace) ||
            (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
        Contexts.back().IsExpression = false;
      }
      if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
        MightBeObjCForRangeLoop = false;
        if (PossibleObjCForInToken) {
          PossibleObjCForInToken->setType(TT_Unknown);
          PossibleObjCForInToken = nullptr;
        }
      }
      if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
        PossibleObjCForInToken = CurrentToken;
        PossibleObjCForInToken->setType(TT_ObjCForIn);
      }
      // When we discover a 'new', we set CanBeExpression to 'false' in order to
      // parse the type correctly. Reset that after a comma.
      if (CurrentToken->is(tok::comma))
        Contexts.back().CanBeExpression = true;

      // Keep the token being consumed: consumeToken() advances CurrentToken,
      // and the parameter count is updated for the token just consumed.
      FormatToken *Tok = CurrentToken;
      if (!consumeToken())
        return false;
      updateParameterCount(&OpeningParen, Tok);
      if (CurrentToken && CurrentToken->HasUnescapedNewline)
        HasMultipleLines = true;
    }
    // Ran out of tokens before finding the matching ')'.
    return false;
  }
561 
isCSharpAttributeSpecifier(const FormatToken & Tok)562   bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
563     if (!Style.isCSharp())
564       return false;
565 
566     // `identifier[i]` is not an attribute.
567     if (Tok.Previous && Tok.Previous->is(tok::identifier))
568       return false;
569 
570     // Chains of [] in `identifier[i][j][k]` are not attributes.
571     if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
572       auto *MatchingParen = Tok.Previous->MatchingParen;
573       if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
574         return false;
575     }
576 
577     const FormatToken *AttrTok = Tok.Next;
578     if (!AttrTok)
579       return false;
580 
581     // Just an empty declaration e.g. string [].
582     if (AttrTok->is(tok::r_square))
583       return false;
584 
585     // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
586     while (AttrTok && AttrTok->isNot(tok::r_square))
587       AttrTok = AttrTok->Next;
588 
589     if (!AttrTok)
590       return false;
591 
592     // Allow an attribute to be the only content of a file.
593     AttrTok = AttrTok->Next;
594     if (!AttrTok)
595       return true;
596 
597     // Limit this to being an access modifier that follows.
598     if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
599                          tok::comment, tok::kw_class, tok::kw_static,
600                          tok::l_square, Keywords.kw_internal)) {
601       return true;
602     }
603 
604     // incase its a [XXX] retval func(....
605     if (AttrTok->Next &&
606         AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
607       return true;
608     }
609 
610     return false;
611   }
612 
parseSquare()613   bool parseSquare() {
614     if (!CurrentToken)
615       return false;
616 
617     // A '[' could be an index subscript (after an identifier or after
618     // ')' or ']'), it could be the start of an Objective-C method
619     // expression, it could the start of an Objective-C array literal,
620     // or it could be a C++ attribute specifier [[foo::bar]].
621     FormatToken *Left = CurrentToken->Previous;
622     Left->ParentBracket = Contexts.back().ContextKind;
623     FormatToken *Parent = Left->getPreviousNonComment();
624 
625     // Cases where '>' is followed by '['.
626     // In C++, this can happen either in array of templates (foo<int>[10])
627     // or when array is a nested template type (unique_ptr<type1<type2>[]>).
628     bool CppArrayTemplates =
629         Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
630         (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
631          Contexts.back().ContextType == Context::TemplateArgument);
632 
633     const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
634     const bool IsCpp11AttributeSpecifier =
635         isCppAttribute(Style.isCpp(), *Left) || IsInnerSquare;
636 
637     // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
638     bool IsCSharpAttributeSpecifier =
639         isCSharpAttributeSpecifier(*Left) ||
640         Contexts.back().InCSharpAttributeSpecifier;
641 
642     bool InsideInlineASM = Line.startsWith(tok::kw_asm);
643     bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
644     bool StartsObjCMethodExpr =
645         !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
646         Style.isCpp() && !IsCpp11AttributeSpecifier &&
647         !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
648         Left->isNot(TT_LambdaLSquare) &&
649         !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
650         (!Parent ||
651          Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
652                          tok::kw_return, tok::kw_throw) ||
653          Parent->isUnaryOperator() ||
654          // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
655          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
656          (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
657           prec::Unknown));
658     bool ColonFound = false;
659 
660     unsigned BindingIncrease = 1;
661     if (IsCppStructuredBinding) {
662       Left->setType(TT_StructuredBindingLSquare);
663     } else if (Left->is(TT_Unknown)) {
664       if (StartsObjCMethodExpr) {
665         Left->setType(TT_ObjCMethodExpr);
666       } else if (InsideInlineASM) {
667         Left->setType(TT_InlineASMSymbolicNameLSquare);
668       } else if (IsCpp11AttributeSpecifier) {
669         Left->setType(TT_AttributeSquare);
670         if (!IsInnerSquare && Left->Previous)
671           Left->Previous->EndsCppAttributeGroup = false;
672       } else if (Style.isJavaScript() && Parent &&
673                  Contexts.back().ContextKind == tok::l_brace &&
674                  Parent->isOneOf(tok::l_brace, tok::comma)) {
675         Left->setType(TT_JsComputedPropertyName);
676       } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
677                  Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
678         Left->setType(TT_DesignatedInitializerLSquare);
679       } else if (IsCSharpAttributeSpecifier) {
680         Left->setType(TT_AttributeSquare);
681       } else if (CurrentToken->is(tok::r_square) && Parent &&
682                  Parent->is(TT_TemplateCloser)) {
683         Left->setType(TT_ArraySubscriptLSquare);
684       } else if (Style.isProto()) {
685         // Square braces in LK_Proto can either be message field attributes:
686         //
687         // optional Aaa aaa = 1 [
688         //   (aaa) = aaa
689         // ];
690         //
691         // extensions 123 [
692         //   (aaa) = aaa
693         // ];
694         //
695         // or text proto extensions (in options):
696         //
697         // option (Aaa.options) = {
698         //   [type.type/type] {
699         //     key: value
700         //   }
701         // }
702         //
703         // or repeated fields (in options):
704         //
705         // option (Aaa.options) = {
706         //   keys: [ 1, 2, 3 ]
707         // }
708         //
709         // In the first and the third case we want to spread the contents inside
710         // the square braces; in the second we want to keep them inline.
711         Left->setType(TT_ArrayInitializerLSquare);
712         if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
713                                 tok::equal) &&
714             !Left->endsSequence(tok::l_square, tok::numeric_constant,
715                                 tok::identifier) &&
716             !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
717           Left->setType(TT_ProtoExtensionLSquare);
718           BindingIncrease = 10;
719         }
720       } else if (!CppArrayTemplates && Parent &&
721                  Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
722                                  tok::comma, tok::l_paren, tok::l_square,
723                                  tok::question, tok::colon, tok::kw_return,
724                                  // Should only be relevant to JavaScript:
725                                  tok::kw_default)) {
726         Left->setType(TT_ArrayInitializerLSquare);
727       } else {
728         BindingIncrease = 10;
729         Left->setType(TT_ArraySubscriptLSquare);
730       }
731     }
732 
733     ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
734     Contexts.back().IsExpression = true;
735     if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
736       Contexts.back().IsExpression = false;
737 
738     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
739     Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
740     Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
741 
742     while (CurrentToken) {
743       if (CurrentToken->is(tok::r_square)) {
744         if (IsCpp11AttributeSpecifier) {
745           CurrentToken->setType(TT_AttributeSquare);
746           if (!IsInnerSquare)
747             CurrentToken->EndsCppAttributeGroup = true;
748         }
749         if (IsCSharpAttributeSpecifier) {
750           CurrentToken->setType(TT_AttributeSquare);
751         } else if (((CurrentToken->Next &&
752                      CurrentToken->Next->is(tok::l_paren)) ||
753                     (CurrentToken->Previous &&
754                      CurrentToken->Previous->Previous == Left)) &&
755                    Left->is(TT_ObjCMethodExpr)) {
756           // An ObjC method call is rarely followed by an open parenthesis. It
757           // also can't be composed of just one token, unless it's a macro that
758           // will be expanded to more tokens.
759           // FIXME: Do we incorrectly label ":" with this?
760           StartsObjCMethodExpr = false;
761           Left->setType(TT_Unknown);
762         }
763         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
764           CurrentToken->setType(TT_ObjCMethodExpr);
765           // If we haven't seen a colon yet, make sure the last identifier
766           // before the r_square is tagged as a selector name component.
767           if (!ColonFound && CurrentToken->Previous &&
768               CurrentToken->Previous->is(TT_Unknown) &&
769               canBeObjCSelectorComponent(*CurrentToken->Previous)) {
770             CurrentToken->Previous->setType(TT_SelectorName);
771           }
772           // determineStarAmpUsage() thinks that '*' '[' is allocating an
773           // array of pointers, but if '[' starts a selector then '*' is a
774           // binary operator.
775           if (Parent && Parent->is(TT_PointerOrReference))
776             Parent->overwriteFixedType(TT_BinaryOperator);
777         }
778         // An arrow after an ObjC method expression is not a lambda arrow.
779         if (CurrentToken->getType() == TT_ObjCMethodExpr &&
780             CurrentToken->Next &&
781             CurrentToken->Next->is(TT_TrailingReturnArrow)) {
782           CurrentToken->Next->overwriteFixedType(TT_Unknown);
783         }
784         Left->MatchingParen = CurrentToken;
785         CurrentToken->MatchingParen = Left;
786         // FirstObjCSelectorName is set when a colon is found. This does
787         // not work, however, when the method has no parameters.
788         // Here, we set FirstObjCSelectorName when the end of the method call is
789         // reached, in case it was not set already.
790         if (!Contexts.back().FirstObjCSelectorName) {
791           FormatToken *Previous = CurrentToken->getPreviousNonComment();
792           if (Previous && Previous->is(TT_SelectorName)) {
793             Previous->ObjCSelectorNameParts = 1;
794             Contexts.back().FirstObjCSelectorName = Previous;
795           }
796         } else {
797           Left->ParameterCount =
798               Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
799         }
800         if (Contexts.back().FirstObjCSelectorName) {
801           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
802               Contexts.back().LongestObjCSelectorName;
803           if (Left->BlockParameterCount > 1)
804             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
805         }
806         next();
807         return true;
808       }
809       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
810         return false;
811       if (CurrentToken->is(tok::colon)) {
812         if (IsCpp11AttributeSpecifier &&
813             CurrentToken->endsSequence(tok::colon, tok::identifier,
814                                        tok::kw_using)) {
815           // Remember that this is a [[using ns: foo]] C++ attribute, so we
816           // don't add a space before the colon (unlike other colons).
817           CurrentToken->setType(TT_AttributeColon);
818         } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
819                    Left->isOneOf(TT_ArraySubscriptLSquare,
820                                  TT_DesignatedInitializerLSquare)) {
821           Left->setType(TT_ObjCMethodExpr);
822           StartsObjCMethodExpr = true;
823           Contexts.back().ColonIsObjCMethodExpr = true;
824           if (Parent && Parent->is(tok::r_paren)) {
825             // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
826             Parent->setType(TT_CastRParen);
827           }
828         }
829         ColonFound = true;
830       }
831       if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
832           !ColonFound) {
833         Left->setType(TT_ArrayInitializerLSquare);
834       }
835       FormatToken *Tok = CurrentToken;
836       if (!consumeToken())
837         return false;
838       updateParameterCount(Left, Tok);
839     }
840     return false;
841   }
842 
couldBeInStructArrayInitializer() const843   bool couldBeInStructArrayInitializer() const {
844     if (Contexts.size() < 2)
845       return false;
846     // We want to back up no more then 2 context levels i.e.
847     // . { { <-
848     const auto End = std::next(Contexts.rbegin(), 2);
849     auto Last = Contexts.rbegin();
850     unsigned Depth = 0;
851     for (; Last != End; ++Last)
852       if (Last->ContextKind == tok::l_brace)
853         ++Depth;
854     return Depth == 2 && Last->ContextKind != tok::l_brace;
855   }
856 
parseBrace()857   bool parseBrace() {
858     if (!CurrentToken)
859       return true;
860 
861     assert(CurrentToken->Previous);
862     FormatToken &OpeningBrace = *CurrentToken->Previous;
863     assert(OpeningBrace.is(tok::l_brace));
864     OpeningBrace.ParentBracket = Contexts.back().ContextKind;
865 
866     if (Contexts.back().CaretFound)
867       OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
868     Contexts.back().CaretFound = false;
869 
870     ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
871     Contexts.back().ColonIsDictLiteral = true;
872     if (OpeningBrace.is(BK_BracedInit))
873       Contexts.back().IsExpression = true;
874     if (Style.isJavaScript() && OpeningBrace.Previous &&
875         OpeningBrace.Previous->is(TT_JsTypeColon)) {
876       Contexts.back().IsExpression = false;
877     }
878     if (Style.isVerilog() &&
879         (!OpeningBrace.getPreviousNonComment() ||
880          OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
881       Contexts.back().VerilogMayBeConcatenation = true;
882     }
883 
884     unsigned CommaCount = 0;
885     while (CurrentToken) {
886       if (CurrentToken->is(tok::r_brace)) {
887         assert(!Scopes.empty());
888         assert(Scopes.back() == getScopeType(OpeningBrace));
889         Scopes.pop_back();
890         assert(OpeningBrace.Optional == CurrentToken->Optional);
891         OpeningBrace.MatchingParen = CurrentToken;
892         CurrentToken->MatchingParen = &OpeningBrace;
893         if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
894           if (OpeningBrace.ParentBracket == tok::l_brace &&
895               couldBeInStructArrayInitializer() && CommaCount > 0) {
896             Contexts.back().ContextType = Context::StructArrayInitializer;
897           }
898         }
899         next();
900         return true;
901       }
902       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
903         return false;
904       updateParameterCount(&OpeningBrace, CurrentToken);
905       if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
906         FormatToken *Previous = CurrentToken->getPreviousNonComment();
907         if (Previous->is(TT_JsTypeOptionalQuestion))
908           Previous = Previous->getPreviousNonComment();
909         if ((CurrentToken->is(tok::colon) &&
910              (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
911             Style.isProto()) {
912           OpeningBrace.setType(TT_DictLiteral);
913           if (Previous->Tok.getIdentifierInfo() ||
914               Previous->is(tok::string_literal)) {
915             Previous->setType(TT_SelectorName);
916           }
917         }
918         if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown))
919           OpeningBrace.setType(TT_DictLiteral);
920         else if (Style.isJavaScript())
921           OpeningBrace.overwriteFixedType(TT_DictLiteral);
922       }
923       if (CurrentToken->is(tok::comma)) {
924         if (Style.isJavaScript())
925           OpeningBrace.overwriteFixedType(TT_DictLiteral);
926         ++CommaCount;
927       }
928       if (!consumeToken())
929         return false;
930     }
931     return true;
932   }
933 
updateParameterCount(FormatToken * Left,FormatToken * Current)934   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
935     // For ObjC methods, the number of parameters is calculated differently as
936     // method declarations have a different structure (the parameters are not
937     // inside a bracket scope).
938     if (Current->is(tok::l_brace) && Current->is(BK_Block))
939       ++Left->BlockParameterCount;
940     if (Current->is(tok::comma)) {
941       ++Left->ParameterCount;
942       if (!Left->Role)
943         Left->Role.reset(new CommaSeparatedList(Style));
944       Left->Role->CommaFound(Current);
945     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
946       Left->ParameterCount = 1;
947     }
948   }
949 
parseConditional()950   bool parseConditional() {
951     while (CurrentToken) {
952       if (CurrentToken->is(tok::colon)) {
953         CurrentToken->setType(TT_ConditionalExpr);
954         next();
955         return true;
956       }
957       if (!consumeToken())
958         return false;
959     }
960     return false;
961   }
962 
parseTemplateDeclaration()963   bool parseTemplateDeclaration() {
964     if (CurrentToken && CurrentToken->is(tok::less)) {
965       CurrentToken->setType(TT_TemplateOpener);
966       next();
967       if (!parseAngle())
968         return false;
969       if (CurrentToken)
970         CurrentToken->Previous->ClosesTemplateDeclaration = true;
971       return true;
972     }
973     return false;
974   }
975 
consumeToken()976   bool consumeToken() {
977     if (Style.isCpp()) {
978       const auto *Prev = CurrentToken->getPreviousNonComment();
979       if (Prev && Prev->is(tok::r_square) && Prev->is(TT_AttributeSquare) &&
980           CurrentToken->isOneOf(tok::kw_if, tok::kw_switch, tok::kw_case,
981                                 tok::kw_default, tok::kw_for, tok::kw_while) &&
982           mustBreakAfterAttributes(*CurrentToken, Style)) {
983         CurrentToken->MustBreakBefore = true;
984       }
985     }
986     FormatToken *Tok = CurrentToken;
987     next();
988     // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
989     // operators.
990     if (Tok->is(TT_VerilogTableItem))
991       return true;
992     switch (Tok->Tok.getKind()) {
993     case tok::plus:
994     case tok::minus:
995       if (!Tok->Previous && Line.MustBeDeclaration)
996         Tok->setType(TT_ObjCMethodSpecifier);
997       break;
998     case tok::colon:
999       if (!Tok->Previous)
1000         return false;
1001       // Goto labels and case labels are already identified in
1002       // UnwrappedLineParser.
1003       if (Tok->isTypeFinalized())
1004         break;
1005       // Colons from ?: are handled in parseConditional().
1006       if (Style.isJavaScript()) {
1007         if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
1008             (Contexts.size() == 1 &&               // switch/case labels
1009              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
1010             Contexts.back().ContextKind == tok::l_paren ||  // function params
1011             Contexts.back().ContextKind == tok::l_square || // array type
1012             (!Contexts.back().IsExpression &&
1013              Contexts.back().ContextKind == tok::l_brace) || // object type
1014             (Contexts.size() == 1 &&
1015              Line.MustBeDeclaration)) { // method/property declaration
1016           Contexts.back().IsExpression = false;
1017           Tok->setType(TT_JsTypeColon);
1018           break;
1019         }
1020       } else if (Style.isCSharp()) {
1021         if (Contexts.back().InCSharpAttributeSpecifier) {
1022           Tok->setType(TT_AttributeColon);
1023           break;
1024         }
1025         if (Contexts.back().ContextKind == tok::l_paren) {
1026           Tok->setType(TT_CSharpNamedArgumentColon);
1027           break;
1028         }
1029       } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1030         // The distribution weight operators are labeled
1031         // TT_BinaryOperator by the lexer.
1032         if (Keywords.isVerilogEnd(*Tok->Previous) ||
1033             Keywords.isVerilogBegin(*Tok->Previous)) {
1034           Tok->setType(TT_VerilogBlockLabelColon);
1035         } else if (Contexts.back().ContextKind == tok::l_square) {
1036           Tok->setType(TT_BitFieldColon);
1037         } else if (Contexts.back().ColonIsDictLiteral) {
1038           Tok->setType(TT_DictLiteral);
1039         } else if (Contexts.size() == 1) {
1040           // In Verilog a case label doesn't have the case keyword. We
1041           // assume a colon following an expression is a case label.
1042           // Colons from ?: are annotated in parseConditional().
1043           Tok->setType(TT_CaseLabelColon);
1044           if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1045             --Line.Level;
1046         }
1047         break;
1048       }
1049       if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1050           Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1051           Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1052         Tok->setType(TT_ModulePartitionColon);
1053       } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) {
1054         Tok->setType(TT_DictLiteral);
1055         if (Style.Language == FormatStyle::LK_TextProto) {
1056           if (FormatToken *Previous = Tok->getPreviousNonComment())
1057             Previous->setType(TT_SelectorName);
1058         }
1059       } else if (Contexts.back().ColonIsObjCMethodExpr ||
1060                  Line.startsWith(TT_ObjCMethodSpecifier)) {
1061         Tok->setType(TT_ObjCMethodExpr);
1062         const FormatToken *BeforePrevious = Tok->Previous->Previous;
1063         // Ensure we tag all identifiers in method declarations as
1064         // TT_SelectorName.
1065         bool UnknownIdentifierInMethodDeclaration =
1066             Line.startsWith(TT_ObjCMethodSpecifier) &&
1067             Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1068         if (!BeforePrevious ||
1069             // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1070             !(BeforePrevious->is(TT_CastRParen) ||
1071               (BeforePrevious->is(TT_ObjCMethodExpr) &&
1072                BeforePrevious->is(tok::colon))) ||
1073             BeforePrevious->is(tok::r_square) ||
1074             Contexts.back().LongestObjCSelectorName == 0 ||
1075             UnknownIdentifierInMethodDeclaration) {
1076           Tok->Previous->setType(TT_SelectorName);
1077           if (!Contexts.back().FirstObjCSelectorName) {
1078             Contexts.back().FirstObjCSelectorName = Tok->Previous;
1079           } else if (Tok->Previous->ColumnWidth >
1080                      Contexts.back().LongestObjCSelectorName) {
1081             Contexts.back().LongestObjCSelectorName =
1082                 Tok->Previous->ColumnWidth;
1083           }
1084           Tok->Previous->ParameterIndex =
1085               Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1086           ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1087         }
1088       } else if (Contexts.back().ColonIsForRangeExpr) {
1089         Tok->setType(TT_RangeBasedForLoopColon);
1090       } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1091         Tok->setType(TT_GenericSelectionColon);
1092       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1093         Tok->setType(TT_BitFieldColon);
1094       } else if (Contexts.size() == 1 &&
1095                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1096                                       tok::kw_default)) {
1097         FormatToken *Prev = Tok->getPreviousNonComment();
1098         if (!Prev)
1099           break;
1100         if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1101             Prev->ClosesRequiresClause) {
1102           Tok->setType(TT_CtorInitializerColon);
1103         } else if (Prev->is(tok::kw_try)) {
1104           // Member initializer list within function try block.
1105           FormatToken *PrevPrev = Prev->getPreviousNonComment();
1106           if (!PrevPrev)
1107             break;
1108           if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1109             Tok->setType(TT_CtorInitializerColon);
1110         } else {
1111           Tok->setType(TT_InheritanceColon);
1112         }
1113       } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1114                  (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1115                   (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1116                    Tok->Next->Next->is(tok::colon)))) {
1117         // This handles a special macro in ObjC code where selectors including
1118         // the colon are passed as macro arguments.
1119         Tok->setType(TT_ObjCMethodExpr);
1120       } else if (Contexts.back().ContextKind == tok::l_paren &&
1121                  !Line.InPragmaDirective) {
1122         Tok->setType(TT_InlineASMColon);
1123       }
1124       break;
1125     case tok::pipe:
1126     case tok::amp:
1127       // | and & in declarations/type expressions represent union and
1128       // intersection types, respectively.
1129       if (Style.isJavaScript() && !Contexts.back().IsExpression)
1130         Tok->setType(TT_JsTypeOperator);
1131       break;
1132     case tok::kw_if:
1133       if (CurrentToken &&
1134           CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1135         next();
1136       }
1137       [[fallthrough]];
1138     case tok::kw_while:
1139       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1140         next();
1141         if (!parseParens(/*LookForDecls=*/true))
1142           return false;
1143       }
1144       break;
1145     case tok::kw_for:
1146       if (Style.isJavaScript()) {
1147         // x.for and {for: ...}
1148         if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1149             (Tok->Next && Tok->Next->is(tok::colon))) {
1150           break;
1151         }
1152         // JS' for await ( ...
1153         if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1154           next();
1155       }
1156       if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
1157         next();
1158       Contexts.back().ColonIsForRangeExpr = true;
1159       if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1160         return false;
1161       next();
1162       if (!parseParens())
1163         return false;
1164       break;
1165     case tok::l_paren:
1166       // When faced with 'operator()()', the kw_operator handler incorrectly
1167       // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1168       // the first two parens OverloadedOperators and the second l_paren an
1169       // OverloadedOperatorLParen.
1170       if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1171           Tok->Previous->MatchingParen &&
1172           Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1173         Tok->Previous->setType(TT_OverloadedOperator);
1174         Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1175         Tok->setType(TT_OverloadedOperatorLParen);
1176       }
1177 
1178       if (Style.isVerilog()) {
1179         // Identify the parameter list and port list in a module instantiation.
1180         // This is still needed when we already have
1181         // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1182         // function is only responsible for the definition, not the
1183         // instantiation.
1184         auto IsInstancePort = [&]() {
1185           const FormatToken *Prev = Tok->getPreviousNonComment();
1186           const FormatToken *PrevPrev;
1187           // In the following example all 4 left parentheses will be treated as
1188           // 'TT_VerilogInstancePortLParen'.
1189           //
1190           //   module_x instance_1(port_1); // Case A.
1191           //   module_x #(parameter_1)      // Case B.
1192           //       instance_2(port_1),      // Case C.
1193           //       instance_3(port_1);      // Case D.
1194           if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1195             return false;
1196           // Case A.
1197           if (Keywords.isVerilogIdentifier(*Prev) &&
1198               Keywords.isVerilogIdentifier(*PrevPrev)) {
1199             return true;
1200           }
1201           // Case B.
1202           if (Prev->is(Keywords.kw_verilogHash) &&
1203               Keywords.isVerilogIdentifier(*PrevPrev)) {
1204             return true;
1205           }
1206           // Case C.
1207           if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1208             return true;
1209           // Case D.
1210           if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1211             const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1212             if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1213                 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1214               return true;
1215             }
1216           }
1217           return false;
1218         };
1219 
1220         if (IsInstancePort())
1221           Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1222       }
1223 
1224       if (!parseParens())
1225         return false;
1226       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1227           !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1228           !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
1229         if (const auto *Previous = Tok->Previous;
1230             !Previous ||
1231             (!Previous->isAttribute() &&
1232              !Previous->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation))) {
1233           Line.MightBeFunctionDecl = true;
1234         }
1235       }
1236       break;
1237     case tok::l_square:
1238       if (!parseSquare())
1239         return false;
1240       break;
1241     case tok::l_brace:
1242       if (Style.Language == FormatStyle::LK_TextProto) {
1243         FormatToken *Previous = Tok->getPreviousNonComment();
1244         if (Previous && Previous->getType() != TT_DictLiteral)
1245           Previous->setType(TT_SelectorName);
1246       }
1247       Scopes.push_back(getScopeType(*Tok));
1248       if (!parseBrace())
1249         return false;
1250       break;
1251     case tok::less:
1252       if (parseAngle()) {
1253         Tok->setType(TT_TemplateOpener);
1254         // In TT_Proto, we must distignuish between:
1255         //   map<key, value>
1256         //   msg < item: data >
1257         //   msg: < item: data >
1258         // In TT_TextProto, map<key, value> does not occur.
1259         if (Style.Language == FormatStyle::LK_TextProto ||
1260             (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1261              Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1262           Tok->setType(TT_DictLiteral);
1263           FormatToken *Previous = Tok->getPreviousNonComment();
1264           if (Previous && Previous->getType() != TT_DictLiteral)
1265             Previous->setType(TT_SelectorName);
1266         }
1267       } else {
1268         Tok->setType(TT_BinaryOperator);
1269         NonTemplateLess.insert(Tok);
1270         CurrentToken = Tok;
1271         next();
1272       }
1273       break;
1274     case tok::r_paren:
1275     case tok::r_square:
1276       return false;
1277     case tok::r_brace:
1278       // Don't pop scope when encountering unbalanced r_brace.
1279       if (!Scopes.empty())
1280         Scopes.pop_back();
1281       // Lines can start with '}'.
1282       if (Tok->Previous)
1283         return false;
1284       break;
1285     case tok::greater:
1286       if (Style.Language != FormatStyle::LK_TextProto)
1287         Tok->setType(TT_BinaryOperator);
1288       if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1289         Tok->SpacesRequiredBefore = 1;
1290       break;
1291     case tok::kw_operator:
1292       if (Style.isProto())
1293         break;
1294       while (CurrentToken &&
1295              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1296         if (CurrentToken->isOneOf(tok::star, tok::amp))
1297           CurrentToken->setType(TT_PointerOrReference);
1298         auto Next = CurrentToken->getNextNonComment();
1299         if (!Next)
1300           break;
1301         if (Next->is(tok::less))
1302           next();
1303         else
1304           consumeToken();
1305         if (!CurrentToken)
1306           break;
1307         auto Previous = CurrentToken->getPreviousNonComment();
1308         assert(Previous);
1309         if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1310           break;
1311         if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1312                               tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1313             // User defined literal.
1314             Previous->TokenText.starts_with("\"\"")) {
1315           Previous->setType(TT_OverloadedOperator);
1316           if (CurrentToken->isOneOf(tok::less, tok::greater))
1317             break;
1318         }
1319       }
1320       if (CurrentToken && CurrentToken->is(tok::l_paren))
1321         CurrentToken->setType(TT_OverloadedOperatorLParen);
1322       if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1323         CurrentToken->Previous->setType(TT_OverloadedOperator);
1324       break;
1325     case tok::question:
1326       if (Style.isJavaScript() && Tok->Next &&
1327           Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1328                              tok::r_brace, tok::r_square)) {
1329         // Question marks before semicolons, colons, etc. indicate optional
1330         // types (fields, parameters), e.g.
1331         //   function(x?: string, y?) {...}
1332         //   class X { y?; }
1333         Tok->setType(TT_JsTypeOptionalQuestion);
1334         break;
1335       }
1336       // Declarations cannot be conditional expressions, this can only be part
1337       // of a type declaration.
1338       if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1339           Style.isJavaScript()) {
1340         break;
1341       }
1342       if (Style.isCSharp()) {
1343         // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1344         // nullable types.
1345 
1346         // `Type?)`, `Type?>`, `Type? name;`
1347         if (Tok->Next &&
1348             (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1349              Tok->Next->startsSequence(tok::question, tok::greater) ||
1350              Tok->Next->startsSequence(tok::question, tok::identifier,
1351                                        tok::semi))) {
1352           Tok->setType(TT_CSharpNullable);
1353           break;
1354         }
1355 
1356         // `Type? name =`
1357         if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1358             Tok->Next->Next->is(tok::equal)) {
1359           Tok->setType(TT_CSharpNullable);
1360           break;
1361         }
1362 
1363         // Line.MustBeDeclaration will be true for `Type? name;`.
1364         // But not
1365         // cond ? "A" : "B";
1366         // cond ? id : "B";
1367         // cond ? cond2 ? "A" : "B" : "C";
1368         if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1369             (!Tok->Next ||
1370              !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1371              !Tok->Next->Next ||
1372              !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1373           Tok->setType(TT_CSharpNullable);
1374           break;
1375         }
1376       }
1377       parseConditional();
1378       break;
1379     case tok::kw_template:
1380       parseTemplateDeclaration();
1381       break;
1382     case tok::comma:
1383       switch (Contexts.back().ContextType) {
1384       case Context::CtorInitializer:
1385         Tok->setType(TT_CtorInitializerComma);
1386         break;
1387       case Context::InheritanceList:
1388         Tok->setType(TT_InheritanceComma);
1389         break;
1390       case Context::VerilogInstancePortList:
1391         Tok->setFinalizedType(TT_VerilogInstancePortComma);
1392         break;
1393       default:
1394         if (Style.isVerilog() && Contexts.size() == 1 &&
1395             Line.startsWith(Keywords.kw_assign)) {
1396           Tok->setFinalizedType(TT_VerilogAssignComma);
1397         } else if (Contexts.back().FirstStartOfName &&
1398                    (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1399           Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1400           Line.IsMultiVariableDeclStmt = true;
1401         }
1402         break;
1403       }
1404       if (Contexts.back().ContextType == Context::ForEachMacro)
1405         Contexts.back().IsExpression = true;
1406       break;
1407     case tok::kw_default:
1408       // Unindent case labels.
1409       if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1410           (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1411         --Line.Level;
1412       }
1413       break;
1414     case tok::identifier:
1415       if (Tok->isOneOf(Keywords.kw___has_include,
1416                        Keywords.kw___has_include_next)) {
1417         parseHasInclude();
1418       }
1419       if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1420           Tok->Next->isNot(tok::l_paren)) {
1421         Tok->setType(TT_CSharpGenericTypeConstraint);
1422         parseCSharpGenericTypeConstraint();
1423         if (!Tok->getPreviousNonComment())
1424           Line.IsContinuation = true;
1425       }
1426       break;
1427     case tok::arrow:
1428       if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept))
1429         Tok->setType(TT_TrailingReturnArrow);
1430       break;
1431     default:
1432       break;
1433     }
1434     return true;
1435   }
1436 
parseCSharpGenericTypeConstraint()1437   void parseCSharpGenericTypeConstraint() {
1438     int OpenAngleBracketsCount = 0;
1439     while (CurrentToken) {
1440       if (CurrentToken->is(tok::less)) {
1441         // parseAngle is too greedy and will consume the whole line.
1442         CurrentToken->setType(TT_TemplateOpener);
1443         ++OpenAngleBracketsCount;
1444         next();
1445       } else if (CurrentToken->is(tok::greater)) {
1446         CurrentToken->setType(TT_TemplateCloser);
1447         --OpenAngleBracketsCount;
1448         next();
1449       } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1450         // We allow line breaks after GenericTypeConstraintComma's
1451         // so do not flag commas in Generics as GenericTypeConstraintComma's.
1452         CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1453         next();
1454       } else if (CurrentToken->is(Keywords.kw_where)) {
1455         CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1456         next();
1457       } else if (CurrentToken->is(tok::colon)) {
1458         CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1459         next();
1460       } else {
1461         next();
1462       }
1463     }
1464   }
1465 
parseIncludeDirective()1466   void parseIncludeDirective() {
1467     if (CurrentToken && CurrentToken->is(tok::less)) {
1468       next();
1469       while (CurrentToken) {
1470         // Mark tokens up to the trailing line comments as implicit string
1471         // literals.
1472         if (CurrentToken->isNot(tok::comment) &&
1473             !CurrentToken->TokenText.starts_with("//")) {
1474           CurrentToken->setType(TT_ImplicitStringLiteral);
1475         }
1476         next();
1477       }
1478     }
1479   }
1480 
parseWarningOrError()1481   void parseWarningOrError() {
1482     next();
1483     // We still want to format the whitespace left of the first token of the
1484     // warning or error.
1485     next();
1486     while (CurrentToken) {
1487       CurrentToken->setType(TT_ImplicitStringLiteral);
1488       next();
1489     }
1490   }
1491 
parsePragma()1492   void parsePragma() {
1493     next(); // Consume "pragma".
1494     if (CurrentToken &&
1495         CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1496                               Keywords.kw_region)) {
1497       bool IsMarkOrRegion =
1498           CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1499       next();
1500       next(); // Consume first token (so we fix leading whitespace).
1501       while (CurrentToken) {
1502         if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1503           CurrentToken->setType(TT_ImplicitStringLiteral);
1504         next();
1505       }
1506     }
1507   }
1508 
parseHasInclude()1509   void parseHasInclude() {
1510     if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1511       return;
1512     next(); // '('
1513     parseIncludeDirective();
1514     next(); // ')'
1515   }
1516 
parsePreprocessorDirective()1517   LineType parsePreprocessorDirective() {
1518     bool IsFirstToken = CurrentToken->IsFirst;
1519     LineType Type = LT_PreprocessorDirective;
1520     next();
1521     if (!CurrentToken)
1522       return Type;
1523 
1524     if (Style.isJavaScript() && IsFirstToken) {
1525       // JavaScript files can contain shebang lines of the form:
1526       // #!/usr/bin/env node
1527       // Treat these like C++ #include directives.
1528       while (CurrentToken) {
1529         // Tokens cannot be comments here.
1530         CurrentToken->setType(TT_ImplicitStringLiteral);
1531         next();
1532       }
1533       return LT_ImportStatement;
1534     }
1535 
1536     if (CurrentToken->is(tok::numeric_constant)) {
1537       CurrentToken->SpacesRequiredBefore = 1;
1538       return Type;
1539     }
1540     // Hashes in the middle of a line can lead to any strange token
1541     // sequence.
1542     if (!CurrentToken->Tok.getIdentifierInfo())
1543       return Type;
1544     // In Verilog macro expansions start with a backtick just like preprocessor
1545     // directives. Thus we stop if the word is not a preprocessor directive.
1546     if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1547       return LT_Invalid;
1548     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1549     case tok::pp_include:
1550     case tok::pp_include_next:
1551     case tok::pp_import:
1552       next();
1553       parseIncludeDirective();
1554       Type = LT_ImportStatement;
1555       break;
1556     case tok::pp_error:
1557     case tok::pp_warning:
1558       parseWarningOrError();
1559       break;
1560     case tok::pp_pragma:
1561       parsePragma();
1562       break;
1563     case tok::pp_if:
1564     case tok::pp_elif:
1565       Contexts.back().IsExpression = true;
1566       next();
1567       parseLine();
1568       break;
1569     default:
1570       break;
1571     }
1572     while (CurrentToken) {
1573       FormatToken *Tok = CurrentToken;
1574       next();
1575       if (Tok->is(tok::l_paren)) {
1576         parseParens();
1577       } else if (Tok->isOneOf(Keywords.kw___has_include,
1578                               Keywords.kw___has_include_next)) {
1579         parseHasInclude();
1580       }
1581     }
1582     return Type;
1583   }
1584 
1585 public:
parseLine()1586   LineType parseLine() {
1587     if (!CurrentToken)
1588       return LT_Invalid;
1589     NonTemplateLess.clear();
1590     if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1591       // We were not yet allowed to use C++17 optional when this was being
1592       // written. So we used LT_Invalid to mark that the line is not a
1593       // preprocessor directive.
1594       auto Type = parsePreprocessorDirective();
1595       if (Type != LT_Invalid)
1596         return Type;
1597     }
1598 
1599     // Directly allow to 'import <string-literal>' to support protocol buffer
1600     // definitions (github.com/google/protobuf) or missing "#" (either way we
1601     // should not break the line).
1602     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1603     if ((Style.Language == FormatStyle::LK_Java &&
1604          CurrentToken->is(Keywords.kw_package)) ||
1605         (!Style.isVerilog() && Info &&
1606          Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1607          CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1608                                      tok::kw_static))) {
1609       next();
1610       parseIncludeDirective();
1611       return LT_ImportStatement;
1612     }
1613 
1614     // If this line starts and ends in '<' and '>', respectively, it is likely
1615     // part of "#define <a/b.h>".
1616     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1617       parseIncludeDirective();
1618       return LT_ImportStatement;
1619     }
1620 
1621     // In .proto files, top-level options and package statements are very
1622     // similar to import statements and should not be line-wrapped.
1623     if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1624         CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1625       next();
1626       if (CurrentToken && CurrentToken->is(tok::identifier)) {
1627         while (CurrentToken)
1628           next();
1629         return LT_ImportStatement;
1630       }
1631     }
1632 
1633     bool KeywordVirtualFound = false;
1634     bool ImportStatement = false;
1635 
1636     // import {...} from '...';
1637     if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1638       ImportStatement = true;
1639 
1640     while (CurrentToken) {
1641       if (CurrentToken->is(tok::kw_virtual))
1642         KeywordVirtualFound = true;
1643       if (Style.isJavaScript()) {
1644         // export {...} from '...';
1645         // An export followed by "from 'some string';" is a re-export from
1646         // another module identified by a URI and is treated as a
1647         // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1648         // Just "export {...};" or "export class ..." should not be treated as
1649         // an import in this sense.
1650         if (Line.First->is(tok::kw_export) &&
1651             CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1652             CurrentToken->Next->isStringLiteral()) {
1653           ImportStatement = true;
1654         }
1655         if (isClosureImportStatement(*CurrentToken))
1656           ImportStatement = true;
1657       }
1658       if (!consumeToken())
1659         return LT_Invalid;
1660     }
1661     if (KeywordVirtualFound)
1662       return LT_VirtualFunctionDecl;
1663     if (ImportStatement)
1664       return LT_ImportStatement;
1665 
1666     if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1667       if (Contexts.back().FirstObjCSelectorName) {
1668         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1669             Contexts.back().LongestObjCSelectorName;
1670       }
1671       return LT_ObjCMethodDecl;
1672     }
1673 
1674     for (const auto &ctx : Contexts)
1675       if (ctx.ContextType == Context::StructArrayInitializer)
1676         return LT_ArrayOfStructInitializer;
1677 
1678     return LT_Other;
1679   }
1680 
1681 private:
isClosureImportStatement(const FormatToken & Tok)1682   bool isClosureImportStatement(const FormatToken &Tok) {
1683     // FIXME: Closure-library specific stuff should not be hard-coded but be
1684     // configurable.
1685     return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1686            Tok.Next->Next &&
1687            (Tok.Next->Next->TokenText == "module" ||
1688             Tok.Next->Next->TokenText == "provide" ||
1689             Tok.Next->Next->TokenText == "require" ||
1690             Tok.Next->Next->TokenText == "requireType" ||
1691             Tok.Next->Next->TokenText == "forwardDeclare") &&
1692            Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1693   }
1694 
resetTokenMetadata()1695   void resetTokenMetadata() {
1696     if (!CurrentToken)
1697       return;
1698 
1699     // Reset token type in case we have already looked at it and then
1700     // recovered from an error (e.g. failure to find the matching >).
1701     if (!CurrentToken->isTypeFinalized() &&
1702         !CurrentToken->isOneOf(
1703             TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1704             TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1705             TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1706             TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral,
1707             TT_TemplateString, TT_ObjCStringLiteral, TT_UntouchableMacroFunc,
1708             TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro,
1709             TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, TT_StructLBrace,
1710             TT_UnionLBrace, TT_RequiresClause,
1711             TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
1712             TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
1713             TT_BracedListLBrace)) {
1714       CurrentToken->setType(TT_Unknown);
1715     }
1716     CurrentToken->Role.reset();
1717     CurrentToken->MatchingParen = nullptr;
1718     CurrentToken->FakeLParens.clear();
1719     CurrentToken->FakeRParens = 0;
1720   }
1721 
next()1722   void next() {
1723     if (!CurrentToken)
1724       return;
1725 
1726     CurrentToken->NestingLevel = Contexts.size() - 1;
1727     CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1728     modifyContext(*CurrentToken);
1729     determineTokenType(*CurrentToken);
1730     CurrentToken = CurrentToken->Next;
1731 
1732     resetTokenMetadata();
1733   }
1734 
  /// A struct to hold information valid in a specific context, e.g.
  /// a pair of parenthesis.
  ///
  /// Contexts are kept on the \c Contexts stack; \c ScopedContextCreator
  /// pushes a new one that inherits \c BindingStrength (plus an increase) and
  /// \c IsExpression from the context below it.
  struct Context {
    Context(tok::TokenKind ContextKind, unsigned BindingStrength,
            bool IsExpression)
        : ContextKind(ContextKind), BindingStrength(BindingStrength),
          IsExpression(IsExpression) {}

    // Token kind associated with this context (presumably the kind of the
    // token that opened it -- confirm against the ScopedContextCreator call
    // sites). modifyContext() compares it against tok::less.
    tok::TokenKind ContextKind;
    // Copied onto every token consumed while this context is on top (see
    // next()).
    unsigned BindingStrength;
    // Whether tokens in this context are currently treated as part of an
    // expression. Toggled by modifyContext() as tokens are consumed.
    bool IsExpression;
    // parseLine() copies this onto the FirstObjCSelectorName token for lines
    // that start with an Objective-C method specifier.
    unsigned LongestObjCSelectorName = 0;
    bool ColonIsForRangeExpr = false;
    bool ColonIsDictLiteral = false;
    bool ColonIsObjCMethodExpr = false;
    FormatToken *FirstObjCSelectorName = nullptr;
    // Most recent token typed as TT_StartOfName in this context; cleared by
    // determineTokenType() when a semicolon is seen. Used when a comma is
    // consumed to flag multi-variable declaration statements.
    FormatToken *FirstStartOfName = nullptr;
    // Cleared by modifyContext() on `new`. `*`/`&` tokens are only classified
    // as being in an expression when both this and IsExpression are set.
    bool CanBeExpression = true;
    // Set by determineTokenType() once a `^` typed as unary operator is seen.
    bool CaretFound = false;
    bool InCpp11AttributeSpecifier = false;
    bool InCSharpAttributeSpecifier = false;
    // Set by determineTokenType() once a Verilog binary operator with
    // assignment precedence has been seen in this context.
    bool VerilogAssignmentFound = false;
    // Whether the braces may mean concatenation instead of structure or array
    // literal.
    bool VerilogMayBeConcatenation = false;
    enum {
      Unknown,
      // Like the part after `:` in a constructor.
      //   Context(...) : IsExpression(IsExpression)
      CtorInitializer,
      // Like in the parentheses in a foreach.
      ForEachMacro,
      // Like the inheritance list in a class declaration.
      //   class Input : public IO
      InheritanceList,
      // Like in the braced list.
      //   int x[] = {};
      StructArrayInitializer,
      // Like in `static_cast<int>`.
      TemplateArgument,
      // C11 _Generic selection.
      C11GenericSelection,
      // Like in the outer parentheses in `ffnand ff1(.q());`.
      VerilogInstancePortList,
    } ContextType = Unknown;
  };
1781 
1782   /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1783   /// of each instance.
1784   struct ScopedContextCreator {
1785     AnnotatingParser &P;
1786 
ScopedContextCreatorclang::format::__anonf599c8510111::AnnotatingParser::ScopedContextCreator1787     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1788                          unsigned Increase)
1789         : P(P) {
1790       P.Contexts.push_back(Context(ContextKind,
1791                                    P.Contexts.back().BindingStrength + Increase,
1792                                    P.Contexts.back().IsExpression));
1793     }
1794 
~ScopedContextCreatorclang::format::__anonf599c8510111::AnnotatingParser::ScopedContextCreator1795     ~ScopedContextCreator() {
1796       if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1797         if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
1798           P.Contexts.pop_back();
1799           P.Contexts.back().ContextType = Context::StructArrayInitializer;
1800           return;
1801         }
1802       }
1803       P.Contexts.pop_back();
1804     }
1805   };
1806 
  /// Updates the innermost context (\c Contexts.back()) according to the token
  /// \p Current, which is about to be consumed by \c next().
  ///
  /// Depending on the token this sets or clears \c IsExpression, clears
  /// \c CanBeExpression, or records the \c ContextType. As a side effect,
  /// some earlier pointer/reference tokens are retyped as
  /// \c TT_PointerOrReference.
  void modifyContext(const FormatToken &Current) {
    // Decides whether Current is an assignment-precedence operator that turns
    // the rest of the context into an expression.
    auto AssignmentStartsExpression = [&]() {
      if (Current.getPrecedence() != prec::Assignment)
        return false;

      // Lines starting with `using` or `return` never qualify.
      if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
        return false;
      if (Line.First->is(tok::kw_template)) {
        assert(Current.Previous);
        if (Current.Previous->is(tok::kw_operator)) {
          // `template ... operator=` cannot be an expression.
          return false;
        }

        // `template` keyword can start a variable template.
        const FormatToken *Tok = Line.First->getNextNonComment();
        assert(Tok); // Current token is on the same line.
        if (Tok->isNot(TT_TemplateOpener)) {
          // Explicit template instantiations do not have `<>`.
          return false;
        }

        // This is the default value of a template parameter, determine if it's
        // type or non-type.
        if (Contexts.back().ContextKind == tok::less) {
          assert(Current.Previous->Previous);
          return !Current.Previous->Previous->isOneOf(tok::kw_typename,
                                                      tok::kw_class);
        }

        // Otherwise look at the first non-comment token after the template
        // parameter list: a class/enum/struct/using declaration is not an
        // expression.
        Tok = Tok->MatchingParen;
        if (!Tok)
          return false;
        Tok = Tok->getNextNonComment();
        if (!Tok)
          return false;

        if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
                         tok::kw_using)) {
          return false;
        }

        return true;
      }

      // Type aliases use `type X = ...;` in TypeScript and can be exported
      // using `export type ...`.
      if (Style.isJavaScript() &&
          (Line.startsWith(Keywords.kw_type, tok::identifier) ||
           Line.startsWith(tok::kw_export, Keywords.kw_type,
                           tok::identifier))) {
        return false;
      }

      // An assignment token directly after `operator` is part of the operator
      // name and does not start an expression.
      return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
    };

    if (AssignmentStartsExpression()) {
      Contexts.back().IsExpression = true;
      if (!Line.startsWith(TT_UnaryOperator)) {
        // Walk backwards over the tokens before the assignment, stopping once
        // the token before `Previous` is a comma or semicolon, and retype
        // pointer/reference tokens that were annotated as operators.
        for (FormatToken *Previous = Current.Previous;
             Previous && Previous->Previous &&
             !Previous->Previous->isOneOf(tok::comma, tok::semi);
             Previous = Previous->Previous) {
          // Jump from a closing bracket to its opener; give up if the match is
          // unknown.
          if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
            Previous = Previous->MatchingParen;
            if (!Previous)
              break;
          }
          if (Previous->opensScope())
            break;
          // Not retyped when the token directly follows `=`.
          if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
              Previous->isPointerOrReference() && Previous->Previous &&
              Previous->Previous->isNot(tok::equal)) {
            Previous->setType(TT_PointerOrReference);
          }
        }
      }
    } else if (Current.is(tok::lessless) &&
               (!Current.Previous ||
                Current.Previous->isNot(tok::kw_operator))) {
      // `<<` that is not part of `operator<<`.
      Contexts.back().IsExpression = true;
    } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
      Contexts.back().IsExpression = true;
    } else if (Current.is(TT_TrailingReturnArrow)) {
      Contexts.back().IsExpression = false;
    } else if (Current.is(Keywords.kw_assert)) {
      // `assert` only switches to expression mode when formatting Java.
      Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
    } else if (Current.Previous &&
               Current.Previous->is(TT_CtorInitializerColon)) {
      Contexts.back().IsExpression = true;
      Contexts.back().ContextType = Context::CtorInitializer;
    } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
      Contexts.back().ContextType = Context::InheritanceList;
    } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
      // Any run of `*` / `&` directly before `)`, `>` or `,` is retyped as
      // pointer/reference.
      for (FormatToken *Previous = Current.Previous;
           Previous && Previous->isOneOf(tok::star, tok::amp);
           Previous = Previous->Previous) {
        Previous->setType(TT_PointerOrReference);
      }
      // NOTE(review): the check below inspects the outermost context
      // (Contexts.front()), not the innermost one.
      if (Line.MustBeDeclaration &&
          Contexts.front().ContextType != Context::CtorInitializer) {
        Contexts.back().IsExpression = false;
      }
    } else if (Current.is(tok::kw_new)) {
      Contexts.back().CanBeExpression = false;
    } else if (Current.is(tok::semi) ||
               (Current.is(tok::exclaim) && Current.Previous &&
                Current.Previous->isNot(tok::kw_operator))) {
      // This should be the condition or increment in a for-loop.
      // But not operator !() (can't use TT_OverloadedOperator here as it has
      // not been annotated yet).
      Contexts.back().IsExpression = true;
    }
  }
1922 
untilMatchingParen(FormatToken * Current)1923   static FormatToken *untilMatchingParen(FormatToken *Current) {
1924     // Used when `MatchingParen` is not yet established.
1925     int ParenLevel = 0;
1926     while (Current) {
1927       if (Current->is(tok::l_paren))
1928         ++ParenLevel;
1929       if (Current->is(tok::r_paren))
1930         --ParenLevel;
1931       if (ParenLevel < 1)
1932         break;
1933       Current = Current->Next;
1934     }
1935     return Current;
1936   }
1937 
isDeductionGuide(FormatToken & Current)1938   static bool isDeductionGuide(FormatToken &Current) {
1939     // Look for a deduction guide template<T> A(...) -> A<...>;
1940     if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1941         Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1942       // Find the TemplateCloser.
1943       FormatToken *TemplateCloser = Current.Next->Next;
1944       int NestingLevel = 0;
1945       while (TemplateCloser) {
1946         // Skip over an expressions in parens  A<(3 < 2)>;
1947         if (TemplateCloser->is(tok::l_paren)) {
1948           // No Matching Paren yet so skip to matching paren
1949           TemplateCloser = untilMatchingParen(TemplateCloser);
1950           if (!TemplateCloser)
1951             break;
1952         }
1953         if (TemplateCloser->is(tok::less))
1954           ++NestingLevel;
1955         if (TemplateCloser->is(tok::greater))
1956           --NestingLevel;
1957         if (NestingLevel < 1)
1958           break;
1959         TemplateCloser = TemplateCloser->Next;
1960       }
1961       // Assuming we have found the end of the template ensure its followed
1962       // with a semi-colon.
1963       if (TemplateCloser && TemplateCloser->Next &&
1964           TemplateCloser->Next->is(tok::semi) &&
1965           Current.Previous->MatchingParen) {
1966         // Determine if the identifier `A` prior to the A<..>; is the same as
1967         // prior to the A(..)
1968         FormatToken *LeadingIdentifier =
1969             Current.Previous->MatchingParen->Previous;
1970 
1971         return LeadingIdentifier &&
1972                LeadingIdentifier->TokenText == Current.Next->TokenText;
1973       }
1974     }
1975     return false;
1976   }
1977 
determineTokenType(FormatToken & Current)1978   void determineTokenType(FormatToken &Current) {
1979     if (Current.isNot(TT_Unknown)) {
1980       // The token type is already known.
1981       return;
1982     }
1983 
1984     if ((Style.isJavaScript() || Style.isCSharp()) &&
1985         Current.is(tok::exclaim)) {
1986       if (Current.Previous) {
1987         bool IsIdentifier =
1988             Style.isJavaScript()
1989                 ? Keywords.IsJavaScriptIdentifier(
1990                       *Current.Previous, /* AcceptIdentifierName= */ true)
1991                 : Current.Previous->is(tok::identifier);
1992         if (IsIdentifier ||
1993             Current.Previous->isOneOf(
1994                 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
1995                 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
1996                 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
1997             Current.Previous->Tok.isLiteral()) {
1998           Current.setType(TT_NonNullAssertion);
1999           return;
2000         }
2001       }
2002       if (Current.Next &&
2003           Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
2004         Current.setType(TT_NonNullAssertion);
2005         return;
2006       }
2007     }
2008 
2009     // Line.MightBeFunctionDecl can only be true after the parentheses of a
2010     // function declaration have been found. In this case, 'Current' is a
2011     // trailing token of this declaration and thus cannot be a name.
2012     if (Current.is(Keywords.kw_instanceof)) {
2013       Current.setType(TT_BinaryOperator);
2014     } else if (isStartOfName(Current) &&
2015                (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
2016       Contexts.back().FirstStartOfName = &Current;
2017       Current.setType(TT_StartOfName);
2018     } else if (Current.is(tok::semi)) {
2019       // Reset FirstStartOfName after finding a semicolon so that a for loop
2020       // with multiple increment statements is not confused with a for loop
2021       // having multiple variable declarations.
2022       Contexts.back().FirstStartOfName = nullptr;
2023     } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
2024       AutoFound = true;
2025     } else if (Current.is(tok::arrow) &&
2026                Style.Language == FormatStyle::LK_Java) {
2027       Current.setType(TT_TrailingReturnArrow);
2028     } else if (Current.is(tok::arrow) && Style.isVerilog()) {
2029       // The implication operator.
2030       Current.setType(TT_BinaryOperator);
2031     } else if (Current.is(tok::arrow) && AutoFound &&
2032                Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
2033                !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
2034       // not auto operator->() -> xxx;
2035       Current.setType(TT_TrailingReturnArrow);
2036     } else if (Current.is(tok::arrow) && Current.Previous &&
2037                Current.Previous->is(tok::r_brace)) {
2038       // Concept implicit conversion constraint needs to be treated like
2039       // a trailing return type  ... } -> <type>.
2040       Current.setType(TT_TrailingReturnArrow);
2041     } else if (isDeductionGuide(Current)) {
2042       // Deduction guides trailing arrow " A(...) -> A<T>;".
2043       Current.setType(TT_TrailingReturnArrow);
2044     } else if (Current.isPointerOrReference()) {
2045       Current.setType(determineStarAmpUsage(
2046           Current,
2047           Contexts.back().CanBeExpression && Contexts.back().IsExpression,
2048           Contexts.back().ContextType == Context::TemplateArgument));
2049     } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
2050                (Style.isVerilog() && Current.is(tok::pipe))) {
2051       Current.setType(determinePlusMinusCaretUsage(Current));
2052       if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
2053         Contexts.back().CaretFound = true;
2054     } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
2055       Current.setType(determineIncrementUsage(Current));
2056     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
2057       Current.setType(TT_UnaryOperator);
2058     } else if (Current.is(tok::question)) {
2059       if (Style.isJavaScript() && Line.MustBeDeclaration &&
2060           !Contexts.back().IsExpression) {
2061         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
2062         // on the interface, not a ternary expression.
2063         Current.setType(TT_JsTypeOptionalQuestion);
2064       } else {
2065         Current.setType(TT_ConditionalExpr);
2066       }
2067     } else if (Current.isBinaryOperator() &&
2068                (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
2069                (Current.isNot(tok::greater) &&
2070                 Style.Language != FormatStyle::LK_TextProto)) {
2071       if (Style.isVerilog()) {
2072         if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
2073             !Contexts.back().VerilogAssignmentFound) {
2074           // In Verilog `<=` is assignment if in its own statement. It is a
2075           // statement instead of an expression, that is it can not be chained.
2076           Current.ForcedPrecedence = prec::Assignment;
2077           Current.setFinalizedType(TT_BinaryOperator);
2078         }
2079         if (Current.getPrecedence() == prec::Assignment)
2080           Contexts.back().VerilogAssignmentFound = true;
2081       }
2082       Current.setType(TT_BinaryOperator);
2083     } else if (Current.is(tok::comment)) {
2084       if (Current.TokenText.starts_with("/*")) {
2085         if (Current.TokenText.ends_with("*/")) {
2086           Current.setType(TT_BlockComment);
2087         } else {
2088           // The lexer has for some reason determined a comment here. But we
2089           // cannot really handle it, if it isn't properly terminated.
2090           Current.Tok.setKind(tok::unknown);
2091         }
2092       } else {
2093         Current.setType(TT_LineComment);
2094       }
2095     } else if (Current.is(tok::string_literal)) {
2096       if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
2097           Current.getPreviousNonComment() &&
2098           Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
2099           Current.getNextNonComment() &&
2100           Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
2101         Current.setType(TT_StringInConcatenation);
2102       }
2103     } else if (Current.is(tok::l_paren)) {
2104       if (lParenStartsCppCast(Current))
2105         Current.setType(TT_CppCastLParen);
2106     } else if (Current.is(tok::r_paren)) {
2107       if (rParenEndsCast(Current))
2108         Current.setType(TT_CastRParen);
2109       if (Current.MatchingParen && Current.Next &&
2110           !Current.Next->isBinaryOperator() &&
2111           !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
2112                                  tok::comma, tok::period, tok::arrow,
2113                                  tok::coloncolon, tok::kw_noexcept)) {
2114         if (FormatToken *AfterParen = Current.MatchingParen->Next;
2115             AfterParen && AfterParen->isNot(tok::caret)) {
2116           // Make sure this isn't the return type of an Obj-C block declaration.
2117           if (FormatToken *BeforeParen = Current.MatchingParen->Previous;
2118               BeforeParen && BeforeParen->is(tok::identifier) &&
2119               BeforeParen->isNot(TT_TypenameMacro) &&
2120               BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
2121               (!BeforeParen->Previous ||
2122                BeforeParen->Previous->ClosesTemplateDeclaration ||
2123                BeforeParen->Previous->ClosesRequiresClause)) {
2124             Current.setType(TT_FunctionAnnotationRParen);
2125           }
2126         }
2127       }
2128     } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
2129                Style.Language != FormatStyle::LK_Java) {
2130       // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
2131       // marks declarations and properties that need special formatting.
2132       switch (Current.Next->Tok.getObjCKeywordID()) {
2133       case tok::objc_interface:
2134       case tok::objc_implementation:
2135       case tok::objc_protocol:
2136         Current.setType(TT_ObjCDecl);
2137         break;
2138       case tok::objc_property:
2139         Current.setType(TT_ObjCProperty);
2140         break;
2141       default:
2142         break;
2143       }
2144     } else if (Current.is(tok::period)) {
2145       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
2146       if (PreviousNoComment &&
2147           PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
2148         Current.setType(TT_DesignatedInitializerPeriod);
2149       } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
2150                  Current.Previous->isOneOf(TT_JavaAnnotation,
2151                                            TT_LeadingJavaAnnotation)) {
2152         Current.setType(Current.Previous->getType());
2153       }
2154     } else if (canBeObjCSelectorComponent(Current) &&
2155                // FIXME(bug 36976): ObjC return types shouldn't use
2156                // TT_CastRParen.
2157                Current.Previous && Current.Previous->is(TT_CastRParen) &&
2158                Current.Previous->MatchingParen &&
2159                Current.Previous->MatchingParen->Previous &&
2160                Current.Previous->MatchingParen->Previous->is(
2161                    TT_ObjCMethodSpecifier)) {
2162       // This is the first part of an Objective-C selector name. (If there's no
2163       // colon after this, this is the only place which annotates the identifier
2164       // as a selector.)
2165       Current.setType(TT_SelectorName);
2166     } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2167                                tok::kw_requires) &&
2168                Current.Previous &&
2169                !Current.Previous->isOneOf(tok::equal, tok::at,
2170                                           TT_CtorInitializerComma,
2171                                           TT_CtorInitializerColon) &&
2172                Line.MightBeFunctionDecl && Contexts.size() == 1) {
2173       // Line.MightBeFunctionDecl can only be true after the parentheses of a
2174       // function declaration have been found.
2175       Current.setType(TT_TrailingAnnotation);
2176     } else if ((Style.Language == FormatStyle::LK_Java ||
2177                 Style.isJavaScript()) &&
2178                Current.Previous) {
2179       if (Current.Previous->is(tok::at) &&
2180           Current.isNot(Keywords.kw_interface)) {
2181         const FormatToken &AtToken = *Current.Previous;
2182         const FormatToken *Previous = AtToken.getPreviousNonComment();
2183         if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2184           Current.setType(TT_LeadingJavaAnnotation);
2185         else
2186           Current.setType(TT_JavaAnnotation);
2187       } else if (Current.Previous->is(tok::period) &&
2188                  Current.Previous->isOneOf(TT_JavaAnnotation,
2189                                            TT_LeadingJavaAnnotation)) {
2190         Current.setType(Current.Previous->getType());
2191       }
2192     }
2193   }
2194 
2195   /// Take a guess at whether \p Tok starts a name of a function or
2196   /// variable declaration.
2197   ///
2198   /// This is a heuristic based on whether \p Tok is an identifier following
2199   /// something that is likely a type.
isStartOfName(const FormatToken & Tok)2200   bool isStartOfName(const FormatToken &Tok) {
2201     // Handled in ExpressionParser for Verilog.
2202     if (Style.isVerilog())
2203       return false;
2204 
2205     if (Tok.isNot(tok::identifier) || !Tok.Previous)
2206       return false;
2207 
2208     if (const auto *NextNonComment = Tok.getNextNonComment();
2209         (!NextNonComment && !Line.InMacroBody) ||
2210         (NextNonComment &&
2211          (NextNonComment->isPointerOrReference() ||
2212           NextNonComment->is(tok::string_literal) ||
2213           (Line.InPragmaDirective && NextNonComment->is(tok::identifier))))) {
2214       return false;
2215     }
2216 
2217     if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2218                               Keywords.kw_as)) {
2219       return false;
2220     }
2221     if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2222       return false;
2223 
2224     // Skip "const" as it does not have an influence on whether this is a name.
2225     FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2226 
2227     // For javascript const can be like "let" or "var"
2228     if (!Style.isJavaScript())
2229       while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2230         PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2231 
2232     if (!PreviousNotConst)
2233       return false;
2234 
2235     if (PreviousNotConst->ClosesRequiresClause)
2236       return false;
2237 
2238     if (Style.isTableGen()) {
2239       // keywords such as let and def* defines names.
2240       if (Keywords.isTableGenDefinition(*PreviousNotConst))
2241         return true;
2242     }
2243 
2244     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2245                        PreviousNotConst->Previous &&
2246                        PreviousNotConst->Previous->is(tok::hash);
2247 
2248     if (PreviousNotConst->is(TT_TemplateCloser)) {
2249       return PreviousNotConst && PreviousNotConst->MatchingParen &&
2250              PreviousNotConst->MatchingParen->Previous &&
2251              PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2252              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2253     }
2254 
2255     if ((PreviousNotConst->is(tok::r_paren) &&
2256          PreviousNotConst->is(TT_TypeDeclarationParen)) ||
2257         PreviousNotConst->is(TT_AttributeRParen)) {
2258       return true;
2259     }
2260 
2261     // If is a preprocess keyword like #define.
2262     if (IsPPKeyword)
2263       return false;
2264 
2265     // int a or auto a.
2266     if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
2267       return true;
2268 
2269     // *a or &a or &&a.
2270     if (PreviousNotConst->is(TT_PointerOrReference))
2271       return true;
2272 
2273     // MyClass a;
2274     if (PreviousNotConst->isSimpleTypeSpecifier())
2275       return true;
2276 
2277     // type[] a in Java
2278     if (Style.Language == FormatStyle::LK_Java &&
2279         PreviousNotConst->is(tok::r_square)) {
2280       return true;
2281     }
2282 
2283     // const a = in JavaScript.
2284     return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2285   }
2286 
2287   /// Determine whether '(' is starting a C++ cast.
lParenStartsCppCast(const FormatToken & Tok)2288   bool lParenStartsCppCast(const FormatToken &Tok) {
2289     // C-style casts are only used in C++.
2290     if (!Style.isCpp())
2291       return false;
2292 
2293     FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2294     if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2295         LeftOfParens->MatchingParen) {
2296       auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2297       if (Prev &&
2298           Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2299                         tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2300         // FIXME: Maybe we should handle identifiers ending with "_cast",
2301         // e.g. any_cast?
2302         return true;
2303       }
2304     }
2305     return false;
2306   }
2307 
2308   /// Determine whether ')' is ending a cast.
  bool rParenEndsCast(const FormatToken &Tok) {
    // Heuristic: \p Tok is a ')' and the checks below run in a fixed order,
    // each one either deciding the answer or falling through to the next.
    // The caller annotates the ')' as TT_CastRParen when this returns true.

    // C-style casts are only used in C++, C# and Java.
    if (!Style.isCSharp() && !Style.isCpp() &&
        Style.Language != FormatStyle::LK_Java) {
      return false;
    }

    // Empty parens aren't casts and there are no casts at the end of the line.
    if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
      return false;

    // The '(' was annotated as belonging to an overloaded operator.
    if (Tok.MatchingParen->is(TT_OverloadedOperatorLParen))
      return false;

    // First non-comment token in front of the matching '('; may be null when
    // the parentheses start the line.
    FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
    if (LeftOfParens) {
      // If there is a closing parenthesis left of the current
      // parentheses, look past it as these might be chained casts.
      if (LeftOfParens->is(tok::r_paren) &&
          LeftOfParens->isNot(TT_CastRParen)) {
        if (!LeftOfParens->MatchingParen ||
            !LeftOfParens->MatchingParen->Previous) {
          return false;
        }
        LeftOfParens = LeftOfParens->MatchingParen->Previous;
      }

      if (LeftOfParens->is(tok::r_square)) {
        //   delete[] (void *)ptr;
        // Returns the `delete` keyword if the tokens ending at Tok are exactly
        // `delete [ ]` (comments ignored), otherwise nullptr.
        auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
          if (Tok->isNot(tok::r_square))
            return nullptr;

          Tok = Tok->getPreviousNonComment();
          if (!Tok || Tok->isNot(tok::l_square))
            return nullptr;

          Tok = Tok->getPreviousNonComment();
          if (!Tok || Tok->isNot(tok::kw_delete))
            return nullptr;
          return Tok;
        };
        // Treat `delete[]` like a plain `delete` for the checks below.
        if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
          LeftOfParens = MaybeDelete;
      }

      // The condition directly below this one would otherwise see the operator
      // arguments as a (void *foo) cast.
      //   void operator delete(void *foo) ATTRIB;
      if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
          LeftOfParens->Previous->is(tok::kw_operator)) {
        return false;
      }

      // If there is an identifier (or with a few exceptions a keyword) right
      // before the parentheses, this is unlikely to be a cast.
      if (LeftOfParens->Tok.getIdentifierInfo() &&
          !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
                                 tok::kw_delete, tok::kw_throw)) {
        return false;
      }

      // Certain other tokens right before the parentheses are also signals that
      // this cannot be a cast.
      if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
                                TT_TemplateCloser, tok::ellipsis)) {
        return false;
      }
    }

    // A ')' followed by '?' or '&&' is never treated as the end of a cast.
    if (Tok.Next->isOneOf(tok::question, tok::ampamp))
      return false;

    // `foreach((A a, B b) in someList)` should not be seen as a cast.
    if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
      return false;

    // Functions which end with decorations like volatile, noexcept are unlikely
    // to be casts.
    if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
                          tok::kw_requires, tok::kw_throw, tok::arrow,
                          Keywords.kw_override, Keywords.kw_final) ||
        isCppAttribute(Style.isCpp(), *Tok.Next)) {
      return false;
    }

    // As Java has no function types, a "(" after the ")" likely means that this
    // is a cast.
    if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
      return true;

    // If a (non-string) literal follows, this is likely a cast.
    if (Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
        (Tok.Next->Tok.isLiteral() && Tok.Next->isNot(tok::string_literal))) {
      return true;
    }

    // Heuristically try to determine whether the parentheses contain a type.
    // The lambda walks backwards from T over qualifiers and attributes and
    // reports whether the walk ends on a token annotated TT_PointerOrReference.
    auto IsQualifiedPointerOrReference = [](FormatToken *T) {
      // This is used to handle cases such as x = (foo *const)&y;
      assert(!T->isSimpleTypeSpecifier() && "Should have already been checked");
      // Strip trailing qualifiers such as const or volatile when checking
      // whether the parens could be a cast to a pointer/reference type.
      while (T) {
        if (T->is(TT_AttributeRParen)) {
          // Handle `x = (foo *__attribute__((foo)))&v;`:
          assert(T->is(tok::r_paren));
          assert(T->MatchingParen);
          assert(T->MatchingParen->is(tok::l_paren));
          assert(T->MatchingParen->is(TT_AttributeLParen));
          // Continue in front of the attribute keyword; if there is none, the
          // walk stops on the ')' itself.
          if (const auto *Tok = T->MatchingParen->Previous;
              Tok && Tok->isAttribute()) {
            T = Tok->Previous;
            continue;
          }
        } else if (T->is(TT_AttributeSquare)) {
          // Handle `x = (foo *[[clang::foo]])&v;`:
          if (T->MatchingParen && T->MatchingParen->Previous) {
            T = T->MatchingParen->Previous;
            continue;
          }
        } else if (T->canBePointerOrReferenceQualifier()) {
          T = T->Previous;
          continue;
        }
        break;
      }
      return T && T->is(TT_PointerOrReference);
    };
    // The last token inside the parentheses decides whether they hold a type.
    bool ParensAreType =
        !Tok.Previous ||
        Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
        Tok.Previous->isSimpleTypeSpecifier() ||
        IsQualifiedPointerOrReference(Tok.Previous);
    // Tokens after the ')' that make "type in parens" not count as a cast.
    bool ParensCouldEndDecl =
        Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
    if (ParensAreType && !ParensCouldEndDecl)
      return true;

    // At this point, we heuristically assume that there are no casts at the
    // start of the line. We assume that we have found most cases where there
    // are by the logic above, e.g. "(void)x;".
    if (!LeftOfParens)
      return false;

    // Certain token types inside the parentheses mean that this can't be a
    // cast.
    for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
         Token = Token->Next) {
      if (Token->is(TT_BinaryOperator))
        return false;
    }

    // If the following token is an identifier or 'this', this is a cast. All
    // cases where this can be something else are handled above.
    if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
      return true;

    // Look for a cast `( x ) (`.
    if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
      if (Tok.Previous->is(tok::identifier) &&
          Tok.Previous->Previous->is(tok::l_paren)) {
        return true;
      }
    }

    // The remaining check needs the token after the one following the ')'.
    if (!Tok.Next->Next)
      return false;

    // If the next token after the parenthesis is a unary operator, assume
    // that this is cast, unless there are unexpected tokens inside the
    // parenthesis.
    const bool NextIsAmpOrStar = Tok.Next->isOneOf(tok::amp, tok::star);
    if (!(Tok.Next->isUnaryOperator() || NextIsAmpOrStar) ||
        Tok.Next->is(tok::plus) ||
        !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
      return false;
    }
    // `(a) * 2` / `(a) & 2`, and any '*' or '&' inside a preprocessor
    // directive, are not treated as a cast followed by a unary operator.
    if (NextIsAmpOrStar &&
        (Tok.Next->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
      return false;
    }
    // Likewise for '-' inside a preprocessor directive.
    if (Line.InPPDirective && Tok.Next->is(tok::minus))
      return false;
    // Search for unexpected tokens: only `const`, identifiers and `::` may
    // appear between the parentheses.
    for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
         Prev = Prev->Previous) {
      if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
        return false;
    }
    return true;
  }
2501 
2502   /// Returns true if the token is used as a unary operator.
determineUnaryOperatorByUsage(const FormatToken & Tok)2503   bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2504     const FormatToken *PrevToken = Tok.getPreviousNonComment();
2505     if (!PrevToken)
2506       return true;
2507 
2508     // These keywords are deliberately not included here because they may
2509     // precede only one of unary star/amp and plus/minus but not both.  They are
2510     // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2511     //
2512     // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2513     //   know how they can be followed by a star or amp.
2514     if (PrevToken->isOneOf(
2515             TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2516             tok::equal, tok::question, tok::l_square, tok::l_brace,
2517             tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2518             tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2519       return true;
2520     }
2521 
2522     // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2523     // where the unary `+` operator is overloaded, it is reasonable to write
2524     // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2525     if (PrevToken->is(tok::kw_sizeof))
2526       return true;
2527 
2528     // A sequence of leading unary operators.
2529     if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2530       return true;
2531 
2532     // There can't be two consecutive binary operators.
2533     if (PrevToken->is(TT_BinaryOperator))
2534       return true;
2535 
2536     return false;
2537   }
2538 
2539   /// Return the type of the given token assuming it is * or &.
  TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
                                  bool InTemplateArgument) {
    // Classifies \p Tok as TT_UnaryOperator, TT_BinaryOperator or
    // TT_PointerOrReference. The checks form an ordered chain of heuristics:
    // the first one that matches decides, so their order matters.
    // NOTE(review): judging from their use below, \p IsExpression and
    // \p InTemplateArgument tell whether the token sits in an expression
    // context / inside a template argument list - confirm at the call site.
    if (Style.isJavaScript())
      return TT_BinaryOperator;

    // && in C# must be a binary operator.
    if (Style.isCSharp() && Tok.is(tok::ampamp))
      return TT_BinaryOperator;

    if (Style.isVerilog()) {
      // In Verilog, `*` can only be a binary operator.  `&` can be either unary
      // or binary.  `*` also includes `*>` in module path declarations in
      // specify blocks because merged tokens take the type of the first one by
      // default.
      if (Tok.is(tok::star))
        return TT_BinaryOperator;
      return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
                                                : TT_BinaryOperator;
    }

    const FormatToken *PrevToken = Tok.getPreviousNonComment();
    if (!PrevToken)
      return TT_UnaryOperator;
    if (PrevToken->is(TT_TypeName))
      return TT_PointerOrReference;

    const FormatToken *NextToken = Tok.getNextNonComment();

    if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
      return TT_BinaryOperator;

    // Tokens after which (or the end of the line at which) the star/amp is
    // taken to be part of a type.
    if (!NextToken ||
        NextToken->isOneOf(tok::arrow, tok::equal, tok::comma, tok::r_paren,
                           TT_RequiresClause) ||
        (NextToken->is(tok::kw_noexcept) && !IsExpression) ||
        NextToken->canBePointerOrReferenceQualifier() ||
        (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
      return TT_PointerOrReference;
    }
    // From here on NextToken is known to be non-null.

    if (PrevToken->is(tok::coloncolon))
      return TT_PointerOrReference;

    if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
      return TT_PointerOrReference;

    if (determineUnaryOperatorByUsage(Tok))
      return TT_UnaryOperator;

    if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
      return TT_PointerOrReference;
    if (NextToken->is(tok::kw_operator) && !IsExpression)
      return TT_PointerOrReference;
    // A following tok::comma has already returned above, so in practice only
    // tok::semi can match here.
    if (NextToken->isOneOf(tok::comma, tok::semi))
      return TT_PointerOrReference;

    // After right braces, star tokens are likely to be pointers to struct,
    // union, or class.
    //   struct {} *ptr;
    // This by itself is not sufficient to distinguish from multiplication
    // following a brace-initialized expression, as in:
    // int i = int{42} * 2;
    // In the struct case, the part of the struct declaration until the `{` and
    // the `}` are put on separate unwrapped lines; in the brace-initialized
    // case, the matching `{` is on the same unwrapped line, so check for the
    // presence of the matching brace to distinguish between those.
    if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
        !PrevToken->MatchingParen) {
      return TT_PointerOrReference;
    }

    // `delete[] *ptr` / `delete[] &ref`: the operand of an array delete.
    if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
      return TT_UnaryOperator;

    // A literal or a closing bracket on the left is treated as a left-hand
    // operand.
    if (PrevToken->Tok.isLiteral() ||
        PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
                           tok::kw_false, tok::r_brace)) {
      return TT_BinaryOperator;
    }

    // Look through any opening parentheses on the right: a literal, boolean
    // keyword or unary operator there is treated as a right-hand operand.
    const FormatToken *NextNonParen = NextToken;
    while (NextNonParen && NextNonParen->is(tok::l_paren))
      NextNonParen = NextNonParen->getNextNonComment();
    if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
                         NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
                         NextNonParen->isUnaryOperator())) {
      return TT_BinaryOperator;
    }

    // If we know we're in a template argument, there are no named declarations.
    // Thus, having an identifier on the right-hand side indicates a binary
    // operator.
    if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
      return TT_BinaryOperator;

    // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
    // unary "&".
    if (Tok.is(tok::ampamp) &&
        NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
      return TT_BinaryOperator;
    }

    // This catches some cases where evaluation order is used as control flow:
    //   aaa && aaa->f();
    if (NextToken->Tok.isAnyIdentifier()) {
      const FormatToken *NextNextToken = NextToken->getNextNonComment();
      if (NextNextToken && NextNextToken->is(tok::arrow))
        return TT_BinaryOperator;
    }

    // It is very unlikely that we are going to find a pointer or reference type
    // definition on the RHS of an assignment.
    if (IsExpression && !Contexts.back().CaretFound)
      return TT_BinaryOperator;

    // Operators at class scope are likely pointer or reference members.
    if (!Scopes.empty() && Scopes.back() == ST_Class)
      return TT_PointerOrReference;

    // Tokens that indicate member access or chained operator& use. A missing
    // token (nullptr) counts as a match as well.
    auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
      return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
                                      tok::arrowstar, tok::periodstar);
    };

    // It's more likely that & represents operator& than an uninitialized
    // reference.
    if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
        IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
        NextToken && NextToken->Tok.isAnyIdentifier()) {
      if (auto NextNext = NextToken->getNextNonComment();
          NextNext &&
          (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
        return TT_BinaryOperator;
      }
    }

    // No heuristic matched: default to a pointer or reference.
    return TT_PointerOrReference;
  }
2679 
determinePlusMinusCaretUsage(const FormatToken & Tok)2680   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2681     if (determineUnaryOperatorByUsage(Tok))
2682       return TT_UnaryOperator;
2683 
2684     const FormatToken *PrevToken = Tok.getPreviousNonComment();
2685     if (!PrevToken)
2686       return TT_UnaryOperator;
2687 
2688     if (PrevToken->is(tok::at))
2689       return TT_UnaryOperator;
2690 
2691     // Fall back to marking the token as binary operator.
2692     return TT_BinaryOperator;
2693   }
2694 
2695   /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)2696   TokenType determineIncrementUsage(const FormatToken &Tok) {
2697     const FormatToken *PrevToken = Tok.getPreviousNonComment();
2698     if (!PrevToken || PrevToken->is(TT_CastRParen))
2699       return TT_UnaryOperator;
2700     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2701       return TT_TrailingUnaryOperator;
2702 
2703     return TT_UnaryOperator;
2704   }
2705 
  // Parsing contexts. The annotation code reads the current one through
  // Contexts.back() and tests Contexts.size() == 1.
  // NOTE(review): presumably back() is the innermost context and size() == 1
  // means "not nested" - confirm against the code that pushes and pops.
  SmallVector<Context, 8> Contexts;

  // Formatting style; consulted for the language-specific decisions.
  const FormatStyle &Style;
  // The line whose tokens are being annotated.
  AnnotatedLine &Line;
  FormatToken *CurrentToken;
  bool AutoFound;
  // Language-specific keywords that are lexed as plain identifiers, e.g.
  // Keywords.kw_in and Keywords.kw_instanceof.
  const AdditionalKeywords &Keywords;

  // Scope kinds, held by reference (not owned here). determineStarAmpUsage
  // checks whether the last entry is ST_Class.
  SmallVector<ScopeType> &Scopes;

  // Set of "<" tokens that do not open a template parameter list. If parseAngle
  // determines that a specific token can't be a template opener, it will make
  // the same decision irrespective of the decisions for tokens leading up to
  // it. Store this information to prevent this from causing exponential
  // runtime.
  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2721 };
2722 
2723 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
2724 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2725 
2726 /// Parses binary expressions by inserting fake parenthesis based on
2727 /// operator precedence.
2728 class ExpressionParser {
2729 public:
ExpressionParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,AnnotatedLine & Line)2730   ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
2731                    AnnotatedLine &Line)
2732       : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
2733 
2734   /// Parse expressions with the given operator precedence.
  void parse(int Precedence = 0) {
    // Precedence climbing by recursion: each level first lets parse(P + 1)
    // consume everything that binds tighter, then chains the operators of its
    // own level (NextOperator / OperatorIndex), and finally wraps the operands
    // it saw in a fake parenthesis of that level.

    // Skip 'return' and ObjC selector colons as they are not part of a binary
    // expression.
    while (Current && (Current->is(tok::kw_return) ||
                       (Current->is(tok::colon) &&
                        Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
      next();
    }

    // Recursion ends at the end of the line or above the highest pseudo level.
    if (!Current || Precedence > PrecedenceArrowAndPeriod)
      return;

    // Conditional expressions need to be parsed separately for proper nesting.
    if (Precedence == prec::Conditional) {
      parseConditionalExpr();
      return;
    }

    // Parse unary operators, which all have a higher precedence than binary
    // operators.
    if (Precedence == PrecedenceUnaryOperator) {
      parseUnaryOperator();
      return;
    }

    // First token of the operand sequence handled at this level.
    FormatToken *Start = Current;
    // Most recent operator of exactly this precedence, or null if none so far.
    FormatToken *LatestOperator = nullptr;
    // Running index assigned to the operators of this level.
    unsigned OperatorIndex = 0;
    // The first name of the current type in a port list.
    FormatToken *VerilogFirstOfType = nullptr;

    while (Current) {
      // In Verilog ports in a module header that don't have a type take the
      // type of the previous one.  For example,
      //   module a(output b,
      //                   c,
      //            output d);
      // In this case there need to be fake parentheses around b and c.
      if (Style.isVerilog() && Precedence == prec::Comma) {
        VerilogFirstOfType =
            verilogGroupDecl(VerilogFirstOfType, LatestOperator);
      }

      // Consume operators with higher precedence.
      parse(Precedence + 1);

      int CurrentPrecedence = getCurrentPrecedence();

      // A selector name at this level starts a new operand group: close the
      // group collected so far (if it had an operator) and restart from here.
      if (Precedence == CurrentPrecedence && Current &&
          Current->is(TT_SelectorName)) {
        if (LatestOperator)
          addFakeParenthesis(Start, prec::Level(Precedence));
        Start = Current;
      }

      if ((Style.isCSharp() || Style.isJavaScript() ||
           Style.Language == FormatStyle::LK_Java) &&
          Precedence == prec::Additive && Current) {
        // A string can be broken without parentheses around it when it is
        // already in a sequence of strings joined by `+` signs.
        FormatToken *Prev = Current->getPreviousNonComment();
        if (Prev && Prev->is(tok::string_literal) &&
            (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
                                                 TT_StringInConcatenation))) {
          Prev->setType(TT_StringInConcatenation);
        }
      }

      // At the end of the line or when an operator with lower precedence is
      // found, insert fake parenthesis and return.
      if (!Current ||
          (Current->closesScope() &&
           (Current->MatchingParen || Current->is(TT_TemplateString))) ||
          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
          (CurrentPrecedence == prec::Conditional &&
           Precedence == prec::Assignment && Current->is(tok::colon))) {
        break;
      }

      // Consume scopes: (), [], <> and {}
      // In addition to that we handle require clauses as scope, so that the
      // constraints in that are correctly indented.
      if (Current->opensScope() ||
          Current->isOneOf(TT_RequiresClause,
                           TT_RequiresClauseInARequiresExpression)) {
        // In fragment of a JavaScript template string can look like '}..${' and
        // thus close a scope and open a new one at the same time.
        while (Current && (!Current->closesScope() || Current->opensScope())) {
          next();
          parse();
        }
        // Step over the token that closed the scope.
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence) {
          // Link it into the chain of same-level operators and number it.
          if (LatestOperator)
            LatestOperator->NextOperator = Current;
          LatestOperator = Current;
          Current->OperatorIndex = OperatorIndex;
          ++OperatorIndex;
        }
        next(/*SkipPastLeadingComments=*/Precedence > 0);
      }
    }

    // Group variables of the same type.
    if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
      addFakeParenthesis(VerilogFirstOfType, prec::Comma);

    // Only wrap when an operator of this level was seen; at level 0 this
    // additionally requires that the end of the line has not been reached.
    if (LatestOperator && (Current || Precedence > 0)) {
      // The requires clauses do not necessarily end in a semicolon or a brace,
      // but just go over to struct/class or a function declaration, we need to
      // intervene so that the fake right paren is inserted correctly.
      // When Start directly follows a requires-clause token, End is found by
      // walking back from Current (or the line's last token) to the token
      // flagged ClosesRequiresClause; the walk stops at the first token if
      // none is flagged. Otherwise End stays null.
      auto End =
          (Start->Previous &&
           Start->Previous->isOneOf(TT_RequiresClause,
                                    TT_RequiresClauseInARequiresExpression))
              ? [this]() {
                  auto Ret = Current ? Current : Line.Last;
                  while (!Ret->ClosesRequiresClause && Ret->Previous)
                    Ret = Ret->Previous;
                  return Ret;
                }()
              : nullptr;

      if (Precedence == PrecedenceArrowAndPeriod) {
        // Call expressions don't have a binary operator precedence.
        addFakeParenthesis(Start, prec::Unknown, End);
      } else {
        addFakeParenthesis(Start, prec::Level(Precedence), End);
      }
    }
  }
2868 
2869 private:
2870   /// Gets the precedence (+1) of the given token for binary operators
2871   /// and other tokens that we treat like binary operators.
getCurrentPrecedence()2872   int getCurrentPrecedence() {
2873     if (Current) {
2874       const FormatToken *NextNonComment = Current->getNextNonComment();
2875       if (Current->is(TT_ConditionalExpr))
2876         return prec::Conditional;
2877       if (NextNonComment && Current->is(TT_SelectorName) &&
2878           (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2879            (Style.isProto() && NextNonComment->is(tok::less)))) {
2880         return prec::Assignment;
2881       }
2882       if (Current->is(TT_JsComputedPropertyName))
2883         return prec::Assignment;
2884       if (Current->is(TT_TrailingReturnArrow))
2885         return prec::Comma;
2886       if (Current->is(TT_FatArrow))
2887         return prec::Assignment;
2888       if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2889           (Current->is(tok::comment) && NextNonComment &&
2890            NextNonComment->is(TT_SelectorName))) {
2891         return 0;
2892       }
2893       if (Current->is(TT_RangeBasedForLoopColon))
2894         return prec::Comma;
2895       if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2896           Current->is(Keywords.kw_instanceof)) {
2897         return prec::Relational;
2898       }
2899       if (Style.isJavaScript() &&
2900           Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
2901         return prec::Relational;
2902       }
2903       if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2904         return Current->getPrecedence();
2905       if (Current->isOneOf(tok::period, tok::arrow) &&
2906           Current->isNot(TT_TrailingReturnArrow)) {
2907         return PrecedenceArrowAndPeriod;
2908       }
2909       if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2910           Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2911                            Keywords.kw_throws)) {
2912         return 0;
2913       }
2914       // In Verilog case labels are not on separate lines straight out of
2915       // UnwrappedLineParser. The colon is not part of an expression.
2916       if (Style.isVerilog() && Current->is(tok::colon))
2917         return 0;
2918     }
2919     return -1;
2920   }
2921 
addFakeParenthesis(FormatToken * Start,prec::Level Precedence,FormatToken * End=nullptr)2922   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
2923                           FormatToken *End = nullptr) {
2924     // Do not assign fake parenthesis to tokens that are part of an
2925     // unexpanded macro call. The line within the macro call contains
2926     // the parenthesis and commas, and we will not find operators within
2927     // that structure.
2928     if (Start->MacroParent)
2929       return;
2930 
2931     Start->FakeLParens.push_back(Precedence);
2932     if (Precedence > prec::Unknown)
2933       Start->StartsBinaryExpression = true;
2934     if (!End && Current)
2935       End = Current->getPreviousNonComment();
2936     if (End) {
2937       ++End->FakeRParens;
2938       if (Precedence > prec::Unknown)
2939         End->EndsBinaryExpression = true;
2940     }
2941   }
2942 
2943   /// Parse unary operator expressions and surround them with fake
2944   /// parentheses if appropriate.
parseUnaryOperator()2945   void parseUnaryOperator() {
2946     llvm::SmallVector<FormatToken *, 2> Tokens;
2947     while (Current && Current->is(TT_UnaryOperator)) {
2948       Tokens.push_back(Current);
2949       next();
2950     }
2951     parse(PrecedenceArrowAndPeriod);
2952     for (FormatToken *Token : llvm::reverse(Tokens)) {
2953       // The actual precedence doesn't matter.
2954       addFakeParenthesis(Token, prec::Unknown);
2955     }
2956   }
2957 
parseConditionalExpr()2958   void parseConditionalExpr() {
2959     while (Current && Current->isTrailingComment())
2960       next();
2961     FormatToken *Start = Current;
2962     parse(prec::LogicalOr);
2963     if (!Current || Current->isNot(tok::question))
2964       return;
2965     next();
2966     parse(prec::Assignment);
2967     if (!Current || Current->isNot(TT_ConditionalExpr))
2968       return;
2969     next();
2970     parse(prec::Assignment);
2971     addFakeParenthesis(Start, prec::Conditional);
2972   }
2973 
next(bool SkipPastLeadingComments=true)2974   void next(bool SkipPastLeadingComments = true) {
2975     if (Current)
2976       Current = Current->Next;
2977     while (Current &&
2978            (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2979            Current->isTrailingComment()) {
2980       Current = Current->Next;
2981     }
2982   }
2983 
2984   // Add fake parenthesis around declarations of the same type for example in a
2985   // module prototype. Return the first port / variable of the current type.
verilogGroupDecl(FormatToken * FirstOfType,FormatToken * PreviousComma)2986   FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
2987                                 FormatToken *PreviousComma) {
2988     if (!Current)
2989       return nullptr;
2990 
2991     FormatToken *Start = Current;
2992 
2993     // Skip attributes.
2994     while (Start->startsSequence(tok::l_paren, tok::star)) {
2995       if (!(Start = Start->MatchingParen) ||
2996           !(Start = Start->getNextNonComment())) {
2997         return nullptr;
2998       }
2999     }
3000 
3001     FormatToken *Tok = Start;
3002 
3003     if (Tok->is(Keywords.kw_assign))
3004       Tok = Tok->getNextNonComment();
3005 
3006     // Skip any type qualifiers to find the first identifier. It may be either a
3007     // new type name or a variable name. There can be several type qualifiers
3008     // preceding a variable name, and we can not tell them apart by looking at
3009     // the word alone since a macro can be defined as either a type qualifier or
3010     // a variable name. Thus we use the last word before the dimensions instead
3011     // of the first word as the candidate for the variable or type name.
3012     FormatToken *First = nullptr;
3013     while (Tok) {
3014       FormatToken *Next = Tok->getNextNonComment();
3015 
3016       if (Tok->is(tok::hash)) {
3017         // Start of a macro expansion.
3018         First = Tok;
3019         Tok = Next;
3020         if (Tok)
3021           Tok = Tok->getNextNonComment();
3022       } else if (Tok->is(tok::hashhash)) {
3023         // Concatenation. Skip.
3024         Tok = Next;
3025         if (Tok)
3026           Tok = Tok->getNextNonComment();
3027       } else if (Keywords.isVerilogQualifier(*Tok) ||
3028                  Keywords.isVerilogIdentifier(*Tok)) {
3029         First = Tok;
3030         Tok = Next;
3031         // The name may have dots like `interface_foo.modport_foo`.
3032         while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
3033                (Tok = Tok->getNextNonComment())) {
3034           if (Keywords.isVerilogIdentifier(*Tok))
3035             Tok = Tok->getNextNonComment();
3036         }
3037       } else if (!Next) {
3038         Tok = nullptr;
3039       } else if (Tok->is(tok::l_paren)) {
3040         // Make sure the parenthesized list is a drive strength. Otherwise the
3041         // statement may be a module instantiation in which case we have already
3042         // found the instance name.
3043         if (Next->isOneOf(
3044                 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
3045                 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
3046                 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
3047                 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
3048                 Keywords.kw_weak1)) {
3049           Tok->setType(TT_VerilogStrength);
3050           Tok = Tok->MatchingParen;
3051           if (Tok) {
3052             Tok->setType(TT_VerilogStrength);
3053             Tok = Tok->getNextNonComment();
3054           }
3055         } else {
3056           break;
3057         }
3058       } else if (Tok->is(tok::hash)) {
3059         if (Next->is(tok::l_paren))
3060           Next = Next->MatchingParen;
3061         if (Next)
3062           Tok = Next->getNextNonComment();
3063       } else {
3064         break;
3065       }
3066     }
3067 
3068     // Find the second identifier. If it exists it will be the name.
3069     FormatToken *Second = nullptr;
3070     // Dimensions.
3071     while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
3072       Tok = Tok->getNextNonComment();
3073     if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3074       Second = Tok;
3075 
3076     // If the second identifier doesn't exist and there are qualifiers, the type
3077     // is implied.
3078     FormatToken *TypedName = nullptr;
3079     if (Second) {
3080       TypedName = Second;
3081       if (First && First->is(TT_Unknown))
3082         First->setType(TT_VerilogDimensionedTypeName);
3083     } else if (First != Start) {
3084       // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3085       // to null as intended.
3086       TypedName = First;
3087     }
3088 
3089     if (TypedName) {
3090       // This is a declaration with a new type.
3091       if (TypedName->is(TT_Unknown))
3092         TypedName->setType(TT_StartOfName);
3093       // Group variables of the previous type.
3094       if (FirstOfType && PreviousComma) {
3095         PreviousComma->setType(TT_VerilogTypeComma);
3096         addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3097       }
3098 
3099       FirstOfType = TypedName;
3100 
3101       // Don't let higher precedence handle the qualifiers. For example if we
3102       // have:
3103       //    parameter x = 0
3104       // We skip `parameter` here. This way the fake parentheses for the
3105       // assignment will be around `x = 0`.
3106       while (Current && Current != FirstOfType) {
3107         if (Current->opensScope()) {
3108           next();
3109           parse();
3110         }
3111         next();
3112       }
3113     }
3114 
3115     return FirstOfType;
3116   }
3117 
3118   const FormatStyle &Style;
3119   const AdditionalKeywords &Keywords;
3120   const AnnotatedLine &Line;
3121   FormatToken *Current;
3122 };
3123 
3124 } // end anonymous namespace
3125 
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines) const3126 void TokenAnnotator::setCommentLineLevels(
3127     SmallVectorImpl<AnnotatedLine *> &Lines) const {
3128   const AnnotatedLine *NextNonCommentLine = nullptr;
3129   for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3130     assert(Line->First);
3131 
3132     // If the comment is currently aligned with the line immediately following
3133     // it, that's probably intentional and we should keep it.
3134     if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
3135         Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
3136         NextNonCommentLine->First->OriginalColumn ==
3137             Line->First->OriginalColumn) {
3138       const bool PPDirectiveOrImportStmt =
3139           NextNonCommentLine->Type == LT_PreprocessorDirective ||
3140           NextNonCommentLine->Type == LT_ImportStatement;
3141       if (PPDirectiveOrImportStmt)
3142         Line->Type = LT_CommentAbovePPDirective;
3143       // Align comments for preprocessor lines with the # in column 0 if
3144       // preprocessor lines are not indented. Otherwise, align with the next
3145       // line.
3146       Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3147                             PPDirectiveOrImportStmt
3148                         ? 0
3149                         : NextNonCommentLine->Level;
3150     } else {
3151       NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3152     }
3153 
3154     setCommentLineLevels(Line->Children);
3155   }
3156 }
3157 
maxNestingDepth(const AnnotatedLine & Line)3158 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3159   unsigned Result = 0;
3160   for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3161     Result = std::max(Result, Tok->NestingLevel);
3162   return Result;
3163 }
3164 
3165 // Returns the name of a function with no return type, e.g. a constructor or
3166 // destructor.
getFunctionName(const AnnotatedLine & Line)3167 static FormatToken *getFunctionName(const AnnotatedLine &Line) {
3168   for (FormatToken *Tok = Line.getFirstNonComment(), *Name = nullptr; Tok;
3169        Tok = Tok->getNextNonComment()) {
3170     // Skip C++11 attributes both before and after the function name.
3171     if (Tok->is(tok::l_square) && Tok->is(TT_AttributeSquare)) {
3172       Tok = Tok->MatchingParen;
3173       if (!Tok)
3174         break;
3175       continue;
3176     }
3177 
3178     // Make sure the name is followed by a pair of parentheses.
3179     if (Name) {
3180       return Tok->is(tok::l_paren) && Tok->isNot(TT_FunctionTypeLParen) &&
3181                      Tok->MatchingParen
3182                  ? Name
3183                  : nullptr;
3184     }
3185 
3186     // Skip keywords that may precede the constructor/destructor name.
3187     if (Tok->isOneOf(tok::kw_friend, tok::kw_inline, tok::kw_virtual,
3188                      tok::kw_constexpr, tok::kw_consteval, tok::kw_explicit)) {
3189       continue;
3190     }
3191 
3192     // A qualified name may start from the global namespace.
3193     if (Tok->is(tok::coloncolon)) {
3194       Tok = Tok->Next;
3195       if (!Tok)
3196         break;
3197     }
3198 
3199     // Skip to the unqualified part of the name.
3200     while (Tok->startsSequence(tok::identifier, tok::coloncolon)) {
3201       assert(Tok->Next);
3202       Tok = Tok->Next->Next;
3203       if (!Tok)
3204         return nullptr;
3205     }
3206 
3207     // Skip the `~` if a destructor name.
3208     if (Tok->is(tok::tilde)) {
3209       Tok = Tok->Next;
3210       if (!Tok)
3211         break;
3212     }
3213 
3214     // Make sure the name is not already annotated, e.g. as NamespaceMacro.
3215     if (Tok->isNot(tok::identifier) || Tok->isNot(TT_Unknown))
3216       break;
3217 
3218     Name = Tok;
3219   }
3220 
3221   return nullptr;
3222 }
3223 
3224 // Checks if Tok is a constructor/destructor name qualified by its class name.
isCtorOrDtorName(const FormatToken * Tok)3225 static bool isCtorOrDtorName(const FormatToken *Tok) {
3226   assert(Tok && Tok->is(tok::identifier));
3227   const auto *Prev = Tok->Previous;
3228 
3229   if (Prev && Prev->is(tok::tilde))
3230     Prev = Prev->Previous;
3231 
3232   if (!Prev || !Prev->endsSequence(tok::coloncolon, tok::identifier))
3233     return false;
3234 
3235   assert(Prev->Previous);
3236   return Prev->Previous->TokenText == Tok->TokenText;
3237 }
3238 
annotate(AnnotatedLine & Line)3239 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3240   AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3241   Line.Type = Parser.parseLine();
3242 
3243   for (auto &Child : Line.Children)
3244     annotate(*Child);
3245 
3246   // With very deep nesting, ExpressionParser uses lots of stack and the
3247   // formatting algorithm is very slow. We're not going to do a good job here
3248   // anyway - it's probably generated code being formatted by mistake.
3249   // Just skip the whole line.
3250   if (maxNestingDepth(Line) > 50)
3251     Line.Type = LT_Invalid;
3252 
3253   if (Line.Type == LT_Invalid)
3254     return;
3255 
3256   ExpressionParser ExprParser(Style, Keywords, Line);
3257   ExprParser.parse();
3258 
3259   if (Style.isCpp()) {
3260     auto *Tok = getFunctionName(Line);
3261     if (Tok && ((!Scopes.empty() && Scopes.back() == ST_Class) ||
3262                 Line.endsWith(TT_FunctionLBrace) || isCtorOrDtorName(Tok))) {
3263       Tok->setFinalizedType(TT_CtorDtorDeclName);
3264     }
3265   }
3266 
3267   if (Line.startsWith(TT_ObjCMethodSpecifier))
3268     Line.Type = LT_ObjCMethodDecl;
3269   else if (Line.startsWith(TT_ObjCDecl))
3270     Line.Type = LT_ObjCDecl;
3271   else if (Line.startsWith(TT_ObjCProperty))
3272     Line.Type = LT_ObjCProperty;
3273 
3274   auto *First = Line.First;
3275   First->SpacesRequiredBefore = 1;
3276   First->CanBreakBefore = First->MustBreakBefore;
3277 
3278   if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
3279       Style.InsertNewlineAtEOF) {
3280     First->NewlinesBefore = 1;
3281   }
3282 }
3283 
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
//
// 'IsCpp' enables the C++ attribute skipping and the K&R / unnamed-parameter
// check below. 'Line' is the line containing 'Current'. 'ClosingParen' is an
// output: it is set to the ')' matching the parameter list's '(' as soon as
// such a pair has been located, which can happen even when the function then
// returns false. It is left untouched on the earlier return paths.
static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
                                      const AnnotatedLine &Line,
                                      FormatToken *&ClosingParen) {
  assert(Current.Previous);

  // Already annotated as a function declaration name.
  if (Current.is(TT_FunctionDeclarationName))
    return true;

  // Only identifiers and keywords can be function names.
  if (!Current.Tok.getIdentifierInfo())
    return false;

  // Starting after the `operator` keyword, skips the tokens that make up the
  // operator's name. Returns the TT_OverloadedOperatorLParen token that opens
  // the parameter list, or null if none is reached.
  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_OverloadedOperatorLParen))
        return Next;
      if (Next->is(TT_OverloadedOperator))
        continue;
      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
        // For 'new[]' and 'delete[]'.
        if (Next->Next &&
            Next->Next->startsSequence(tok::l_square, tok::r_square)) {
          Next = Next->Next->Next;
        }
        continue;
      }
      if (Next->startsSequence(tok::l_square, tok::r_square)) {
        // For operator[]().
        Next = Next->Next;
        continue;
      }
      if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
          Next->Next && Next->Next->isPointerOrReference()) {
        // For operator void*(), operator char*(), operator Foo*().
        Next = Next->Next;
        continue;
      }
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        // Skip a whole template argument list.
        Next = Next->MatchingParen;
        continue;
      }

      break;
    }
    return nullptr;
  };

  // Find parentheses of parameter list.
  const FormatToken *Next = Current.Next;
  if (Current.is(tok::kw_operator)) {
    const auto *Previous = Current.Previous;
    // `operator` preceded by an identifier or keyword other than
    // return/co_return is taken to be a declaration.
    if (Previous->Tok.getIdentifierInfo() &&
        !Previous->isOneOf(tok::kw_return, tok::kw_co_return)) {
      return true;
    }
    // `operator` preceded by the closing paren of a type declaration.
    if (Previous->is(tok::r_paren) && Previous->is(TT_TypeDeclarationParen)) {
      assert(Previous->MatchingParen);
      assert(Previous->MatchingParen->is(tok::l_paren));
      assert(Previous->MatchingParen->is(TT_TypeDeclarationParen));
      return true;
    }
    // Otherwise only a preceding */&/&& or template closer can end a return
    // type.
    if (!Previous->isPointerOrReference() && Previous->isNot(TT_TemplateCloser))
      return false;
    Next = skipOperatorName(Next);
  } else {
    // A regular name must be a top-level TT_StartOfName.
    if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0)
      return false;
    // Walk over template arguments, `::` qualifiers and C++ attributes until
    // the opening paren is found; anything else means this is not a function
    // name.
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        Next = Next->MatchingParen;
      } else if (Next->is(tok::coloncolon)) {
        Next = Next->Next;
        if (!Next)
          return false;
        if (Next->is(tok::kw_operator)) {
          Next = skipOperatorName(Next->Next);
          break;
        }
        if (Next->isNot(tok::identifier))
          return false;
      } else if (isCppAttribute(IsCpp, *Next)) {
        Next = Next->MatchingParen;
        if (!Next)
          return false;
      } else if (Next->is(tok::l_paren)) {
        break;
      } else {
        return false;
      }
    }
  }

  // Check whether parameter list can belong to a function declaration.
  if (!Next || Next->isNot(tok::l_paren) || !Next->MatchingParen)
    return false;
  ClosingParen = Next->MatchingParen;
  assert(ClosingParen->is(tok::r_paren));
  // If the lines ends with "{", this is likely a function definition.
  if (Line.Last->is(tok::l_brace))
    return true;
  if (Next->Next == ClosingParen)
    return true; // Empty parentheses.
  // If there is an &/&& after the r_paren, this is likely a function.
  if (ClosingParen->Next && ClosingParen->Next->is(TT_PointerOrReference))
    return true;

  // Check for K&R C function definitions (and C++ function definitions with
  // unnamed parameters), e.g.:
  //   int f(i)
  //   {
  //     return i + 1;
  //   }
  //   bool g(size_t = 0, bool b = false)
  //   {
  //     return !b;
  //   }
  if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
      !Line.endsWith(tok::semi)) {
    return true;
  }

  // Scan the tokens between the parentheses: anything that looks like part of
  // a type makes this a declaration, while a brace, an ObjC method expression
  // or a literal means it is not one. Nested parentheses and template
  // argument lists are skipped as a whole.
  for (const FormatToken *Tok = Next->Next; Tok && Tok != ClosingParen;
       Tok = Tok->Next) {
    if (Tok->is(TT_TypeDeclarationParen))
      return true;
    if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
      Tok = Tok->MatchingParen;
      continue;
    }
    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis,
                     TT_TypeName)) {
      return true;
    }
    if (Tok->isOneOf(tok::l_brace, TT_ObjCMethodExpr) || Tok->Tok.isLiteral())
      return false;
  }
  return false;
}
3424 
mustBreakForReturnType(const AnnotatedLine & Line) const3425 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3426   assert(Line.MightBeFunctionDecl);
3427 
3428   if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3429        Style.AlwaysBreakAfterReturnType ==
3430            FormatStyle::RTBS_TopLevelDefinitions) &&
3431       Line.Level > 0) {
3432     return false;
3433   }
3434 
3435   switch (Style.AlwaysBreakAfterReturnType) {
3436   case FormatStyle::RTBS_None:
3437     return false;
3438   case FormatStyle::RTBS_All:
3439   case FormatStyle::RTBS_TopLevel:
3440     return true;
3441   case FormatStyle::RTBS_AllDefinitions:
3442   case FormatStyle::RTBS_TopLevelDefinitions:
3443     return Line.mightBeFunctionDefinition();
3444   }
3445 
3446   return false;
3447 }
3448 
calculateFormattingInformation(AnnotatedLine & Line) const3449 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
3450   for (AnnotatedLine *ChildLine : Line.Children)
3451     calculateFormattingInformation(*ChildLine);
3452 
3453   auto *First = Line.First;
3454   First->TotalLength = First->IsMultiline
3455                            ? Style.ColumnLimit
3456                            : Line.FirstStartColumn + First->ColumnWidth;
3457   FormatToken *Current = First->Next;
3458   bool InFunctionDecl = Line.MightBeFunctionDecl;
3459   bool AlignArrayOfStructures =
3460       (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
3461        Line.Type == LT_ArrayOfStructInitializer);
3462   if (AlignArrayOfStructures)
3463     calculateArrayInitializerColumnList(Line);
3464 
3465   const bool IsCpp = Style.isCpp();
3466   bool SeenName = false;
3467   bool LineIsFunctionDeclaration = false;
3468   FormatToken *ClosingParen = nullptr;
3469   FormatToken *AfterLastAttribute = nullptr;
3470 
3471   for (auto *Tok = Current; Tok; Tok = Tok->Next) {
3472     if (Tok->is(TT_StartOfName))
3473       SeenName = true;
3474     if (Tok->Previous->EndsCppAttributeGroup)
3475       AfterLastAttribute = Tok;
3476     if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
3477         IsCtorOrDtor ||
3478         isFunctionDeclarationName(Style.isCpp(), *Tok, Line, ClosingParen)) {
3479       if (!IsCtorOrDtor)
3480         Tok->setFinalizedType(TT_FunctionDeclarationName);
3481       LineIsFunctionDeclaration = true;
3482       SeenName = true;
3483       break;
3484     }
3485   }
3486 
3487   if (IsCpp && (LineIsFunctionDeclaration || First->is(TT_CtorDtorDeclName)) &&
3488       Line.endsWith(tok::semi, tok::r_brace)) {
3489     auto *Tok = Line.Last->Previous;
3490     while (Tok->isNot(tok::r_brace))
3491       Tok = Tok->Previous;
3492     if (auto *LBrace = Tok->MatchingParen; LBrace) {
3493       assert(LBrace->is(tok::l_brace));
3494       Tok->setBlockKind(BK_Block);
3495       LBrace->setBlockKind(BK_Block);
3496       LBrace->setFinalizedType(TT_FunctionLBrace);
3497     }
3498   }
3499 
3500   if (IsCpp && SeenName && AfterLastAttribute &&
3501       mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
3502     AfterLastAttribute->MustBreakBefore = true;
3503     if (LineIsFunctionDeclaration)
3504       Line.ReturnTypeWrapped = true;
3505   }
3506 
3507   if (IsCpp) {
3508     if (!LineIsFunctionDeclaration) {
3509       // Annotate */&/&& in `operator` function calls as binary operators.
3510       for (const auto *Tok = First; Tok; Tok = Tok->Next) {
3511         if (Tok->isNot(tok::kw_operator))
3512           continue;
3513         do {
3514           Tok = Tok->Next;
3515         } while (Tok && Tok->isNot(TT_OverloadedOperatorLParen));
3516         if (!Tok)
3517           break;
3518         const auto *LeftParen = Tok;
3519         for (Tok = Tok->Next; Tok && Tok != LeftParen->MatchingParen;
3520              Tok = Tok->Next) {
3521           if (Tok->isNot(tok::identifier))
3522             continue;
3523           auto *Next = Tok->Next;
3524           const bool NextIsBinaryOperator =
3525               Next && Next->isPointerOrReference() && Next->Next &&
3526               Next->Next->is(tok::identifier);
3527           if (!NextIsBinaryOperator)
3528             continue;
3529           Next->setType(TT_BinaryOperator);
3530           Tok = Next;
3531         }
3532       }
3533     } else if (ClosingParen) {
3534       for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) {
3535         if (Tok->is(TT_CtorInitializerColon))
3536           break;
3537         if (Tok->is(tok::arrow)) {
3538           Tok->setType(TT_TrailingReturnArrow);
3539           break;
3540         }
3541         if (Tok->isNot(TT_TrailingAnnotation))
3542           continue;
3543         const auto *Next = Tok->Next;
3544         if (!Next || Next->isNot(tok::l_paren))
3545           continue;
3546         Tok = Next->MatchingParen;
3547         if (!Tok)
3548           break;
3549       }
3550     }
3551   }
3552 
3553   while (Current) {
3554     const FormatToken *Prev = Current->Previous;
3555     if (Current->is(TT_LineComment)) {
3556       if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
3557         Current->SpacesRequiredBefore =
3558             (Style.Cpp11BracedListStyle && !Style.SpacesInParensOptions.Other)
3559                 ? 0
3560                 : 1;
3561       } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
3562         Current->SpacesRequiredBefore = 0;
3563       } else {
3564         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
3565       }
3566 
3567       // If we find a trailing comment, iterate backwards to determine whether
3568       // it seems to relate to a specific parameter. If so, break before that
3569       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
3570       // to the previous line in:
3571       //   SomeFunction(a,
3572       //                b, // comment
3573       //                c);
3574       if (!Current->HasUnescapedNewline) {
3575         for (FormatToken *Parameter = Current->Previous; Parameter;
3576              Parameter = Parameter->Previous) {
3577           if (Parameter->isOneOf(tok::comment, tok::r_brace))
3578             break;
3579           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
3580             if (Parameter->Previous->isNot(TT_CtorInitializerComma) &&
3581                 Parameter->HasUnescapedNewline) {
3582               Parameter->MustBreakBefore = true;
3583             }
3584             break;
3585           }
3586         }
3587       }
3588     } else if (!Current->Finalized && Current->SpacesRequiredBefore == 0 &&
3589                spaceRequiredBefore(Line, *Current)) {
3590       Current->SpacesRequiredBefore = 1;
3591     }
3592 
3593     const auto &Children = Prev->Children;
3594     if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
3595       Current->MustBreakBefore = true;
3596     } else {
3597       Current->MustBreakBefore =
3598           Current->MustBreakBefore || mustBreakBefore(Line, *Current);
3599       if (!Current->MustBreakBefore && InFunctionDecl &&
3600           Current->is(TT_FunctionDeclarationName)) {
3601         Current->MustBreakBefore = mustBreakForReturnType(Line);
3602       }
3603     }
3604 
3605     Current->CanBreakBefore =
3606         Current->MustBreakBefore || canBreakBefore(Line, *Current);
3607     unsigned ChildSize = 0;
3608     if (Prev->Children.size() == 1) {
3609       FormatToken &LastOfChild = *Prev->Children[0]->Last;
3610       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
3611                                                   : LastOfChild.TotalLength + 1;
3612     }
3613     if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
3614         (Prev->Children.size() == 1 &&
3615          Prev->Children[0]->First->MustBreakBefore) ||
3616         Current->IsMultiline) {
3617       Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
3618     } else {
3619       Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
3620                              ChildSize + Current->SpacesRequiredBefore;
3621     }
3622 
3623     if (Current->is(TT_CtorInitializerColon))
3624       InFunctionDecl = false;
3625 
3626     // FIXME: Only calculate this if CanBreakBefore is true once static
3627     // initializers etc. are sorted out.
3628     // FIXME: Move magic numbers to a better place.
3629 
3630     // Reduce penalty for aligning ObjC method arguments using the colon
3631     // alignment as this is the canonical way (still prefer fitting everything
3632     // into one line if possible). Trying to fit a whole expression into one
3633     // line should not force other line breaks (e.g. when ObjC method
3634     // expression is a part of other expression).
3635     Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
3636     if (Style.Language == FormatStyle::LK_ObjC &&
3637         Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
3638       if (Current->ParameterIndex == 1)
3639         Current->SplitPenalty += 5 * Current->BindingStrength;
3640     } else {
3641       Current->SplitPenalty += 20 * Current->BindingStrength;
3642     }
3643 
3644     Current = Current->Next;
3645   }
3646 
3647   calculateUnbreakableTailLengths(Line);
3648   unsigned IndentLevel = Line.Level;
3649   for (Current = First; Current; Current = Current->Next) {
3650     if (Current->Role)
3651       Current->Role->precomputeFormattingInfos(Current);
3652     if (Current->MatchingParen &&
3653         Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
3654         IndentLevel > 0) {
3655       --IndentLevel;
3656     }
3657     Current->IndentLevel = IndentLevel;
3658     if (Current->opensBlockOrBlockTypeList(Style))
3659       ++IndentLevel;
3660   }
3661 
3662   LLVM_DEBUG({ printDebugInfo(Line); });
3663 }
3664 
calculateUnbreakableTailLengths(AnnotatedLine & Line) const3665 void TokenAnnotator::calculateUnbreakableTailLengths(
3666     AnnotatedLine &Line) const {
3667   unsigned UnbreakableTailLength = 0;
3668   FormatToken *Current = Line.Last;
3669   while (Current) {
3670     Current->UnbreakableTailLength = UnbreakableTailLength;
3671     if (Current->CanBreakBefore ||
3672         Current->isOneOf(tok::comment, tok::string_literal)) {
3673       UnbreakableTailLength = 0;
3674     } else {
3675       UnbreakableTailLength +=
3676           Current->ColumnWidth + Current->SpacesRequiredBefore;
3677     }
3678     Current = Current->Previous;
3679   }
3680 }
3681 
calculateArrayInitializerColumnList(AnnotatedLine & Line) const3682 void TokenAnnotator::calculateArrayInitializerColumnList(
3683     AnnotatedLine &Line) const {
3684   if (Line.First == Line.Last)
3685     return;
3686   auto *CurrentToken = Line.First;
3687   CurrentToken->ArrayInitializerLineStart = true;
3688   unsigned Depth = 0;
3689   while (CurrentToken && CurrentToken != Line.Last) {
3690     if (CurrentToken->is(tok::l_brace)) {
3691       CurrentToken->IsArrayInitializer = true;
3692       if (CurrentToken->Next)
3693         CurrentToken->Next->MustBreakBefore = true;
3694       CurrentToken =
3695           calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
3696     } else {
3697       CurrentToken = CurrentToken->Next;
3698     }
3699   }
3700 }
3701 
calculateInitializerColumnList(AnnotatedLine & Line,FormatToken * CurrentToken,unsigned Depth) const3702 FormatToken *TokenAnnotator::calculateInitializerColumnList(
3703     AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
3704   while (CurrentToken && CurrentToken != Line.Last) {
3705     if (CurrentToken->is(tok::l_brace))
3706       ++Depth;
3707     else if (CurrentToken->is(tok::r_brace))
3708       --Depth;
3709     if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
3710       CurrentToken = CurrentToken->Next;
3711       if (!CurrentToken)
3712         break;
3713       CurrentToken->StartsColumn = true;
3714       CurrentToken = CurrentToken->Previous;
3715     }
3716     CurrentToken = CurrentToken->Next;
3717   }
3718   return CurrentToken;
3719 }
3720 
splitPenalty(const AnnotatedLine & Line,const FormatToken & Tok,bool InFunctionDecl) const3721 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
3722                                       const FormatToken &Tok,
3723                                       bool InFunctionDecl) const {
3724   const FormatToken &Left = *Tok.Previous;
3725   const FormatToken &Right = Tok;
3726 
3727   if (Left.is(tok::semi))
3728     return 0;
3729 
3730   // Language specific handling.
3731   if (Style.Language == FormatStyle::LK_Java) {
3732     if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
3733       return 1;
3734     if (Right.is(Keywords.kw_implements))
3735       return 2;
3736     if (Left.is(tok::comma) && Left.NestingLevel == 0)
3737       return 3;
3738   } else if (Style.isJavaScript()) {
3739     if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
3740       return 100;
3741     if (Left.is(TT_JsTypeColon))
3742       return 35;
3743     if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
3744         (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
3745       return 100;
3746     }
3747     // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
3748     if (Left.opensScope() && Right.closesScope())
3749       return 200;
3750   } else if (Style.Language == FormatStyle::LK_Proto) {
3751     if (Right.is(tok::l_square))
3752       return 1;
3753     if (Right.is(tok::period))
3754       return 500;
3755   }
3756 
3757   if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
3758     return 1;
3759   if (Right.is(tok::l_square)) {
3760     if (Left.is(tok::r_square))
3761       return 200;
3762     // Slightly prefer formatting local lambda definitions like functions.
3763     if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
3764       return 35;
3765     if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3766                        TT_ArrayInitializerLSquare,
3767                        TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
3768       return 500;
3769     }
3770   }
3771 
3772   if (Left.is(tok::coloncolon))
3773     return Style.PenaltyBreakScopeResolution;
3774   if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
3775       Right.is(tok::kw_operator)) {
3776     if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
3777       return 3;
3778     if (Left.is(TT_StartOfName))
3779       return 110;
3780     if (InFunctionDecl && Right.NestingLevel == 0)
3781       return Style.PenaltyReturnTypeOnItsOwnLine;
3782     return 200;
3783   }
3784   if (Right.is(TT_PointerOrReference))
3785     return 190;
3786   if (Right.is(TT_TrailingReturnArrow))
3787     return 110;
3788   if (Left.is(tok::equal) && Right.is(tok::l_brace))
3789     return 160;
3790   if (Left.is(TT_CastRParen))
3791     return 100;
3792   if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
3793     return 5000;
3794   if (Left.is(tok::comment))
3795     return 1000;
3796 
3797   if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
3798                    TT_CtorInitializerColon)) {
3799     return 2;
3800   }
3801 
3802   if (Right.isMemberAccess()) {
3803     // Breaking before the "./->" of a chained call/member access is reasonably
3804     // cheap, as formatting those with one call per line is generally
3805     // desirable. In particular, it should be cheaper to break before the call
3806     // than it is to break inside a call's parameters, which could lead to weird
3807     // "hanging" indents. The exception is the very last "./->" to support this
3808     // frequent pattern:
3809     //
3810     //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
3811     //       dddddddd);
3812     //
3813     // which might otherwise be blown up onto many lines. Here, clang-format
3814     // won't produce "hanging" indents anyway as there is no other trailing
3815     // call.
3816     //
3817     // Also apply higher penalty is not a call as that might lead to a wrapping
3818     // like:
3819     //
3820     //   aaaaaaa
3821     //       .aaaaaaaaa.bbbbbbbb(cccccccc);
3822     return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
3823                ? 150
3824                : 35;
3825   }
3826 
3827   if (Right.is(TT_TrailingAnnotation) &&
3828       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
3829     // Moving trailing annotations to the next line is fine for ObjC method
3830     // declarations.
3831     if (Line.startsWith(TT_ObjCMethodSpecifier))
3832       return 10;
3833     // Generally, breaking before a trailing annotation is bad unless it is
3834     // function-like. It seems to be especially preferable to keep standard
3835     // annotations (i.e. "const", "final" and "override") on the same line.
3836     // Use a slightly higher penalty after ")" so that annotations like
3837     // "const override" are kept together.
3838     bool is_short_annotation = Right.TokenText.size() < 10;
3839     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
3840   }
3841 
3842   // In for-loops, prefer breaking at ',' and ';'.
3843   if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
3844     return 4;
3845 
3846   // In Objective-C method expressions, prefer breaking before "param:" over
3847   // breaking after it.
3848   if (Right.is(TT_SelectorName))
3849     return 0;
3850   if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
3851     return Line.MightBeFunctionDecl ? 50 : 500;
3852 
3853   // In Objective-C type declarations, avoid breaking after the category's
3854   // open paren (we'll prefer breaking after the protocol list's opening
3855   // angle bracket, if present).
3856   if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
3857       Left.Previous->isOneOf(tok::identifier, tok::greater)) {
3858     return 500;
3859   }
3860 
3861   if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
3862     return Style.PenaltyBreakOpenParenthesis;
3863   if (Left.is(tok::l_paren) && InFunctionDecl &&
3864       Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
3865     return 100;
3866   }
3867   if (Left.is(tok::l_paren) && Left.Previous &&
3868       (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
3869        Left.Previous->isIf())) {
3870     return 1000;
3871   }
3872   if (Left.is(tok::equal) && InFunctionDecl)
3873     return 110;
3874   if (Right.is(tok::r_brace))
3875     return 1;
3876   if (Left.is(TT_TemplateOpener))
3877     return 100;
3878   if (Left.opensScope()) {
3879     // If we aren't aligning after opening parens/braces we can always break
3880     // here unless the style does not want us to place all arguments on the
3881     // next line.
3882     if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
3883         (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
3884       return 0;
3885     }
3886     if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
3887       return 19;
3888     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
3889                                    : 19;
3890   }
3891   if (Left.is(TT_JavaAnnotation))
3892     return 50;
3893 
3894   if (Left.is(TT_UnaryOperator))
3895     return 60;
3896   if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
3897       Left.Previous->isLabelString() &&
3898       (Left.NextOperator || Left.OperatorIndex != 0)) {
3899     return 50;
3900   }
3901   if (Right.is(tok::plus) && Left.isLabelString() &&
3902       (Right.NextOperator || Right.OperatorIndex != 0)) {
3903     return 25;
3904   }
3905   if (Left.is(tok::comma))
3906     return 1;
3907   if (Right.is(tok::lessless) && Left.isLabelString() &&
3908       (Right.NextOperator || Right.OperatorIndex != 1)) {
3909     return 25;
3910   }
3911   if (Right.is(tok::lessless)) {
3912     // Breaking at a << is really cheap.
3913     if (Left.isNot(tok::r_paren) || Right.OperatorIndex > 0) {
3914       // Slightly prefer to break before the first one in log-like statements.
3915       return 2;
3916     }
3917     return 1;
3918   }
3919   if (Left.ClosesTemplateDeclaration)
3920     return Style.PenaltyBreakTemplateDeclaration;
3921   if (Left.ClosesRequiresClause)
3922     return 0;
3923   if (Left.is(TT_ConditionalExpr))
3924     return prec::Conditional;
3925   prec::Level Level = Left.getPrecedence();
3926   if (Level == prec::Unknown)
3927     Level = Right.getPrecedence();
3928   if (Level == prec::Assignment)
3929     return Style.PenaltyBreakAssignment;
3930   if (Level != prec::Unknown)
3931     return Level;
3932 
3933   return 3;
3934 }
3935 
spaceRequiredBeforeParens(const FormatToken & Right) const3936 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
3937   if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
3938     return true;
3939   if (Right.is(TT_OverloadedOperatorLParen) &&
3940       Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
3941     return true;
3942   }
3943   if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
3944       Right.ParameterCount > 0) {
3945     return true;
3946   }
3947   return false;
3948 }
3949 
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right) const3950 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
3951                                           const FormatToken &Left,
3952                                           const FormatToken &Right) const {
3953   if (Left.is(tok::kw_return) &&
3954       !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
3955     return true;
3956   }
3957   if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
3958       Right.MatchingParen->is(TT_CastRParen)) {
3959     return true;
3960   }
3961   if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
3962     return true;
3963   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
3964       Left.Tok.getObjCKeywordID() == tok::objc_property) {
3965     return true;
3966   }
3967   if (Right.is(tok::hashhash))
3968     return Left.is(tok::hash);
3969   if (Left.isOneOf(tok::hashhash, tok::hash))
3970     return Right.is(tok::hash);
3971   if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
3972       (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
3973        Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
3974     return Style.SpacesInParensOptions.InEmptyParentheses;
3975   }
3976   if (Style.SpacesInParensOptions.InConditionalStatements) {
3977     const FormatToken *LeftParen = nullptr;
3978     if (Left.is(tok::l_paren))
3979       LeftParen = &Left;
3980     else if (Right.is(tok::r_paren) && Right.MatchingParen)
3981       LeftParen = Right.MatchingParen;
3982     if (LeftParen) {
3983       if (LeftParen->is(TT_ConditionLParen))
3984         return true;
3985       if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
3986         return true;
3987     }
3988   }
3989 
3990   // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
3991   if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
3992                                              // function return type 'auto'
3993                                              TT_FunctionTypeLParen)) {
3994     return true;
3995   }
3996 
3997   // auto{x} auto(x)
3998   if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
3999     return false;
4000 
4001   // operator co_await(x)
4002   if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
4003       Left.Previous->is(tok::kw_operator)) {
4004     return false;
4005   }
4006   // co_await (x), co_yield (x), co_return (x)
4007   if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
4008       !Right.isOneOf(tok::semi, tok::r_paren)) {
4009     return true;
4010   }
4011 
4012   if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
4013     return (Right.is(TT_CastRParen) ||
4014             (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
4015                ? Style.SpacesInParensOptions.InCStyleCasts
4016                : Style.SpacesInParensOptions.Other;
4017   }
4018   if (Right.isOneOf(tok::semi, tok::comma))
4019     return false;
4020   if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
4021     bool IsLightweightGeneric = Right.MatchingParen &&
4022                                 Right.MatchingParen->Next &&
4023                                 Right.MatchingParen->Next->is(tok::colon);
4024     return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
4025   }
4026   if (Right.is(tok::less) && Left.is(tok::kw_template))
4027     return Style.SpaceAfterTemplateKeyword;
4028   if (Left.isOneOf(tok::exclaim, tok::tilde))
4029     return false;
4030   if (Left.is(tok::at) &&
4031       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
4032                     tok::numeric_constant, tok::l_paren, tok::l_brace,
4033                     tok::kw_true, tok::kw_false)) {
4034     return false;
4035   }
4036   if (Left.is(tok::colon))
4037     return Left.isNot(TT_ObjCMethodExpr);
4038   if (Left.is(tok::coloncolon))
4039     return false;
4040   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
4041     if (Style.Language == FormatStyle::LK_TextProto ||
4042         (Style.Language == FormatStyle::LK_Proto &&
4043          (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
4044       // Format empty list as `<>`.
4045       if (Left.is(tok::less) && Right.is(tok::greater))
4046         return false;
4047       return !Style.Cpp11BracedListStyle;
4048     }
4049     // Don't attempt to format operator<(), as it is handled later.
4050     if (Right.isNot(TT_OverloadedOperatorLParen))
4051       return false;
4052   }
4053   if (Right.is(tok::ellipsis)) {
4054     return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
4055                                     Left.Previous->is(tok::kw_case));
4056   }
4057   if (Left.is(tok::l_square) && Right.is(tok::amp))
4058     return Style.SpacesInSquareBrackets;
4059   if (Right.is(TT_PointerOrReference)) {
4060     if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
4061       if (!Left.MatchingParen)
4062         return true;
4063       FormatToken *TokenBeforeMatchingParen =
4064           Left.MatchingParen->getPreviousNonComment();
4065       if (!TokenBeforeMatchingParen || Left.isNot(TT_TypeDeclarationParen))
4066         return true;
4067     }
4068     // Add a space if the previous token is a pointer qualifier or the closing
4069     // parenthesis of __attribute__(()) expression and the style requires spaces
4070     // after pointer qualifiers.
4071     if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
4072          Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4073         (Left.is(TT_AttributeRParen) ||
4074          Left.canBePointerOrReferenceQualifier())) {
4075       return true;
4076     }
4077     if (Left.Tok.isLiteral())
4078       return true;
4079     // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
4080     if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
4081         Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
4082       return getTokenPointerOrReferenceAlignment(Right) !=
4083              FormatStyle::PAS_Left;
4084     }
4085     return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
4086            (getTokenPointerOrReferenceAlignment(Right) !=
4087                 FormatStyle::PAS_Left ||
4088             (Line.IsMultiVariableDeclStmt &&
4089              (Left.NestingLevel == 0 ||
4090               (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
4091   }
4092   if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
4093       (Left.isNot(TT_PointerOrReference) ||
4094        (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
4095         !Line.IsMultiVariableDeclStmt))) {
4096     return true;
4097   }
4098   if (Left.is(TT_PointerOrReference)) {
4099     // Add a space if the next token is a pointer qualifier and the style
4100     // requires spaces before pointer qualifiers.
4101     if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
4102          Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4103         Right.canBePointerOrReferenceQualifier()) {
4104       return true;
4105     }
4106     // & 1
4107     if (Right.Tok.isLiteral())
4108       return true;
4109     // & /* comment
4110     if (Right.is(TT_BlockComment))
4111       return true;
4112     // foo() -> const Bar * override/final
4113     // S::foo() & noexcept/requires
4114     if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
4115                       TT_RequiresClause) &&
4116         Right.isNot(TT_StartOfName)) {
4117       return true;
4118     }
4119     // & {
4120     if (Right.is(tok::l_brace) && Right.is(BK_Block))
4121       return true;
4122     // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
4123     if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
4124         Right.Next->is(TT_RangeBasedForLoopColon)) {
4125       return getTokenPointerOrReferenceAlignment(Left) !=
4126              FormatStyle::PAS_Right;
4127     }
4128     if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
4129                       tok::l_paren)) {
4130       return false;
4131     }
4132     if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
4133       return false;
4134     // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
4135     // because it does not take into account nested scopes like lambdas.
4136     // In multi-variable declaration statements, attach */& to the variable
4137     // independently of the style. However, avoid doing it if we are in a nested
4138     // scope, e.g. lambda. We still need to special-case statements with
4139     // initializers.
4140     if (Line.IsMultiVariableDeclStmt &&
4141         (Left.NestingLevel == Line.First->NestingLevel ||
4142          ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
4143           startsWithInitStatement(Line)))) {
4144       return false;
4145     }
4146     return Left.Previous && !Left.Previous->isOneOf(
4147                                 tok::l_paren, tok::coloncolon, tok::l_square);
4148   }
4149   // Ensure right pointer alignment with ellipsis e.g. int *...P
4150   if (Left.is(tok::ellipsis) && Left.Previous &&
4151       Left.Previous->isPointerOrReference()) {
4152     return Style.PointerAlignment != FormatStyle::PAS_Right;
4153   }
4154 
4155   if (Right.is(tok::star) && Left.is(tok::l_paren))
4156     return false;
4157   if (Left.is(tok::star) && Right.isPointerOrReference())
4158     return false;
4159   if (Right.isPointerOrReference()) {
4160     const FormatToken *Previous = &Left;
4161     while (Previous && Previous->isNot(tok::kw_operator)) {
4162       if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
4163         Previous = Previous->getPreviousNonComment();
4164         continue;
4165       }
4166       if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
4167         Previous = Previous->MatchingParen->getPreviousNonComment();
4168         continue;
4169       }
4170       if (Previous->is(tok::coloncolon)) {
4171         Previous = Previous->getPreviousNonComment();
4172         continue;
4173       }
4174       break;
4175     }
4176     // Space between the type and the * in:
4177     //   operator void*()
4178     //   operator char*()
4179     //   operator void const*()
4180     //   operator void volatile*()
4181     //   operator /*comment*/ const char*()
4182     //   operator volatile /*comment*/ char*()
4183     //   operator Foo*()
4184     //   operator C<T>*()
4185     //   operator std::Foo*()
4186     //   operator C<T>::D<U>*()
4187     // dependent on PointerAlignment style.
4188     if (Previous) {
4189       if (Previous->endsSequence(tok::kw_operator))
4190         return Style.PointerAlignment != FormatStyle::PAS_Left;
4191       if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
4192         return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
4193                (Style.SpaceAroundPointerQualifiers ==
4194                 FormatStyle::SAPQ_After) ||
4195                (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
4196       }
4197     }
4198   }
4199   if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
4200     return true;
4201   const auto SpaceRequiredForArrayInitializerLSquare =
4202       [](const FormatToken &LSquareTok, const FormatStyle &Style) {
4203         return Style.SpacesInContainerLiterals ||
4204                (Style.isProto() && !Style.Cpp11BracedListStyle &&
4205                 LSquareTok.endsSequence(tok::l_square, tok::colon,
4206                                         TT_SelectorName));
4207       };
4208   if (Left.is(tok::l_square)) {
4209     return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
4210             SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
4211            (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
4212                          TT_LambdaLSquare) &&
4213             Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
4214   }
4215   if (Right.is(tok::r_square)) {
4216     return Right.MatchingParen &&
4217            ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
4218              SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
4219                                                      Style)) ||
4220             (Style.SpacesInSquareBrackets &&
4221              Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
4222                                           TT_StructuredBindingLSquare,
4223                                           TT_LambdaLSquare)));
4224   }
4225   if (Right.is(tok::l_square) &&
4226       !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4227                      TT_DesignatedInitializerLSquare,
4228                      TT_StructuredBindingLSquare, TT_AttributeSquare) &&
4229       !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
4230       !(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
4231         Right.is(TT_ArraySubscriptLSquare))) {
4232     return false;
4233   }
4234   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
4235     return !Left.Children.empty(); // No spaces in "{}".
4236   if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
4237       (Right.is(tok::r_brace) && Right.MatchingParen &&
4238        Right.MatchingParen->isNot(BK_Block))) {
4239     return !Style.Cpp11BracedListStyle || Style.SpacesInParensOptions.Other;
4240   }
4241   if (Left.is(TT_BlockComment)) {
4242     // No whitespace in x(/*foo=*/1), except for JavaScript.
4243     return Style.isJavaScript() || !Left.TokenText.ends_with("=*/");
4244   }
4245 
4246   // Space between template and attribute.
4247   // e.g. template <typename T> [[nodiscard]] ...
4248   if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4249     return true;
4250   // Space before parentheses common for all languages
4251   if (Right.is(tok::l_paren)) {
4252     if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4253       return spaceRequiredBeforeParens(Right);
4254     if (Left.isOneOf(TT_RequiresClause,
4255                      TT_RequiresClauseInARequiresExpression)) {
4256       return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4257              spaceRequiredBeforeParens(Right);
4258     }
4259     if (Left.is(TT_RequiresExpression)) {
4260       return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4261              spaceRequiredBeforeParens(Right);
4262     }
4263     if (Left.is(TT_AttributeRParen) ||
4264         (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4265       return true;
4266     }
4267     if (Left.is(TT_ForEachMacro)) {
4268       return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4269              spaceRequiredBeforeParens(Right);
4270     }
4271     if (Left.is(TT_IfMacro)) {
4272       return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4273              spaceRequiredBeforeParens(Right);
4274     }
4275     if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
4276         Left.isOneOf(tok::kw_new, tok::kw_delete) &&
4277         Right.isNot(TT_OverloadedOperatorLParen) &&
4278         !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
4279       return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
4280     }
4281     if (Line.Type == LT_ObjCDecl)
4282       return true;
4283     if (Left.is(tok::semi))
4284       return true;
4285     if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4286                      tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4287         Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4288         Right.is(TT_ConditionLParen)) {
4289       return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4290              spaceRequiredBeforeParens(Right);
4291     }
4292 
4293     // TODO add Operator overloading specific Options to
4294     // SpaceBeforeParensOptions
4295     if (Right.is(TT_OverloadedOperatorLParen))
4296       return spaceRequiredBeforeParens(Right);
4297     // Function declaration or definition
4298     if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
4299       if (Line.mightBeFunctionDefinition()) {
4300         return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4301                spaceRequiredBeforeParens(Right);
4302       } else {
4303         return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
4304                spaceRequiredBeforeParens(Right);
4305       }
4306     }
4307     // Lambda
4308     if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4309         Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4310       return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4311              spaceRequiredBeforeParens(Right);
4312     }
4313     if (!Left.Previous || Left.Previous->isNot(tok::period)) {
4314       if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4315         return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4316                spaceRequiredBeforeParens(Right);
4317       }
4318       if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4319         return ((!Line.MightBeFunctionDecl || !Left.Previous) &&
4320                 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4321                spaceRequiredBeforeParens(Right);
4322       }
4323 
4324       if (Left.is(tok::r_square) && Left.MatchingParen &&
4325           Left.MatchingParen->Previous &&
4326           Left.MatchingParen->Previous->is(tok::kw_delete)) {
4327         return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4328                spaceRequiredBeforeParens(Right);
4329       }
4330     }
4331     // Handle builtins like identifiers.
4332     if (Line.Type != LT_PreprocessorDirective &&
4333         (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4334       return spaceRequiredBeforeParens(Right);
4335     }
4336     return false;
4337   }
4338   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4339     return false;
4340   if (Right.is(TT_UnaryOperator)) {
4341     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4342            (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4343   }
4344   // No space between the variable name and the initializer list.
4345   // A a1{1};
4346   // Verilog doesn't have such syntax, but it has word operators that are C++
4347   // identifiers like `a inside {b, c}`. So the rule is not applicable.
4348   if (!Style.isVerilog() &&
4349       (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4350                     tok::r_paren) ||
4351        Left.isSimpleTypeSpecifier()) &&
4352       Right.is(tok::l_brace) && Right.getNextNonComment() &&
4353       Right.isNot(BK_Block)) {
4354     return false;
4355   }
4356   if (Left.is(tok::period) || Right.is(tok::period))
4357     return false;
4358   // u#str, U#str, L#str, u8#str
4359   // uR#str, UR#str, LR#str, u8R#str
4360   if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4361       (Left.TokenText == "L" || Left.TokenText == "u" ||
4362        Left.TokenText == "U" || Left.TokenText == "u8" ||
4363        Left.TokenText == "LR" || Left.TokenText == "uR" ||
4364        Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4365     return false;
4366   }
4367   if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4368       Left.MatchingParen->Previous &&
4369       (Left.MatchingParen->Previous->is(tok::period) ||
4370        Left.MatchingParen->Previous->is(tok::coloncolon))) {
4371     // Java call to generic function with explicit type:
4372     // A.<B<C<...>>>DoSomething();
4373     // A::<B<C<...>>>DoSomething();  // With a Java 8 method reference.
4374     return false;
4375   }
4376   if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4377     return false;
4378   if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4379     // Objective-C dictionary literal -> no space after opening brace.
4380     return false;
4381   }
4382   if (Right.is(tok::r_brace) && Right.MatchingParen &&
4383       Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4384     // Objective-C dictionary literal -> no space before closing brace.
4385     return false;
4386   }
4387   if (Right.getType() == TT_TrailingAnnotation &&
4388       Right.isOneOf(tok::amp, tok::ampamp) &&
4389       Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4390       (!Right.Next || Right.Next->is(tok::semi))) {
4391     // Match const and volatile ref-qualifiers without any additional
4392     // qualifiers such as
4393     // void Fn() const &;
4394     return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4395   }
4396 
4397   return true;
4398 }
4399 
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right) const4400 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4401                                          const FormatToken &Right) const {
4402   const FormatToken &Left = *Right.Previous;
4403 
4404   // If the token is finalized don't touch it (as it could be in a
4405   // clang-format-off section).
4406   if (Left.Finalized)
4407     return Right.hasWhitespaceBefore();
4408 
4409   // Never ever merge two words.
4410   if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
4411     return true;
4412 
4413   // Leave a space between * and /* to avoid C4138 `comment end` found outside
4414   // of comment.
4415   if (Left.is(tok::star) && Right.is(tok::comment))
4416     return true;
4417 
4418   if (Style.isCpp()) {
4419     if (Left.is(TT_OverloadedOperator) &&
4420         Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4421       return true;
4422     }
4423     // Space between UDL and dot: auto b = 4s .count();
4424     if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4425       return true;
4426     // Space between import <iostream>.
4427     // or import .....;
4428     if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4429       return true;
4430     // Space between `module :` and `import :`.
4431     if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4432         Right.is(TT_ModulePartitionColon)) {
4433       return true;
4434     }
4435     // No space between import foo:bar but keep a space between import :bar;
4436     if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4437       return false;
4438     // No space between :bar;
4439     if (Left.is(TT_ModulePartitionColon) &&
4440         Right.isOneOf(tok::identifier, tok::kw_private)) {
4441       return false;
4442     }
4443     if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4444         Line.First->is(Keywords.kw_import)) {
4445       return false;
4446     }
4447     // Space in __attribute__((attr)) ::type.
4448     if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
4449         Right.is(tok::coloncolon)) {
4450       return true;
4451     }
4452 
4453     if (Left.is(tok::kw_operator))
4454       return Right.is(tok::coloncolon);
4455     if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4456         !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4457       return true;
4458     }
4459     if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4460         Right.is(TT_TemplateOpener)) {
4461       return true;
4462     }
4463   } else if (Style.isProto()) {
4464     if (Right.is(tok::period) &&
4465         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4466                      Keywords.kw_repeated, Keywords.kw_extend)) {
4467       return true;
4468     }
4469     if (Right.is(tok::l_paren) &&
4470         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4471       return true;
4472     }
4473     if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4474       return true;
4475     // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4476     if (Left.is(tok::slash) || Right.is(tok::slash))
4477       return false;
4478     if (Left.MatchingParen &&
4479         Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4480         Right.isOneOf(tok::l_brace, tok::less)) {
4481       return !Style.Cpp11BracedListStyle;
4482     }
4483     // A percent is probably part of a formatting specification, such as %lld.
4484     if (Left.is(tok::percent))
4485       return false;
4486     // Preserve the existence of a space before a percent for cases like 0x%04x
4487     // and "%d %d"
4488     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4489       return Right.hasWhitespaceBefore();
4490   } else if (Style.isJson()) {
4491     if (Right.is(tok::colon) && Left.is(tok::string_literal))
4492       return Style.SpaceBeforeJsonColon;
4493   } else if (Style.isCSharp()) {
4494     // Require spaces around '{' and  before '}' unless they appear in
4495     // interpolated strings. Interpolated strings are merged into a single token
4496     // so cannot have spaces inserted by this function.
4497 
4498     // No space between 'this' and '['
4499     if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4500       return false;
4501 
4502     // No space between 'new' and '('
4503     if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4504       return false;
4505 
4506     // Space before { (including space within '{ {').
4507     if (Right.is(tok::l_brace))
4508       return true;
4509 
4510     // Spaces inside braces.
4511     if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4512       return true;
4513 
4514     if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4515       return true;
4516 
4517     // Spaces around '=>'.
4518     if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4519       return true;
4520 
4521     // No spaces around attribute target colons
4522     if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4523       return false;
4524 
4525     // space between type and variable e.g. Dictionary<string,string> foo;
4526     if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4527       return true;
4528 
4529     // spaces inside square brackets.
4530     if (Left.is(tok::l_square) || Right.is(tok::r_square))
4531       return Style.SpacesInSquareBrackets;
4532 
4533     // No space before ? in nullable types.
4534     if (Right.is(TT_CSharpNullable))
4535       return false;
4536 
4537     // No space before null forgiving '!'.
4538     if (Right.is(TT_NonNullAssertion))
4539       return false;
4540 
4541     // No space between consecutive commas '[,,]'.
4542     if (Left.is(tok::comma) && Right.is(tok::comma))
4543       return false;
4544 
4545     // space after var in `var (key, value)`
4546     if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
4547       return true;
4548 
4549     // space between keywords and paren e.g. "using ("
4550     if (Right.is(tok::l_paren)) {
4551       if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
4552                        Keywords.kw_lock)) {
4553         return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4554                spaceRequiredBeforeParens(Right);
4555       }
4556     }
4557 
4558     // space between method modifier and opening parenthesis of a tuple return
4559     // type
4560     if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
4561                      tok::kw_virtual, tok::kw_extern, tok::kw_static,
4562                      Keywords.kw_internal, Keywords.kw_abstract,
4563                      Keywords.kw_sealed, Keywords.kw_override,
4564                      Keywords.kw_async, Keywords.kw_unsafe) &&
4565         Right.is(tok::l_paren)) {
4566       return true;
4567     }
4568   } else if (Style.isJavaScript()) {
4569     if (Left.is(TT_FatArrow))
4570       return true;
4571     // for await ( ...
4572     if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
4573         Left.Previous->is(tok::kw_for)) {
4574       return true;
4575     }
4576     if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
4577         Right.MatchingParen) {
4578       const FormatToken *Next = Right.MatchingParen->getNextNonComment();
4579       // An async arrow function, for example: `x = async () => foo();`,
4580       // as opposed to calling a function called async: `x = async();`
4581       if (Next && Next->is(TT_FatArrow))
4582         return true;
4583     }
4584     if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
4585         (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
4586       return false;
4587     }
4588     // In tagged template literals ("html`bar baz`"), there is no space between
4589     // the tag identifier and the template string.
4590     if (Keywords.IsJavaScriptIdentifier(Left,
4591                                         /* AcceptIdentifierName= */ false) &&
4592         Right.is(TT_TemplateString)) {
4593       return false;
4594     }
4595     if (Right.is(tok::star) &&
4596         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
4597       return false;
4598     }
4599     if (Right.isOneOf(tok::l_brace, tok::l_square) &&
4600         Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
4601                      Keywords.kw_extends, Keywords.kw_implements)) {
4602       return true;
4603     }
4604     if (Right.is(tok::l_paren)) {
4605       // JS methods can use some keywords as names (e.g. `delete()`).
4606       if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
4607         return false;
4608       // Valid JS method names can include keywords, e.g. `foo.delete()` or
4609       // `bar.instanceof()`. Recognize call positions by preceding period.
4610       if (Left.Previous && Left.Previous->is(tok::period) &&
4611           Left.Tok.getIdentifierInfo()) {
4612         return false;
4613       }
4614       // Additional unary JavaScript operators that need a space after.
4615       if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
4616                        tok::kw_void)) {
4617         return true;
4618       }
4619     }
4620     // `foo as const;` casts into a const type.
4621     if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
4622       return false;
4623     if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
4624                       tok::kw_const) ||
4625          // "of" is only a keyword if it appears after another identifier
4626          // (e.g. as "const x of y" in a for loop), or after a destructuring
4627          // operation (const [x, y] of z, const {a, b} of c).
4628          (Left.is(Keywords.kw_of) && Left.Previous &&
4629           (Left.Previous->is(tok::identifier) ||
4630            Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
4631         (!Left.Previous || Left.Previous->isNot(tok::period))) {
4632       return true;
4633     }
4634     if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
4635         Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
4636       return false;
4637     }
4638     if (Left.is(Keywords.kw_as) &&
4639         Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
4640       return true;
4641     }
4642     if (Left.is(tok::kw_default) && Left.Previous &&
4643         Left.Previous->is(tok::kw_export)) {
4644       return true;
4645     }
4646     if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
4647       return true;
4648     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
4649       return false;
4650     if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
4651       return false;
4652     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
4653         Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
4654       return false;
4655     }
4656     if (Left.is(tok::ellipsis))
4657       return false;
4658     if (Left.is(TT_TemplateCloser) &&
4659         !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
4660                        Keywords.kw_implements, Keywords.kw_extends)) {
4661       // Type assertions ('<type>expr') are not followed by whitespace. Other
4662       // locations that should have whitespace following are identified by the
4663       // above set of follower tokens.
4664       return false;
4665     }
4666     if (Right.is(TT_NonNullAssertion))
4667       return false;
4668     if (Left.is(TT_NonNullAssertion) &&
4669         Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
4670       return true; // "x! as string", "x! in y"
4671     }
4672   } else if (Style.Language == FormatStyle::LK_Java) {
4673     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
4674       return true;
4675     // spaces inside square brackets.
4676     if (Left.is(tok::l_square) || Right.is(tok::r_square))
4677       return Style.SpacesInSquareBrackets;
4678 
4679     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
4680       return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4681              spaceRequiredBeforeParens(Right);
4682     }
4683     if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
4684                       tok::kw_protected) ||
4685          Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
4686                       Keywords.kw_native)) &&
4687         Right.is(TT_TemplateOpener)) {
4688       return true;
4689     }
4690   } else if (Style.isVerilog()) {
4691     // An escaped identifier ends with whitespace.
4692     if (Style.isVerilog() && Left.is(tok::identifier) &&
4693         Left.TokenText[0] == '\\') {
4694       return true;
4695     }
4696     // Add space between things in a primitive's state table unless in a
4697     // transition like `(0?)`.
4698     if ((Left.is(TT_VerilogTableItem) &&
4699          !Right.isOneOf(tok::r_paren, tok::semi)) ||
4700         (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
4701       const FormatToken *Next = Right.getNextNonComment();
4702       return !(Next && Next->is(tok::r_paren));
4703     }
4704     // Don't add space within a delay like `#0`.
4705     if (Left.isNot(TT_BinaryOperator) &&
4706         Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
4707       return false;
4708     }
4709     // Add space after a delay.
4710     if (Right.isNot(tok::semi) &&
4711         (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
4712          Left.endsSequence(tok::numeric_constant,
4713                            Keywords.kw_verilogHashHash) ||
4714          (Left.is(tok::r_paren) && Left.MatchingParen &&
4715           Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
4716       return true;
4717     }
4718     // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
4719     // literal like `'{}`.
4720     if (Left.is(Keywords.kw_apostrophe) ||
4721         (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
4722       return false;
4723     }
4724     // Add spaces around the implication operator `->`.
4725     if (Left.is(tok::arrow) || Right.is(tok::arrow))
4726       return true;
4727     // Don't add spaces between two at signs. Like in a coverage event.
4728     // Don't add spaces between at and a sensitivity list like
4729     // `@(posedge clk)`.
4730     if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
4731       return false;
4732     // Add space between the type name and dimension like `logic [1:0]`.
4733     if (Right.is(tok::l_square) &&
4734         Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
4735       return true;
4736     }
4737     // In a tagged union expression, there should be a space after the tag.
4738     if (Right.isOneOf(tok::period, Keywords.kw_apostrophe) &&
4739         Keywords.isVerilogIdentifier(Left) && Left.getPreviousNonComment() &&
4740         Left.getPreviousNonComment()->is(Keywords.kw_tagged)) {
4741       return true;
4742     }
4743     // Don't add spaces between a casting type and the quote or repetition count
4744     // and the brace. The case of tagged union expressions is handled by the
4745     // previous rule.
4746     if ((Right.is(Keywords.kw_apostrophe) ||
4747          (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
4748         !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
4749           Keywords.isVerilogWordOperator(Left)) &&
4750         (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
4751                       tok::numeric_constant) ||
4752          Keywords.isWordLike(Left))) {
4753       return false;
4754     }
4755     // Don't add spaces in imports like `import foo::*;`.
4756     if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
4757         (Left.is(tok::star) && Right.is(tok::semi))) {
4758       return false;
4759     }
4760     // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
4761     if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
4762       return true;
4763     // Add space before drive strength like in `wire (strong1, pull0)`.
4764     if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
4765       return true;
4766     // Don't add space in a streaming concatenation like `{>>{j}}`.
4767     if ((Left.is(tok::l_brace) &&
4768          Right.isOneOf(tok::lessless, tok::greatergreater)) ||
4769         (Left.endsSequence(tok::lessless, tok::l_brace) ||
4770          Left.endsSequence(tok::greatergreater, tok::l_brace))) {
4771       return false;
4772     }
4773   }
4774   if (Left.is(TT_ImplicitStringLiteral))
4775     return Right.hasWhitespaceBefore();
4776   if (Line.Type == LT_ObjCMethodDecl) {
4777     if (Left.is(TT_ObjCMethodSpecifier))
4778       return true;
4779     if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) &&
4780         canBeObjCSelectorComponent(Right)) {
4781       // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
4782       // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
4783       // method declaration.
4784       return false;
4785     }
4786   }
4787   if (Line.Type == LT_ObjCProperty &&
4788       (Right.is(tok::equal) || Left.is(tok::equal))) {
4789     return false;
4790   }
4791 
4792   if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow))
4793     return true;
4794 
4795   if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
4796       // In an unexpanded macro call we only find the parentheses and commas
4797       // in a line; the commas and closing parenthesis do not require a space.
4798       (Left.Children.empty() || !Left.MacroParent)) {
4799     return true;
4800   }
4801   if (Right.is(tok::comma))
4802     return false;
4803   if (Right.is(TT_ObjCBlockLParen))
4804     return true;
4805   if (Right.is(TT_CtorInitializerColon))
4806     return Style.SpaceBeforeCtorInitializerColon;
4807   if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
4808     return false;
4809   if (Right.is(TT_RangeBasedForLoopColon) &&
4810       !Style.SpaceBeforeRangeBasedForLoopColon) {
4811     return false;
4812   }
4813   if (Left.is(TT_BitFieldColon)) {
4814     return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4815            Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
4816   }
4817   if (Right.is(tok::colon)) {
4818     if (Right.is(TT_CaseLabelColon))
4819       return Style.SpaceBeforeCaseColon;
4820     if (Right.is(TT_GotoLabelColon))
4821       return false;
4822     // `private:` and `public:`.
4823     if (!Right.getNextNonComment())
4824       return false;
4825     if (Right.is(TT_ObjCMethodExpr))
4826       return false;
4827     if (Left.is(tok::question))
4828       return false;
4829     if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
4830       return false;
4831     if (Right.is(TT_DictLiteral))
4832       return Style.SpacesInContainerLiterals;
4833     if (Right.is(TT_AttributeColon))
4834       return false;
4835     if (Right.is(TT_CSharpNamedArgumentColon))
4836       return false;
4837     if (Right.is(TT_GenericSelectionColon))
4838       return false;
4839     if (Right.is(TT_BitFieldColon)) {
4840       return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4841              Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
4842     }
4843     return true;
4844   }
4845   // Do not merge "- -" into "--".
4846   if ((Left.isOneOf(tok::minus, tok::minusminus) &&
4847        Right.isOneOf(tok::minus, tok::minusminus)) ||
4848       (Left.isOneOf(tok::plus, tok::plusplus) &&
4849        Right.isOneOf(tok::plus, tok::plusplus))) {
4850     return true;
4851   }
4852   if (Left.is(TT_UnaryOperator)) {
4853     if (Right.isNot(tok::l_paren)) {
4854       // The alternative operators for ~ and ! are "compl" and "not".
4855       // If they are used instead, we do not want to combine them with
4856       // the token to the right, unless that is a left paren.
4857       if (Left.is(tok::exclaim) && Left.TokenText == "not")
4858         return true;
4859       if (Left.is(tok::tilde) && Left.TokenText == "compl")
4860         return true;
4861       // Lambda captures allow for a lone &, so "&]" needs to be properly
4862       // handled.
4863       if (Left.is(tok::amp) && Right.is(tok::r_square))
4864         return Style.SpacesInSquareBrackets;
4865     }
4866     return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
4867            Right.is(TT_BinaryOperator);
4868   }
4869 
4870   // If the next token is a binary operator or a selector name, we have
4871   // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
4872   if (Left.is(TT_CastRParen)) {
4873     return Style.SpaceAfterCStyleCast ||
4874            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
4875   }
4876 
4877   auto ShouldAddSpacesInAngles = [this, &Right]() {
4878     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
4879       return true;
4880     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
4881       return Right.hasWhitespaceBefore();
4882     return false;
4883   };
4884 
4885   if (Left.is(tok::greater) && Right.is(tok::greater)) {
4886     if (Style.Language == FormatStyle::LK_TextProto ||
4887         (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
4888       return !Style.Cpp11BracedListStyle;
4889     }
4890     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
4891            ((Style.Standard < FormatStyle::LS_Cpp11) ||
4892             ShouldAddSpacesInAngles());
4893   }
4894   if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
4895       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
4896       (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
4897     return false;
4898   }
4899   if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
4900       Right.getPrecedence() == prec::Assignment) {
4901     return false;
4902   }
4903   if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
4904       (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
4905     return false;
4906   }
4907   if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
4908     // Generally don't remove existing spaces between an identifier and "::".
4909     // The identifier might actually be a macro name such as ALWAYS_INLINE. If
4910     // this turns out to be too lenient, add analysis of the identifier itself.
4911     return Right.hasWhitespaceBefore();
4912   }
4913   if (Right.is(tok::coloncolon) &&
4914       !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
4915     // Put a space between < and :: in vector< ::std::string >
4916     return (Left.is(TT_TemplateOpener) &&
4917             ((Style.Standard < FormatStyle::LS_Cpp11) ||
4918              ShouldAddSpacesInAngles())) ||
4919            !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
4920                           tok::kw___super, TT_TemplateOpener,
4921                           TT_TemplateCloser)) ||
4922            (Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
4923   }
4924   if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
4925     return ShouldAddSpacesInAngles();
4926   // Space before TT_StructuredBindingLSquare.
4927   if (Right.is(TT_StructuredBindingLSquare)) {
4928     return !Left.isOneOf(tok::amp, tok::ampamp) ||
4929            getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
4930   }
4931   // Space before & or && following a TT_StructuredBindingLSquare.
4932   if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
4933       Right.isOneOf(tok::amp, tok::ampamp)) {
4934     return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4935   }
4936   if ((Right.is(TT_BinaryOperator) && Left.isNot(tok::l_paren)) ||
4937       (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
4938        Right.isNot(tok::r_paren))) {
4939     return true;
4940   }
4941   if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
4942       Left.MatchingParen &&
4943       Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
4944     return false;
4945   }
4946   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
4947       Line.Type == LT_ImportStatement) {
4948     return true;
4949   }
4950   if (Right.is(TT_TrailingUnaryOperator))
4951     return false;
4952   if (Left.is(TT_RegexLiteral))
4953     return false;
4954   return spaceRequiredBetween(Line, Left, Right);
4955 }
4956 
4957 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)4958 static bool isAllmanBrace(const FormatToken &Tok) {
4959   return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4960          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
4961 }
4962 
4963 // Returns 'true' if 'Tok' is a function argument.
IsFunctionArgument(const FormatToken & Tok)4964 static bool IsFunctionArgument(const FormatToken &Tok) {
4965   return Tok.MatchingParen && Tok.MatchingParen->Next &&
4966          Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
4967 }
4968 
4969 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)4970 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
4971                          FormatStyle::ShortLambdaStyle ShortLambdaOption) {
4972   return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
4973 }
4974 
isAllmanLambdaBrace(const FormatToken & Tok)4975 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
4976   return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4977          !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
4978 }
4979 
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const4980 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
4981                                      const FormatToken &Right) const {
4982   const FormatToken &Left = *Right.Previous;
4983   if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
4984     return true;
4985 
4986   if (Style.isCSharp()) {
4987     if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
4988         Style.BraceWrapping.AfterFunction) {
4989       return true;
4990     }
4991     if (Right.is(TT_CSharpNamedArgumentColon) ||
4992         Left.is(TT_CSharpNamedArgumentColon)) {
4993       return false;
4994     }
4995     if (Right.is(TT_CSharpGenericTypeConstraint))
4996       return true;
4997     if (Right.Next && Right.Next->is(TT_FatArrow) &&
4998         (Right.is(tok::numeric_constant) ||
4999          (Right.is(tok::identifier) && Right.TokenText == "_"))) {
5000       return true;
5001     }
5002 
5003     // Break after C# [...] and before public/protected/private/internal.
5004     if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
5005         (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
5006          Right.is(Keywords.kw_internal))) {
5007       return true;
5008     }
5009     // Break between ] and [ but only when there are really 2 attributes.
5010     if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
5011         Left.is(tok::r_square) && Right.is(tok::l_square)) {
5012       return true;
5013     }
5014 
5015   } else if (Style.isJavaScript()) {
5016     // FIXME: This might apply to other languages and token kinds.
5017     if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
5018         Left.Previous->is(tok::string_literal)) {
5019       return true;
5020     }
5021     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
5022         Left.Previous && Left.Previous->is(tok::equal) &&
5023         Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
5024                             tok::kw_const) &&
5025         // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
5026         // above.
5027         !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
5028       // Object literals on the top level of a file are treated as "enum-style".
5029       // Each key/value pair is put on a separate line, instead of bin-packing.
5030       return true;
5031     }
5032     if (Left.is(tok::l_brace) && Line.Level == 0 &&
5033         (Line.startsWith(tok::kw_enum) ||
5034          Line.startsWith(tok::kw_const, tok::kw_enum) ||
5035          Line.startsWith(tok::kw_export, tok::kw_enum) ||
5036          Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
5037       // JavaScript top-level enum key/value pairs are put on separate lines
5038       // instead of bin-packing.
5039       return true;
5040     }
5041     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
5042         Left.Previous->is(TT_FatArrow)) {
5043       // JS arrow function (=> {...}).
5044       switch (Style.AllowShortLambdasOnASingleLine) {
5045       case FormatStyle::SLS_All:
5046         return false;
5047       case FormatStyle::SLS_None:
5048         return true;
5049       case FormatStyle::SLS_Empty:
5050         return !Left.Children.empty();
5051       case FormatStyle::SLS_Inline:
5052         // allow one-lining inline (e.g. in function call args) and empty arrow
5053         // functions.
5054         return (Left.NestingLevel == 0 && Line.Level == 0) &&
5055                !Left.Children.empty();
5056       }
5057       llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
5058     }
5059 
5060     if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
5061         !Left.Children.empty()) {
5062       // Support AllowShortFunctionsOnASingleLine for JavaScript.
5063       return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
5064              Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
5065              (Left.NestingLevel == 0 && Line.Level == 0 &&
5066               Style.AllowShortFunctionsOnASingleLine &
5067                   FormatStyle::SFS_InlineOnly);
5068     }
5069   } else if (Style.Language == FormatStyle::LK_Java) {
5070     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
5071         Right.Next->is(tok::string_literal)) {
5072       return true;
5073     }
5074   } else if (Style.isVerilog()) {
5075     // Break between assignments.
5076     if (Left.is(TT_VerilogAssignComma))
5077       return true;
5078     // Break between ports of different types.
5079     if (Left.is(TT_VerilogTypeComma))
5080       return true;
5081     // Break between ports in a module instantiation and after the parameter
5082     // list.
5083     if (Style.VerilogBreakBetweenInstancePorts &&
5084         (Left.is(TT_VerilogInstancePortComma) ||
5085          (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
5086           Left.MatchingParen &&
5087           Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
5088       return true;
5089     }
5090     // Break after labels. In Verilog labels don't have the 'case' keyword, so
5091     // it is hard to identify them in UnwrappedLineParser.
5092     if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
5093       return true;
5094   } else if (Style.BreakAdjacentStringLiterals &&
5095              (Style.isCpp() || Style.isProto() ||
5096               Style.Language == FormatStyle::LK_TableGen)) {
5097     if (Left.isStringLiteral() && Right.isStringLiteral())
5098       return true;
5099   }
5100 
5101   // Basic JSON newline processing.
5102   if (Style.isJson()) {
5103     // Always break after a JSON record opener.
5104     // {
5105     // }
5106     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
5107       return true;
5108     // Always break after a JSON array opener based on BreakArrays.
5109     if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
5110          Right.isNot(tok::r_square)) ||
5111         Left.is(tok::comma)) {
5112       if (Right.is(tok::l_brace))
5113         return true;
      // Scan to the right; if we see an object or an array inside, then
      // break.
5116       for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
5117         if (Tok->isOneOf(tok::l_brace, tok::l_square))
5118           return true;
5119         if (Tok->isOneOf(tok::r_brace, tok::r_square))
5120           break;
5121       }
5122       return Style.BreakArrays;
5123     }
5124   }
5125 
5126   if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
5127       Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
5128     return true;
5129   }
5130 
5131   // If the last token before a '}', ']', or ')' is a comma or a trailing
5132   // comment, the intention is to insert a line break after it in order to make
5133   // shuffling around entries easier. Import statements, especially in
5134   // JavaScript, can be an exception to this rule.
5135   if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
5136     const FormatToken *BeforeClosingBrace = nullptr;
5137     if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
5138          (Style.isJavaScript() && Left.is(tok::l_paren))) &&
5139         Left.isNot(BK_Block) && Left.MatchingParen) {
5140       BeforeClosingBrace = Left.MatchingParen->Previous;
5141     } else if (Right.MatchingParen &&
5142                (Right.MatchingParen->isOneOf(tok::l_brace,
5143                                              TT_ArrayInitializerLSquare) ||
5144                 (Style.isJavaScript() &&
5145                  Right.MatchingParen->is(tok::l_paren)))) {
5146       BeforeClosingBrace = &Left;
5147     }
5148     if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
5149                                BeforeClosingBrace->isTrailingComment())) {
5150       return true;
5151     }
5152   }
5153 
5154   if (Right.is(tok::comment)) {
5155     return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
5156            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
5157   }
5158   if (Left.isTrailingComment())
5159     return true;
5160   if (Left.IsUnterminatedLiteral)
5161     return true;
5162   if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
5163       Right.Next->is(tok::string_literal)) {
5164     return true;
5165   }
5166   if (Right.is(TT_RequiresClause)) {
5167     switch (Style.RequiresClausePosition) {
5168     case FormatStyle::RCPS_OwnLine:
5169     case FormatStyle::RCPS_WithFollowing:
5170       return true;
5171     default:
5172       break;
5173     }
5174   }
5175   // Can break after template<> declaration
5176   if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
5177       Left.MatchingParen->NestingLevel == 0) {
5178     // Put concepts on the next line e.g.
5179     // template<typename T>
5180     // concept ...
5181     if (Right.is(tok::kw_concept))
5182       return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
5183     return Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes;
5184   }
5185   if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
5186     switch (Style.RequiresClausePosition) {
5187     case FormatStyle::RCPS_OwnLine:
5188     case FormatStyle::RCPS_WithPreceding:
5189       return true;
5190     default:
5191       break;
5192     }
5193   }
5194   if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
5195     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
5196         (Left.is(TT_CtorInitializerComma) ||
5197          Right.is(TT_CtorInitializerColon))) {
5198       return true;
5199     }
5200 
5201     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5202         Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
5203       return true;
5204     }
5205   }
5206   if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
5207       Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
5208       Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
5209     return true;
5210   }
5211   if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
5212     if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
5213          Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
5214         Right.is(TT_CtorInitializerColon)) {
5215       return true;
5216     }
5217 
5218     if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5219         Left.is(TT_CtorInitializerColon)) {
5220       return true;
5221     }
5222   }
5223   // Break only if we have multiple inheritance.
5224   if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
5225       Right.is(TT_InheritanceComma)) {
5226     return true;
5227   }
5228   if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
5229       Left.is(TT_InheritanceComma)) {
5230     return true;
5231   }
5232   if (Right.is(tok::string_literal) && Right.TokenText.starts_with("R\"")) {
5233     // Multiline raw string literals are special wrt. line breaks. The author
5234     // has made a deliberate choice and might have aligned the contents of the
    // string literal accordingly. Thus, we try to keep existing line breaks.
5236     return Right.IsMultiline && Right.NewlinesBefore > 0;
5237   }
5238   if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
5239                                  Left.Previous->is(tok::equal))) &&
5240       Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
5241     // Don't put enums or option definitions onto single lines in protocol
5242     // buffers.
5243     return true;
5244   }
5245   if (Right.is(TT_InlineASMBrace))
5246     return Right.HasUnescapedNewline;
5247 
5248   if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
5249     auto *FirstNonComment = Line.getFirstNonComment();
5250     bool AccessSpecifier =
5251         FirstNonComment &&
5252         FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
5253                                  tok::kw_private, tok::kw_protected);
5254 
5255     if (Style.BraceWrapping.AfterEnum) {
5256       if (Line.startsWith(tok::kw_enum) ||
5257           Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5258         return true;
5259       }
5260       // Ensure BraceWrapping for `public enum A {`.
5261       if (AccessSpecifier && FirstNonComment->Next &&
5262           FirstNonComment->Next->is(tok::kw_enum)) {
5263         return true;
5264       }
5265     }
5266 
5267     // Ensure BraceWrapping for `public interface A {`.
5268     if (Style.BraceWrapping.AfterClass &&
5269         ((AccessSpecifier && FirstNonComment->Next &&
5270           FirstNonComment->Next->is(Keywords.kw_interface)) ||
5271          Line.startsWith(Keywords.kw_interface))) {
5272       return true;
5273     }
5274 
5275     // Don't attempt to interpret struct return types as structs.
5276     if (Right.isNot(TT_FunctionLBrace)) {
5277       return (Line.startsWith(tok::kw_class) &&
5278               Style.BraceWrapping.AfterClass) ||
5279              (Line.startsWith(tok::kw_struct) &&
5280               Style.BraceWrapping.AfterStruct);
5281     }
5282   }
5283 
5284   if (Left.is(TT_ObjCBlockLBrace) &&
5285       Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5286     return true;
5287   }
5288 
5289   // Ensure wrapping after __attribute__((XX)) and @interface etc.
5290   if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
5291       Right.is(TT_ObjCDecl)) {
5292     return true;
5293   }
5294 
5295   if (Left.is(TT_LambdaLBrace)) {
5296     if (IsFunctionArgument(Left) &&
5297         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5298       return false;
5299     }
5300 
5301     if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5302         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5303         (!Left.Children.empty() &&
5304          Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5305       return true;
5306     }
5307   }
5308 
5309   if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5310       (Left.isPointerOrReference() || Left.is(TT_TemplateCloser))) {
5311     return true;
5312   }
5313 
5314   // Put multiple Java annotation on a new line.
5315   if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5316       Left.is(TT_LeadingJavaAnnotation) &&
5317       Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5318       (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5319     return true;
5320   }
5321 
5322   if (Right.is(TT_ProtoExtensionLSquare))
5323     return true;
5324 
5325   // In text proto instances if a submessage contains at least 2 entries and at
5326   // least one of them is a submessage, like A { ... B { ... } ... },
5327   // put all of the entries of A on separate lines by forcing the selector of
5328   // the submessage B to be put on a newline.
5329   //
5330   // Example: these can stay on one line:
5331   // a { scalar_1: 1 scalar_2: 2 }
5332   // a { b { key: value } }
5333   //
5334   // and these entries need to be on a new line even if putting them all in one
5335   // line is under the column limit:
5336   // a {
5337   //   scalar: 1
5338   //   b { key: value }
5339   // }
5340   //
5341   // We enforce this by breaking before a submessage field that has previous
5342   // siblings, *and* breaking before a field that follows a submessage field.
5343   //
5344   // Be careful to exclude the case  [proto.ext] { ... } since the `]` is
5345   // the TT_SelectorName there, but we don't want to break inside the brackets.
5346   //
5347   // Another edge case is @submessage { key: value }, which is a common
5348   // substitution placeholder. In this case we want to keep `@` and `submessage`
5349   // together.
5350   //
5351   // We ensure elsewhere that extensions are always on their own line.
5352   if (Style.isProto() && Right.is(TT_SelectorName) &&
5353       Right.isNot(tok::r_square) && Right.Next) {
5354     // Keep `@submessage` together in:
5355     // @submessage { key: value }
5356     if (Left.is(tok::at))
5357       return false;
5358     // Look for the scope opener after selector in cases like:
5359     // selector { ...
5360     // selector: { ...
5361     // selector: @base { ...
5362     FormatToken *LBrace = Right.Next;
5363     if (LBrace && LBrace->is(tok::colon)) {
5364       LBrace = LBrace->Next;
5365       if (LBrace && LBrace->is(tok::at)) {
5366         LBrace = LBrace->Next;
5367         if (LBrace)
5368           LBrace = LBrace->Next;
5369       }
5370     }
5371     if (LBrace &&
5372         // The scope opener is one of {, [, <:
5373         // selector { ... }
5374         // selector [ ... ]
5375         // selector < ... >
5376         //
5377         // In case of selector { ... }, the l_brace is TT_DictLiteral.
5378         // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5379         // so we check for immediately following r_brace.
5380         ((LBrace->is(tok::l_brace) &&
5381           (LBrace->is(TT_DictLiteral) ||
5382            (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5383          LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5384       // If Left.ParameterCount is 0, then this submessage entry is not the
5385       // first in its parent submessage, and we want to break before this entry.
5386       // If Left.ParameterCount is greater than 0, then its parent submessage
5387       // might contain 1 or more entries and we want to break before this entry
5388       // if it contains at least 2 entries. We deal with this case later by
5389       // detecting and breaking before the next entry in the parent submessage.
5390       if (Left.ParameterCount == 0)
5391         return true;
5392       // However, if this submessage is the first entry in its parent
5393       // submessage, Left.ParameterCount might be 1 in some cases.
5394       // We deal with this case later by detecting an entry
5395       // following a closing paren of this submessage.
5396     }
5397 
5398     // If this is an entry immediately following a submessage, it will be
5399     // preceded by a closing paren of that submessage, like in:
5400     //     left---.  .---right
5401     //            v  v
5402     // sub: { ... } key: value
    // If there was a comment between `}` and `key` above, then `key` would be
    // put on a new line anyway.
5405     if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5406       return true;
5407   }
5408 
5409   return false;
5410 }
5411 
/// Decides whether a line break may be placed directly before \p Right, i.e.
/// between \c Right.Previous and \p Right, on \p Line.
///
/// The function is an ordered list of rules: language-specific rules (C#,
/// Java, JavaScript) are consulted first, followed by rules that apply to all
/// languages. The first rule that matches returns its verdict, so the order of
/// the checks is significant.
///
/// \p Right must have a previous token; \c Right.Previous is dereferenced
/// without a null check.
///
/// \returns \c true if a break before \p Right is permitted, \c false
/// otherwise.
bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
                                    const FormatToken &Right) const {
  // The token immediately preceding Right; most rules are phrased in terms of
  // the (Left, Right) pair.
  const FormatToken &Left = *Right.Previous;
  // Language-specific stuff.
  if (Style.isCSharp()) {
    if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
        Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
      return false;
    }
    // Only break after commas for generic type constraints.
    if (Line.First->is(TT_CSharpGenericTypeConstraint))
      return Left.is(TT_CSharpGenericTypeConstraintComma);
    // Keep nullable operators attached to their identifiers.
    if (Right.is(TT_CSharpNullable))
      return false;
  } else if (Style.Language == FormatStyle::LK_Java) {
    // Never break right after throws/extends/implements, but allow breaking
    // right before them.
    if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                     Keywords.kw_implements)) {
      return false;
    }
    if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                      Keywords.kw_implements)) {
      return true;
    }
  } else if (Style.isJavaScript()) {
    const FormatToken *NonComment = Right.getPreviousNonComment();
    if (NonComment &&
        NonComment->isOneOf(
            tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
            tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
            tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
            Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
            Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
            Keywords.kw_await)) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    if (Right.NestingLevel == 0 &&
        (Left.Tok.getIdentifierInfo() ||
         Left.isOneOf(tok::r_square, tok::r_paren)) &&
        Right.isOneOf(tok::l_square, tok::l_paren)) {
      return false; // Otherwise automatic semicolon insertion would trigger.
    }
    if (NonComment && NonComment->is(tok::identifier) &&
        NonComment->TokenText == "asserts") {
      return false;
    }
    if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
      return false;
    if (Left.is(TT_JsTypeColon))
      return true;
    // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
    if (Left.is(tok::exclaim) && Right.is(tok::colon))
      return false;
    // Look for is type annotations like:
    // function f(): a is B { ... }
    // Do not break before is in these cases.
    if (Right.is(Keywords.kw_is)) {
      const FormatToken *Next = Right.getNextNonComment();
      // If `is` is followed by a colon, it's likely that it's a dict key, so
      // ignore it for this check.
      // For example this is common in Polymer:
      // Polymer({
      //   is: 'name',
      //   ...
      // });
      if (!Next || Next->isNot(tok::colon))
        return false;
    }
    // `in` is treated like a binary operator: break after it or before it
    // depending on BreakBeforeBinaryOperators.
    if (Left.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_in))
      return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
    if (Right.is(Keywords.kw_as))
      return false; // must not break before as in 'x as type' casts
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
      // extends and infer can appear as keywords in conditional types:
      //   https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
      // do not break before them, as the expressions are subject to ASI.
      return false;
    }
    if (Left.is(Keywords.kw_as))
      return true;
    if (Left.is(TT_NonNullAssertion))
      return true;
    if (Left.is(Keywords.kw_declare) &&
        Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
                      Keywords.kw_function, tok::kw_class, tok::kw_enum,
                      Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
                      Keywords.kw_let, tok::kw_const)) {
      // See grammar for 'declare' statements at:
      // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
      return false;
    }
    if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
        Right.isOneOf(tok::identifier, tok::string_literal)) {
      return false; // must not break in "module foo { ...}"
    }
    if (Right.is(TT_TemplateString) && Right.closesScope())
      return false;
    // Don't split tagged template literal so there is a break between the tag
    // identifier and template string.
    if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
      return false;
    if (Left.is(TT_TemplateString) && Left.opensScope())
      return true;
  }

  // Rules below apply regardless of language. They are reached whenever no
  // language-specific rule above has returned.
  if (Left.is(tok::at))
    return false;
  if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
    return false;
  if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
    return Right.isNot(tok::l_paren);
  if (Right.is(TT_PointerOrReference)) {
    return Line.IsMultiVariableDeclStmt ||
           (getTokenPointerOrReferenceAlignment(Right) ==
                FormatStyle::PAS_Right &&
            (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
  }
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    return true;
  }
  if (Left.is(TT_PointerOrReference))
    return false;
  if (Right.isTrailingComment()) {
    // We rely on MustBreakBefore being set correctly here as we should not
    // change the "binding" behavior of a comment.
    // The first comment in a braced lists is always interpreted as belonging to
    // the first list element. Otherwise, it should be placed outside of the
    // list.
    return Left.is(BK_BracedInit) ||
           (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
            Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
  }
  // Ternary operators: never split "?:"; otherwise break before or after the
  // operator according to BreakBeforeTernaryOperators.
  if (Left.is(tok::question) && Right.is(tok::colon))
    return false;
  if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
    return Style.BreakBeforeTernaryOperators;
  if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
    return !Style.BreakBeforeTernaryOperators;
  if (Left.is(TT_InheritanceColon))
    return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
  if (Right.is(TT_InheritanceColon))
    return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
  if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
      Left.isNot(TT_SelectorName)) {
    return true;
  }

  // Any other colon (besides constructor-initializer and inline-asm colons)
  // stays attached to the token before it.
  if (Right.is(tok::colon) &&
      !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
    return false;
  }
  if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
    if (Style.isProto()) {
      if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
        return false;
      // Prevent cases like:
      //
      // submessage:
      //     { key: valueeeeeeeeeeee }
      //
      // when the snippet does not fit into one line.
      // Prefer:
      //
      // submessage: {
      //   key: valueeeeeeeeeeee
      // }
      //
      // instead, even if it is longer by one line.
      //
      // Note that this allows the "{" to go over the column limit
      // when the column limit is just between ":" and "{", but that does
      // not happen too often and alternative formattings in this case are
      // not much better.
      //
      // The code covers the cases:
      //
      // submessage: { ... }
      // submessage: < ... >
      // repeated: [ ... ]
      if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
           Right.is(TT_DictLiteral)) ||
          Right.is(TT_ArrayInitializerLSquare)) {
        return false;
      }
    }
    return true;
  }
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
    return false;
  }
  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
                                    Right.Next->is(TT_ObjCMethodExpr))) {
    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
  }
  if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
    return true;
  if (Right.is(tok::kw_concept))
    return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
  if (Right.is(TT_RequiresClause))
    return true;
  if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
    return true;
  if (Left.ClosesRequiresClause)
    return true;
  if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
                    TT_OverloadedOperator)) {
    return false;
  }
  if (Left.is(TT_RangeBasedForLoopColon))
    return true;
  // NOTE(review): Right being TT_RangeBasedForLoopColon already returned false
  // a few lines above, so this check looks unreachable as written.
  if (Right.is(TT_RangeBasedForLoopColon))
    return false;
  if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
    return true;
  if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
      (Left.is(tok::less) && Right.is(tok::less))) {
    return false;
  }
  // Break before a binary operator only if the style asks for it; with a
  // setting other than BOS_All, assignment operators are excluded.
  if (Right.is(TT_BinaryOperator) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
       Right.getPrecedence() != prec::Assignment)) {
    return true;
  }
  if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
      Left.is(tok::kw_operator)) {
    return false;
  }
  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
      Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
    return false;
  }
  if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
      !Style.Cpp11BracedListStyle) {
    return false;
  }
  if (Left.is(TT_AttributeLParen) ||
      (Left.is(tok::l_paren) && Left.is(TT_TypeDeclarationParen))) {
    return false;
  }
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
    return false;
  }
  if (Right.is(TT_ImplicitStringLiteral))
    return false;

  if (Right.is(TT_TemplateCloser))
    return false;
  if (Right.is(tok::r_square) && Right.MatchingParen &&
      Right.MatchingParen->is(TT_LambdaLSquare)) {
    return false;
  }

  // We only break before r_brace if there was a corresponding break before
  // the l_brace, which is tracked by BreakBeforeClosingBrace.
  if (Right.is(tok::r_brace)) {
    return Right.MatchingParen && (Right.MatchingParen->is(BK_Block) ||
                                   (Right.isBlockIndentedInitRBrace(Style)));
  }

  // We only break before r_paren if we're in a block indented context.
  if (Right.is(tok::r_paren)) {
    if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
        !Right.MatchingParen) {
      return false;
    }
    // Look past one directly following ')' and refuse to break if a '(' comes
    // next.
    auto Next = Right.Next;
    if (Next && Next->is(tok::r_paren))
      Next = Next->Next;
    if (Next && Next->is(tok::l_paren))
      return false;
    // No break before the ')' that closes the parentheses of a for or an if.
    const FormatToken *Previous = Right.MatchingParen->Previous;
    return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
  }

  // Allow breaking after a trailing annotation, e.g. after a method
  // declaration.
  if (Left.is(TT_TrailingAnnotation)) {
    return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
                          tok::less, tok::coloncolon);
  }

  if (Right.isAttribute())
    return true;

  if (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))
    return Left.isNot(TT_AttributeSquare);

  if (Left.is(tok::identifier) && Right.is(tok::string_literal))
    return true;

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return true;

  if (Left.is(TT_CtorInitializerColon)) {
    // A trailing-comment Right has already been handled by the
    // Right.isTrailingComment() rule above, so the second operand is expected
    // to be true whenever this line is reached.
    return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
           (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
  }
  if (Right.is(TT_CtorInitializerColon))
    return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
  // With BCIS_BeforeComma / BILS_BeforeComma the break goes before the comma,
  // never after it.
  if (Left.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_CtorInitializerComma) &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
    return true;
  }
  if (Left.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return false;
  }
  if (Right.is(TT_InheritanceComma) &&
      Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
    return true;
  }
  if (Left.is(TT_ArrayInitializerLSquare))
    return true;
  if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
    return true;
  // Break after a binary operator (other than ->* and <<) when the style does
  // not move all operators to the next line; with a setting other than
  // BOS_None this only applies to assignment operators.
  if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
      !Left.isOneOf(tok::arrowstar, tok::lessless) &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
      (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
       Left.getPrecedence() == prec::Assignment)) {
    return true;
  }
  if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
      (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
    return false;
  }

  auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
  if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
    if (isAllmanLambdaBrace(Left))
      return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
    if (isAllmanLambdaBrace(Right))
      return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
  }

  if (Right.is(tok::kw_noexcept) && Right.is(TT_TrailingAnnotation)) {
    // Every listed case returns; a value not listed here falls through to the
    // generic rule below.
    switch (Style.AllowBreakBeforeNoexceptSpecifier) {
    case FormatStyle::BBNSS_Never:
      return false;
    case FormatStyle::BBNSS_Always:
      return true;
    case FormatStyle::BBNSS_OnlyWithParen:
      return Right.Next && Right.Next->is(tok::l_paren);
    }
  }

  // Generic fallback for token pairs not decided by any rule above: a break is
  // allowed only after/before the token kinds listed here.
  return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
                      tok::kw_class, tok::kw_struct, tok::comment) ||
         Right.isMemberAccess() ||
         Right.isOneOf(TT_TrailingReturnArrow, tok::lessless, tok::colon,
                       tok::l_square, tok::at) ||
         (Left.is(tok::r_paren) &&
          Right.isOneOf(tok::identifier, tok::kw_const)) ||
         (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
         (Left.is(TT_TemplateOpener) && Right.isNot(TT_TemplateCloser));
}
5778 
printDebugInfo(const AnnotatedLine & Line) const5779 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
5780   llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
5781                << ", T=" << Line.Type << ", C=" << Line.IsContinuation
5782                << "):\n";
5783   const FormatToken *Tok = Line.First;
5784   while (Tok) {
5785     llvm::errs() << " M=" << Tok->MustBreakBefore
5786                  << " C=" << Tok->CanBreakBefore
5787                  << " T=" << getTokenTypeName(Tok->getType())
5788                  << " S=" << Tok->SpacesRequiredBefore
5789                  << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
5790                  << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
5791                  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
5792                  << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
5793     for (prec::Level LParen : Tok->FakeLParens)
5794       llvm::errs() << LParen << "/";
5795     llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
5796     llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
5797     llvm::errs() << " Text='" << Tok->TokenText << "'\n";
5798     if (!Tok->Next)
5799       assert(Tok == Line.Last);
5800     Tok = Tok->Next;
5801   }
5802   llvm::errs() << "----\n";
5803 }
5804 
5805 FormatStyle::PointerAlignmentStyle
getTokenReferenceAlignment(const FormatToken & Reference) const5806 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
5807   assert(Reference.isOneOf(tok::amp, tok::ampamp));
5808   switch (Style.ReferenceAlignment) {
5809   case FormatStyle::RAS_Pointer:
5810     return Style.PointerAlignment;
5811   case FormatStyle::RAS_Left:
5812     return FormatStyle::PAS_Left;
5813   case FormatStyle::RAS_Right:
5814     return FormatStyle::PAS_Right;
5815   case FormatStyle::RAS_Middle:
5816     return FormatStyle::PAS_Middle;
5817   }
5818   assert(0); //"Unhandled value of ReferenceAlignment"
5819   return Style.PointerAlignment;
5820 }
5821 
5822 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(const FormatToken & PointerOrReference) const5823 TokenAnnotator::getTokenPointerOrReferenceAlignment(
5824     const FormatToken &PointerOrReference) const {
5825   if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
5826     switch (Style.ReferenceAlignment) {
5827     case FormatStyle::RAS_Pointer:
5828       return Style.PointerAlignment;
5829     case FormatStyle::RAS_Left:
5830       return FormatStyle::PAS_Left;
5831     case FormatStyle::RAS_Right:
5832       return FormatStyle::PAS_Right;
5833     case FormatStyle::RAS_Middle:
5834       return FormatStyle::PAS_Middle;
5835     }
5836   }
5837   assert(PointerOrReference.is(tok::star));
5838   return Style.PointerAlignment;
5839 }
5840 
5841 } // namespace format
5842 } // namespace clang
5843