1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21
22 #define DEBUG_TYPE "format-token-annotator"
23
24 namespace clang {
25 namespace format {
26
27 namespace {
28
29 /// Returns \c true if the token can be used as an identifier in
30 /// an Objective-C \c @selector, \c false otherwise.
31 ///
32 /// Because getFormattingLangOpts() always lexes source code as
33 /// Objective-C++, C++ keywords like \c new and \c delete are
34 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
35 ///
36 /// For Objective-C and Objective-C++, both identifiers and keywords
37 /// are valid inside @selector(...) (or a macro which
38 /// invokes @selector(...)). So, we allow treat any identifier or
39 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)40 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
41 return Tok.Tok.getIdentifierInfo() != nullptr;
42 }
43
44 /// With `Left` being '(', check if we're at either `[...](` or
45 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)46 static bool isLambdaParameterList(const FormatToken *Left) {
47 // Skip <...> if present.
48 if (Left->Previous && Left->Previous->is(tok::greater) &&
49 Left->Previous->MatchingParen &&
50 Left->Previous->MatchingParen->is(TT_TemplateOpener))
51 Left = Left->Previous->MatchingParen;
52
53 // Check for `[...]`.
54 return Left->Previous && Left->Previous->is(tok::r_square) &&
55 Left->Previous->MatchingParen &&
56 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
57 }
58
59 /// Returns \c true if the token is followed by a boolean condition, \c false
60 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)61 static bool isKeywordWithCondition(const FormatToken &Tok) {
62 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
63 tok::kw_constexpr, tok::kw_catch);
64 }
65
66 /// A parser that gathers additional information about tokens.
67 ///
/// The \c TokenAnnotator tries to match parentheses and square brackets and
/// to store the parenthesis levels. It also tries to resolve matching "<" and
/// ">" into template parameter lists.
71 class AnnotatingParser {
72 public:
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords)73 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
74 const AdditionalKeywords &Keywords)
75 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
76 Keywords(Keywords) {
77 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
78 resetTokenMetadata(CurrentToken);
79 }
80
81 private:
parseAngle()82 bool parseAngle() {
83 if (!CurrentToken || !CurrentToken->Previous)
84 return false;
85 if (NonTemplateLess.count(CurrentToken->Previous))
86 return false;
87
88 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
89 if (Previous.Previous) {
90 if (Previous.Previous->Tok.isLiteral())
91 return false;
92 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
93 (!Previous.Previous->MatchingParen ||
94 !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
95 return false;
96 }
97
98 FormatToken *Left = CurrentToken->Previous;
99 Left->ParentBracket = Contexts.back().ContextKind;
100 ScopedContextCreator ContextCreator(*this, tok::less, 12);
101
102 // If this angle is in the context of an expression, we need to be more
103 // hesitant to detect it as opening template parameters.
104 bool InExprContext = Contexts.back().IsExpression;
105
106 Contexts.back().IsExpression = false;
107 // If there's a template keyword before the opening angle bracket, this is a
108 // template parameter, not an argument.
109 Contexts.back().InTemplateArgument =
110 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
111
112 if (Style.Language == FormatStyle::LK_Java &&
113 CurrentToken->is(tok::question))
114 next();
115
116 while (CurrentToken) {
117 if (CurrentToken->is(tok::greater)) {
118 // Try to do a better job at looking for ">>" within the condition of
119 // a statement.
120 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
121 Left->ParentBracket != tok::less &&
122 isKeywordWithCondition(*Line.First))
123 return false;
124 Left->MatchingParen = CurrentToken;
125 CurrentToken->MatchingParen = Left;
126 // In TT_Proto, we must distignuish between:
127 // map<key, value>
128 // msg < item: data >
129 // msg: < item: data >
130 // In TT_TextProto, map<key, value> does not occur.
131 if (Style.Language == FormatStyle::LK_TextProto ||
132 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
133 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
134 CurrentToken->setType(TT_DictLiteral);
135 else
136 CurrentToken->setType(TT_TemplateCloser);
137 next();
138 return true;
139 }
140 if (CurrentToken->is(tok::question) &&
141 Style.Language == FormatStyle::LK_Java) {
142 next();
143 continue;
144 }
145 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
146 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
147 !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto &&
148 Style.Language != FormatStyle::LK_TextProto))
149 return false;
150 // If a && or || is found and interpreted as a binary operator, this set
151 // of angles is likely part of something like "a < b && c > d". If the
152 // angles are inside an expression, the ||/&& might also be a binary
153 // operator that was misinterpreted because we are parsing template
154 // parameters.
155 // FIXME: This is getting out of hand, write a decent parser.
156 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
157 CurrentToken->Previous->is(TT_BinaryOperator) &&
158 Contexts[Contexts.size() - 2].IsExpression &&
159 !Line.startsWith(tok::kw_template))
160 return false;
161 updateParameterCount(Left, CurrentToken);
162 if (Style.Language == FormatStyle::LK_Proto) {
163 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
164 if (CurrentToken->is(tok::colon) ||
165 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
166 Previous->isNot(tok::colon)))
167 Previous->setType(TT_SelectorName);
168 }
169 }
170 if (!consumeToken())
171 return false;
172 }
173 return false;
174 }
175
parseUntouchableParens()176 bool parseUntouchableParens() {
177 while (CurrentToken) {
178 CurrentToken->Finalized = true;
179 switch (CurrentToken->Tok.getKind()) {
180 case tok::l_paren:
181 next();
182 if (!parseUntouchableParens())
183 return false;
184 continue;
185 case tok::r_paren:
186 next();
187 return true;
188 default:
189 // no-op
190 break;
191 }
192 next();
193 }
194 return false;
195 }
196
parseParens(bool LookForDecls=false)197 bool parseParens(bool LookForDecls = false) {
198 if (!CurrentToken)
199 return false;
200 FormatToken *Left = CurrentToken->Previous;
201 Left->ParentBracket = Contexts.back().ContextKind;
202 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
203
204 // FIXME: This is a bit of a hack. Do better.
205 Contexts.back().ColonIsForRangeExpr =
206 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
207
208 if (Left->Previous && Left->Previous->is(TT_UntouchableMacroFunc)) {
209 Left->Finalized = true;
210 return parseUntouchableParens();
211 }
212
213 bool StartsObjCMethodExpr = false;
214 if (FormatToken *MaybeSel = Left->Previous) {
215 // @selector( starts a selector.
216 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
217 MaybeSel->Previous->is(tok::at)) {
218 StartsObjCMethodExpr = true;
219 }
220 }
221
222 if (Left->is(TT_OverloadedOperatorLParen)) {
223 Contexts.back().IsExpression = false;
224 } else if (Style.Language == FormatStyle::LK_JavaScript &&
225 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
226 Line.startsWith(tok::kw_export, Keywords.kw_type,
227 tok::identifier))) {
228 // type X = (...);
229 // export type X = (...);
230 Contexts.back().IsExpression = false;
231 } else if (Left->Previous &&
232 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
233 tok::kw_while, tok::l_paren,
234 tok::comma) ||
235 Left->Previous->isIf() ||
236 Left->Previous->is(TT_BinaryOperator))) {
237 // static_assert, if and while usually contain expressions.
238 Contexts.back().IsExpression = true;
239 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
240 (Left->Previous->is(Keywords.kw_function) ||
241 (Left->Previous->endsSequence(tok::identifier,
242 Keywords.kw_function)))) {
243 // function(...) or function f(...)
244 Contexts.back().IsExpression = false;
245 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
246 Left->Previous->is(TT_JsTypeColon)) {
247 // let x: (SomeType);
248 Contexts.back().IsExpression = false;
249 } else if (isLambdaParameterList(Left)) {
250 // This is a parameter list of a lambda expression.
251 Contexts.back().IsExpression = false;
252 } else if (Line.InPPDirective &&
253 (!Left->Previous || !Left->Previous->is(tok::identifier))) {
254 Contexts.back().IsExpression = true;
255 } else if (Contexts[Contexts.size() - 2].CaretFound) {
256 // This is the parameter list of an ObjC block.
257 Contexts.back().IsExpression = false;
258 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
259 Left->setType(TT_AttributeParen);
260 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
261 // The first argument to a foreach macro is a declaration.
262 Contexts.back().IsForEachMacro = true;
263 Contexts.back().IsExpression = false;
264 } else if (Left->Previous && Left->Previous->MatchingParen &&
265 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
266 Contexts.back().IsExpression = false;
267 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
268 bool IsForOrCatch =
269 Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
270 Contexts.back().IsExpression = !IsForOrCatch;
271 }
272
273 if (StartsObjCMethodExpr) {
274 Contexts.back().ColonIsObjCMethodExpr = true;
275 Left->setType(TT_ObjCMethodExpr);
276 }
277
278 // MightBeFunctionType and ProbablyFunctionType are used for
279 // function pointer and reference types as well as Objective-C
280 // block types:
281 //
282 // void (*FunctionPointer)(void);
283 // void (&FunctionReference)(void);
284 // void (^ObjCBlock)(void);
285 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
286 bool ProbablyFunctionType =
287 CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
288 bool HasMultipleLines = false;
289 bool HasMultipleParametersOnALine = false;
290 bool MightBeObjCForRangeLoop =
291 Left->Previous && Left->Previous->is(tok::kw_for);
292 FormatToken *PossibleObjCForInToken = nullptr;
293 while (CurrentToken) {
294 // LookForDecls is set when "if (" has been seen. Check for
295 // 'identifier' '*' 'identifier' followed by not '=' -- this
296 // '*' has to be a binary operator but determineStarAmpUsage() will
297 // categorize it as an unary operator, so set the right type here.
298 if (LookForDecls && CurrentToken->Next) {
299 FormatToken *Prev = CurrentToken->getPreviousNonComment();
300 if (Prev) {
301 FormatToken *PrevPrev = Prev->getPreviousNonComment();
302 FormatToken *Next = CurrentToken->Next;
303 if (PrevPrev && PrevPrev->is(tok::identifier) &&
304 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
305 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
306 Prev->setType(TT_BinaryOperator);
307 LookForDecls = false;
308 }
309 }
310 }
311
312 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
313 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
314 tok::coloncolon))
315 ProbablyFunctionType = true;
316 if (CurrentToken->is(tok::comma))
317 MightBeFunctionType = false;
318 if (CurrentToken->Previous->is(TT_BinaryOperator))
319 Contexts.back().IsExpression = true;
320 if (CurrentToken->is(tok::r_paren)) {
321 if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
322 (CurrentToken->Next->is(tok::l_paren) ||
323 (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
324 Left->setType(Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
325 : TT_FunctionTypeLParen);
326 Left->MatchingParen = CurrentToken;
327 CurrentToken->MatchingParen = Left;
328
329 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
330 Left->Previous && Left->Previous->is(tok::l_paren)) {
331 // Detect the case where macros are used to generate lambdas or
332 // function bodies, e.g.:
333 // auto my_lambda = MARCO((Type *type, int i) { .. body .. });
334 for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
335 if (Tok->is(TT_BinaryOperator) &&
336 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
337 Tok->setType(TT_PointerOrReference);
338 }
339 }
340
341 if (StartsObjCMethodExpr) {
342 CurrentToken->setType(TT_ObjCMethodExpr);
343 if (Contexts.back().FirstObjCSelectorName) {
344 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
345 Contexts.back().LongestObjCSelectorName;
346 }
347 }
348
349 if (Left->is(TT_AttributeParen))
350 CurrentToken->setType(TT_AttributeParen);
351 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
352 CurrentToken->setType(TT_JavaAnnotation);
353 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
354 CurrentToken->setType(TT_LeadingJavaAnnotation);
355 if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
356 CurrentToken->setType(TT_AttributeSquare);
357
358 if (!HasMultipleLines)
359 Left->PackingKind = PPK_Inconclusive;
360 else if (HasMultipleParametersOnALine)
361 Left->PackingKind = PPK_BinPacked;
362 else
363 Left->PackingKind = PPK_OnePerLine;
364
365 next();
366 return true;
367 }
368 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
369 return false;
370
371 if (CurrentToken->is(tok::l_brace))
372 Left->setType(TT_Unknown); // Not TT_ObjCBlockLParen
373 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
374 !CurrentToken->Next->HasUnescapedNewline &&
375 !CurrentToken->Next->isTrailingComment())
376 HasMultipleParametersOnALine = true;
377 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
378 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
379 !CurrentToken->is(tok::l_brace))
380 Contexts.back().IsExpression = false;
381 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
382 MightBeObjCForRangeLoop = false;
383 if (PossibleObjCForInToken) {
384 PossibleObjCForInToken->setType(TT_Unknown);
385 PossibleObjCForInToken = nullptr;
386 }
387 }
388 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
389 PossibleObjCForInToken = CurrentToken;
390 PossibleObjCForInToken->setType(TT_ObjCForIn);
391 }
392 // When we discover a 'new', we set CanBeExpression to 'false' in order to
393 // parse the type correctly. Reset that after a comma.
394 if (CurrentToken->is(tok::comma))
395 Contexts.back().CanBeExpression = true;
396
397 FormatToken *Tok = CurrentToken;
398 if (!consumeToken())
399 return false;
400 updateParameterCount(Left, Tok);
401 if (CurrentToken && CurrentToken->HasUnescapedNewline)
402 HasMultipleLines = true;
403 }
404 return false;
405 }
406
isCSharpAttributeSpecifier(const FormatToken & Tok)407 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
408 if (!Style.isCSharp())
409 return false;
410
411 // `identifier[i]` is not an attribute.
412 if (Tok.Previous && Tok.Previous->is(tok::identifier))
413 return false;
414
415 // Chains of [] in `identifier[i][j][k]` are not attributes.
416 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
417 auto *MatchingParen = Tok.Previous->MatchingParen;
418 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
419 return false;
420 }
421
422 const FormatToken *AttrTok = Tok.Next;
423 if (!AttrTok)
424 return false;
425
426 // Just an empty declaration e.g. string [].
427 if (AttrTok->is(tok::r_square))
428 return false;
429
430 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
431 while (AttrTok && AttrTok->isNot(tok::r_square)) {
432 AttrTok = AttrTok->Next;
433 }
434
435 if (!AttrTok)
436 return false;
437
438 // Allow an attribute to be the only content of a file.
439 AttrTok = AttrTok->Next;
440 if (!AttrTok)
441 return true;
442
443 // Limit this to being an access modifier that follows.
444 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
445 tok::comment, tok::kw_class, tok::kw_static,
446 tok::l_square, Keywords.kw_internal)) {
447 return true;
448 }
449
450 // incase its a [XXX] retval func(....
451 if (AttrTok->Next &&
452 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren))
453 return true;
454
455 return false;
456 }
457
isCpp11AttributeSpecifier(const FormatToken & Tok)458 bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
459 if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
460 return false;
461 // The first square bracket is part of an ObjC array literal
462 if (Tok.Previous && Tok.Previous->is(tok::at)) {
463 return false;
464 }
465 const FormatToken *AttrTok = Tok.Next->Next;
466 if (!AttrTok)
467 return false;
468 // C++17 '[[using ns: foo, bar(baz, blech)]]'
469 // We assume nobody will name an ObjC variable 'using'.
470 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
471 return true;
472 if (AttrTok->isNot(tok::identifier))
473 return false;
474 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
475 // ObjC message send. We assume nobody will use : in a C++11 attribute
476 // specifier parameter, although this is technically valid:
477 // [[foo(:)]].
478 if (AttrTok->is(tok::colon) ||
479 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
480 AttrTok->startsSequence(tok::r_paren, tok::identifier))
481 return false;
482 if (AttrTok->is(tok::ellipsis))
483 return true;
484 AttrTok = AttrTok->Next;
485 }
486 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
487 }
488
parseSquare()489 bool parseSquare() {
490 if (!CurrentToken)
491 return false;
492
493 // A '[' could be an index subscript (after an identifier or after
494 // ')' or ']'), it could be the start of an Objective-C method
495 // expression, it could the start of an Objective-C array literal,
496 // or it could be a C++ attribute specifier [[foo::bar]].
497 FormatToken *Left = CurrentToken->Previous;
498 Left->ParentBracket = Contexts.back().ContextKind;
499 FormatToken *Parent = Left->getPreviousNonComment();
500
501 // Cases where '>' is followed by '['.
502 // In C++, this can happen either in array of templates (foo<int>[10])
503 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
504 bool CppArrayTemplates =
505 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
506 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
507 Contexts.back().InTemplateArgument);
508
509 bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
510 Contexts.back().InCpp11AttributeSpecifier;
511
512 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
513 bool IsCSharpAttributeSpecifier =
514 isCSharpAttributeSpecifier(*Left) ||
515 Contexts.back().InCSharpAttributeSpecifier;
516
517 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
518 bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
519 bool StartsObjCMethodExpr =
520 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
521 Style.isCpp() && !IsCpp11AttributeSpecifier &&
522 !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
523 Left->isNot(TT_LambdaLSquare) &&
524 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
525 (!Parent ||
526 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
527 tok::kw_return, tok::kw_throw) ||
528 Parent->isUnaryOperator() ||
529 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
530 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
531 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
532 prec::Unknown));
533 bool ColonFound = false;
534
535 unsigned BindingIncrease = 1;
536 if (IsCppStructuredBinding) {
537 Left->setType(TT_StructuredBindingLSquare);
538 } else if (Left->is(TT_Unknown)) {
539 if (StartsObjCMethodExpr) {
540 Left->setType(TT_ObjCMethodExpr);
541 } else if (InsideInlineASM) {
542 Left->setType(TT_InlineASMSymbolicNameLSquare);
543 } else if (IsCpp11AttributeSpecifier) {
544 Left->setType(TT_AttributeSquare);
545 } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
546 Contexts.back().ContextKind == tok::l_brace &&
547 Parent->isOneOf(tok::l_brace, tok::comma)) {
548 Left->setType(TT_JsComputedPropertyName);
549 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
550 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
551 Left->setType(TT_DesignatedInitializerLSquare);
552 } else if (IsCSharpAttributeSpecifier) {
553 Left->setType(TT_AttributeSquare);
554 } else if (CurrentToken->is(tok::r_square) && Parent &&
555 Parent->is(TT_TemplateCloser)) {
556 Left->setType(TT_ArraySubscriptLSquare);
557 } else if (Style.Language == FormatStyle::LK_Proto ||
558 Style.Language == FormatStyle::LK_TextProto) {
559 // Square braces in LK_Proto can either be message field attributes:
560 //
561 // optional Aaa aaa = 1 [
562 // (aaa) = aaa
563 // ];
564 //
565 // extensions 123 [
566 // (aaa) = aaa
567 // ];
568 //
569 // or text proto extensions (in options):
570 //
571 // option (Aaa.options) = {
572 // [type.type/type] {
573 // key: value
574 // }
575 // }
576 //
577 // or repeated fields (in options):
578 //
579 // option (Aaa.options) = {
580 // keys: [ 1, 2, 3 ]
581 // }
582 //
583 // In the first and the third case we want to spread the contents inside
584 // the square braces; in the second we want to keep them inline.
585 Left->setType(TT_ArrayInitializerLSquare);
586 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
587 tok::equal) &&
588 !Left->endsSequence(tok::l_square, tok::numeric_constant,
589 tok::identifier) &&
590 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
591 Left->setType(TT_ProtoExtensionLSquare);
592 BindingIncrease = 10;
593 }
594 } else if (!CppArrayTemplates && Parent &&
595 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
596 tok::comma, tok::l_paren, tok::l_square,
597 tok::question, tok::colon, tok::kw_return,
598 // Should only be relevant to JavaScript:
599 tok::kw_default)) {
600 Left->setType(TT_ArrayInitializerLSquare);
601 } else {
602 BindingIncrease = 10;
603 Left->setType(TT_ArraySubscriptLSquare);
604 }
605 }
606
607 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
608 Contexts.back().IsExpression = true;
609 if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
610 Parent->is(TT_JsTypeColon))
611 Contexts.back().IsExpression = false;
612
613 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
614 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
615 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
616
617 while (CurrentToken) {
618 if (CurrentToken->is(tok::r_square)) {
619 if (IsCpp11AttributeSpecifier)
620 CurrentToken->setType(TT_AttributeSquare);
621 if (IsCSharpAttributeSpecifier)
622 CurrentToken->setType(TT_AttributeSquare);
623 else if (((CurrentToken->Next &&
624 CurrentToken->Next->is(tok::l_paren)) ||
625 (CurrentToken->Previous &&
626 CurrentToken->Previous->Previous == Left)) &&
627 Left->is(TT_ObjCMethodExpr)) {
628 // An ObjC method call is rarely followed by an open parenthesis. It
629 // also can't be composed of just one token, unless it's a macro that
630 // will be expanded to more tokens.
631 // FIXME: Do we incorrectly label ":" with this?
632 StartsObjCMethodExpr = false;
633 Left->setType(TT_Unknown);
634 }
635 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
636 CurrentToken->setType(TT_ObjCMethodExpr);
637 // If we haven't seen a colon yet, make sure the last identifier
638 // before the r_square is tagged as a selector name component.
639 if (!ColonFound && CurrentToken->Previous &&
640 CurrentToken->Previous->is(TT_Unknown) &&
641 canBeObjCSelectorComponent(*CurrentToken->Previous))
642 CurrentToken->Previous->setType(TT_SelectorName);
643 // determineStarAmpUsage() thinks that '*' '[' is allocating an
644 // array of pointers, but if '[' starts a selector then '*' is a
645 // binary operator.
646 if (Parent && Parent->is(TT_PointerOrReference))
647 Parent->setType(TT_BinaryOperator);
648 }
649 // An arrow after an ObjC method expression is not a lambda arrow.
650 if (CurrentToken->getType() == TT_ObjCMethodExpr &&
651 CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow))
652 CurrentToken->Next->setType(TT_Unknown);
653 Left->MatchingParen = CurrentToken;
654 CurrentToken->MatchingParen = Left;
655 // FirstObjCSelectorName is set when a colon is found. This does
656 // not work, however, when the method has no parameters.
657 // Here, we set FirstObjCSelectorName when the end of the method call is
658 // reached, in case it was not set already.
659 if (!Contexts.back().FirstObjCSelectorName) {
660 FormatToken *Previous = CurrentToken->getPreviousNonComment();
661 if (Previous && Previous->is(TT_SelectorName)) {
662 Previous->ObjCSelectorNameParts = 1;
663 Contexts.back().FirstObjCSelectorName = Previous;
664 }
665 } else {
666 Left->ParameterCount =
667 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
668 }
669 if (Contexts.back().FirstObjCSelectorName) {
670 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
671 Contexts.back().LongestObjCSelectorName;
672 if (Left->BlockParameterCount > 1)
673 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
674 }
675 next();
676 return true;
677 }
678 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
679 return false;
680 if (CurrentToken->is(tok::colon)) {
681 if (IsCpp11AttributeSpecifier &&
682 CurrentToken->endsSequence(tok::colon, tok::identifier,
683 tok::kw_using)) {
684 // Remember that this is a [[using ns: foo]] C++ attribute, so we
685 // don't add a space before the colon (unlike other colons).
686 CurrentToken->setType(TT_AttributeColon);
687 } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
688 TT_DesignatedInitializerLSquare)) {
689 Left->setType(TT_ObjCMethodExpr);
690 StartsObjCMethodExpr = true;
691 Contexts.back().ColonIsObjCMethodExpr = true;
692 if (Parent && Parent->is(tok::r_paren))
693 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
694 Parent->setType(TT_CastRParen);
695 }
696 ColonFound = true;
697 }
698 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
699 !ColonFound)
700 Left->setType(TT_ArrayInitializerLSquare);
701 FormatToken *Tok = CurrentToken;
702 if (!consumeToken())
703 return false;
704 updateParameterCount(Left, Tok);
705 }
706 return false;
707 }
708
parseBrace()709 bool parseBrace() {
710 if (CurrentToken) {
711 FormatToken *Left = CurrentToken->Previous;
712 Left->ParentBracket = Contexts.back().ContextKind;
713
714 if (Contexts.back().CaretFound)
715 Left->setType(TT_ObjCBlockLBrace);
716 Contexts.back().CaretFound = false;
717
718 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
719 Contexts.back().ColonIsDictLiteral = true;
720 if (Left->BlockKind == BK_BracedInit)
721 Contexts.back().IsExpression = true;
722 if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
723 Left->Previous->is(TT_JsTypeColon))
724 Contexts.back().IsExpression = false;
725
726 while (CurrentToken) {
727 if (CurrentToken->is(tok::r_brace)) {
728 Left->MatchingParen = CurrentToken;
729 CurrentToken->MatchingParen = Left;
730 next();
731 return true;
732 }
733 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
734 return false;
735 updateParameterCount(Left, CurrentToken);
736 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
737 FormatToken *Previous = CurrentToken->getPreviousNonComment();
738 if (Previous->is(TT_JsTypeOptionalQuestion))
739 Previous = Previous->getPreviousNonComment();
740 if ((CurrentToken->is(tok::colon) &&
741 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
742 Style.Language == FormatStyle::LK_Proto ||
743 Style.Language == FormatStyle::LK_TextProto) {
744 Left->setType(TT_DictLiteral);
745 if (Previous->Tok.getIdentifierInfo() ||
746 Previous->is(tok::string_literal))
747 Previous->setType(TT_SelectorName);
748 }
749 if (CurrentToken->is(tok::colon) ||
750 Style.Language == FormatStyle::LK_JavaScript)
751 Left->setType(TT_DictLiteral);
752 }
753 if (CurrentToken->is(tok::comma) &&
754 Style.Language == FormatStyle::LK_JavaScript)
755 Left->setType(TT_DictLiteral);
756 if (!consumeToken())
757 return false;
758 }
759 }
760 return true;
761 }
762
updateParameterCount(FormatToken * Left,FormatToken * Current)763 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
764 // For ObjC methods, the number of parameters is calculated differently as
765 // method declarations have a different structure (the parameters are not
766 // inside a bracket scope).
767 if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
768 ++Left->BlockParameterCount;
769 if (Current->is(tok::comma)) {
770 ++Left->ParameterCount;
771 if (!Left->Role)
772 Left->Role.reset(new CommaSeparatedList(Style));
773 Left->Role->CommaFound(Current);
774 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
775 Left->ParameterCount = 1;
776 }
777 }
778
parseConditional()779 bool parseConditional() {
780 while (CurrentToken) {
781 if (CurrentToken->is(tok::colon)) {
782 CurrentToken->setType(TT_ConditionalExpr);
783 next();
784 return true;
785 }
786 if (!consumeToken())
787 return false;
788 }
789 return false;
790 }
791
parseTemplateDeclaration()792 bool parseTemplateDeclaration() {
793 if (CurrentToken && CurrentToken->is(tok::less)) {
794 CurrentToken->setType(TT_TemplateOpener);
795 next();
796 if (!parseAngle())
797 return false;
798 if (CurrentToken)
799 CurrentToken->Previous->ClosesTemplateDeclaration = true;
800 return true;
801 }
802 return false;
803 }
804
consumeToken()805 bool consumeToken() {
806 FormatToken *Tok = CurrentToken;
807 next();
808 switch (Tok->Tok.getKind()) {
809 case tok::plus:
810 case tok::minus:
811 if (!Tok->Previous && Line.MustBeDeclaration)
812 Tok->setType(TT_ObjCMethodSpecifier);
813 break;
814 case tok::colon:
815 if (!Tok->Previous)
816 return false;
817 // Colons from ?: are handled in parseConditional().
818 if (Style.Language == FormatStyle::LK_JavaScript) {
819 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
820 (Contexts.size() == 1 && // switch/case labels
821 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
822 Contexts.back().ContextKind == tok::l_paren || // function params
823 Contexts.back().ContextKind == tok::l_square || // array type
824 (!Contexts.back().IsExpression &&
825 Contexts.back().ContextKind == tok::l_brace) || // object type
826 (Contexts.size() == 1 &&
827 Line.MustBeDeclaration)) { // method/property declaration
828 Contexts.back().IsExpression = false;
829 Tok->setType(TT_JsTypeColon);
830 break;
831 }
832 } else if (Style.isCSharp()) {
833 if (Contexts.back().InCSharpAttributeSpecifier) {
834 Tok->setType(TT_AttributeColon);
835 break;
836 }
837 if (Contexts.back().ContextKind == tok::l_paren) {
838 Tok->setType(TT_CSharpNamedArgumentColon);
839 break;
840 }
841 }
842 if (Contexts.back().ColonIsDictLiteral ||
843 Style.Language == FormatStyle::LK_Proto ||
844 Style.Language == FormatStyle::LK_TextProto) {
845 Tok->setType(TT_DictLiteral);
846 if (Style.Language == FormatStyle::LK_TextProto) {
847 if (FormatToken *Previous = Tok->getPreviousNonComment())
848 Previous->setType(TT_SelectorName);
849 }
850 } else if (Contexts.back().ColonIsObjCMethodExpr ||
851 Line.startsWith(TT_ObjCMethodSpecifier)) {
852 Tok->setType(TT_ObjCMethodExpr);
853 const FormatToken *BeforePrevious = Tok->Previous->Previous;
854 // Ensure we tag all identifiers in method declarations as
855 // TT_SelectorName.
856 bool UnknownIdentifierInMethodDeclaration =
857 Line.startsWith(TT_ObjCMethodSpecifier) &&
858 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
859 if (!BeforePrevious ||
860 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
861 !(BeforePrevious->is(TT_CastRParen) ||
862 (BeforePrevious->is(TT_ObjCMethodExpr) &&
863 BeforePrevious->is(tok::colon))) ||
864 BeforePrevious->is(tok::r_square) ||
865 Contexts.back().LongestObjCSelectorName == 0 ||
866 UnknownIdentifierInMethodDeclaration) {
867 Tok->Previous->setType(TT_SelectorName);
868 if (!Contexts.back().FirstObjCSelectorName)
869 Contexts.back().FirstObjCSelectorName = Tok->Previous;
870 else if (Tok->Previous->ColumnWidth >
871 Contexts.back().LongestObjCSelectorName)
872 Contexts.back().LongestObjCSelectorName =
873 Tok->Previous->ColumnWidth;
874 Tok->Previous->ParameterIndex =
875 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
876 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
877 }
878 } else if (Contexts.back().ColonIsForRangeExpr) {
879 Tok->setType(TT_RangeBasedForLoopColon);
880 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
881 Tok->setType(TT_BitFieldColon);
882 } else if (Contexts.size() == 1 &&
883 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
884 FormatToken *Prev = Tok->getPreviousNonComment();
885 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
886 Tok->setType(TT_CtorInitializerColon);
887 else if (Prev->is(tok::kw_try)) {
888 // Member initializer list within function try block.
889 FormatToken *PrevPrev = Prev->getPreviousNonComment();
890 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
891 Tok->setType(TT_CtorInitializerColon);
892 } else
893 Tok->setType(TT_InheritanceColon);
894 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
895 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
896 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
897 Tok->Next->Next->is(tok::colon)))) {
898 // This handles a special macro in ObjC code where selectors including
899 // the colon are passed as macro arguments.
900 Tok->setType(TT_ObjCMethodExpr);
901 } else if (Contexts.back().ContextKind == tok::l_paren) {
902 Tok->setType(TT_InlineASMColon);
903 }
904 break;
905 case tok::pipe:
906 case tok::amp:
907 // | and & in declarations/type expressions represent union and
908 // intersection types, respectively.
909 if (Style.Language == FormatStyle::LK_JavaScript &&
910 !Contexts.back().IsExpression)
911 Tok->setType(TT_JsTypeOperator);
912 break;
913 case tok::kw_if:
914 case tok::kw_while:
915 if (Tok->is(tok::kw_if) && CurrentToken &&
916 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier))
917 next();
918 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
919 next();
920 if (!parseParens(/*LookForDecls=*/true))
921 return false;
922 }
923 break;
924 case tok::kw_for:
925 if (Style.Language == FormatStyle::LK_JavaScript) {
926 // x.for and {for: ...}
927 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
928 (Tok->Next && Tok->Next->is(tok::colon)))
929 break;
930 // JS' for await ( ...
931 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
932 next();
933 }
934 Contexts.back().ColonIsForRangeExpr = true;
935 next();
936 if (!parseParens())
937 return false;
938 break;
939 case tok::l_paren:
940 // When faced with 'operator()()', the kw_operator handler incorrectly
941 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
942 // the first two parens OverloadedOperators and the second l_paren an
943 // OverloadedOperatorLParen.
944 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
945 Tok->Previous->MatchingParen &&
946 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
947 Tok->Previous->setType(TT_OverloadedOperator);
948 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
949 Tok->setType(TT_OverloadedOperatorLParen);
950 }
951
952 if (!parseParens())
953 return false;
954 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
955 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
956 (!Tok->Previous ||
957 !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
958 TT_LeadingJavaAnnotation)))
959 Line.MightBeFunctionDecl = true;
960 break;
961 case tok::l_square:
962 if (!parseSquare())
963 return false;
964 break;
965 case tok::l_brace:
966 if (Style.Language == FormatStyle::LK_TextProto) {
967 FormatToken *Previous = Tok->getPreviousNonComment();
968 if (Previous && Previous->getType() != TT_DictLiteral)
969 Previous->setType(TT_SelectorName);
970 }
971 if (!parseBrace())
972 return false;
973 break;
974 case tok::less:
975 if (parseAngle()) {
976 Tok->setType(TT_TemplateOpener);
977 // In TT_Proto, we must distignuish between:
978 // map<key, value>
979 // msg < item: data >
980 // msg: < item: data >
981 // In TT_TextProto, map<key, value> does not occur.
982 if (Style.Language == FormatStyle::LK_TextProto ||
983 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
984 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
985 Tok->setType(TT_DictLiteral);
986 FormatToken *Previous = Tok->getPreviousNonComment();
987 if (Previous && Previous->getType() != TT_DictLiteral)
988 Previous->setType(TT_SelectorName);
989 }
990 } else {
991 Tok->setType(TT_BinaryOperator);
992 NonTemplateLess.insert(Tok);
993 CurrentToken = Tok;
994 next();
995 }
996 break;
997 case tok::r_paren:
998 case tok::r_square:
999 return false;
1000 case tok::r_brace:
1001 // Lines can start with '}'.
1002 if (Tok->Previous)
1003 return false;
1004 break;
1005 case tok::greater:
1006 if (Style.Language != FormatStyle::LK_TextProto)
1007 Tok->setType(TT_BinaryOperator);
1008 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1009 Tok->SpacesRequiredBefore = 1;
1010 break;
1011 case tok::kw_operator:
1012 if (Style.Language == FormatStyle::LK_TextProto ||
1013 Style.Language == FormatStyle::LK_Proto)
1014 break;
1015 while (CurrentToken &&
1016 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1017 if (CurrentToken->isOneOf(tok::star, tok::amp))
1018 CurrentToken->setType(TT_PointerOrReference);
1019 consumeToken();
1020 if (CurrentToken && CurrentToken->is(tok::comma) &&
1021 CurrentToken->Previous->isNot(tok::kw_operator))
1022 break;
1023 if (CurrentToken && CurrentToken->Previous->isOneOf(
1024 TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1025 tok::star, tok::arrow, tok::amp, tok::ampamp))
1026 CurrentToken->Previous->setType(TT_OverloadedOperator);
1027 }
1028 if (CurrentToken && CurrentToken->is(tok::l_paren))
1029 CurrentToken->setType(TT_OverloadedOperatorLParen);
1030 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1031 CurrentToken->Previous->setType(TT_OverloadedOperator);
1032 break;
1033 case tok::question:
1034 if (Tok->is(TT_CSharpNullConditionalLSquare)) {
1035 if (!parseSquare())
1036 return false;
1037 break;
1038 }
1039 if (Tok->isOneOf(TT_CSharpNullConditional, TT_CSharpNullCoalescing))
1040 break;
1041 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
1042 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1043 tok::r_brace)) {
1044 // Question marks before semicolons, colons, etc. indicate optional
1045 // types (fields, parameters), e.g.
1046 // function(x?: string, y?) {...}
1047 // class X { y?; }
1048 Tok->setType(TT_JsTypeOptionalQuestion);
1049 break;
1050 }
1051 // Declarations cannot be conditional expressions, this can only be part
1052 // of a type declaration.
1053 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1054 Style.Language == FormatStyle::LK_JavaScript)
1055 break;
1056 if (Style.isCSharp()) {
1057 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1058 // nullable types.
1059 // Line.MustBeDeclaration will be true for `Type? name;`.
1060 if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) ||
1061 (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) ||
1062 (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1063 Tok->Next->Next->is(tok::equal))) {
1064 Tok->setType(TT_CSharpNullable);
1065 break;
1066 }
1067 }
1068 parseConditional();
1069 break;
1070 case tok::kw_template:
1071 parseTemplateDeclaration();
1072 break;
1073 case tok::comma:
1074 if (Contexts.back().InCtorInitializer)
1075 Tok->setType(TT_CtorInitializerComma);
1076 else if (Contexts.back().InInheritanceList)
1077 Tok->setType(TT_InheritanceComma);
1078 else if (Contexts.back().FirstStartOfName &&
1079 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
1080 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1081 Line.IsMultiVariableDeclStmt = true;
1082 }
1083 if (Contexts.back().IsForEachMacro)
1084 Contexts.back().IsExpression = true;
1085 break;
1086 case tok::identifier:
1087 if (Tok->isOneOf(Keywords.kw___has_include,
1088 Keywords.kw___has_include_next)) {
1089 parseHasInclude();
1090 }
1091 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1092 Tok->Next->isNot(tok::l_paren)) {
1093 Tok->setType(TT_CSharpGenericTypeConstraint);
1094 parseCSharpGenericTypeConstraint();
1095 }
1096 break;
1097 default:
1098 break;
1099 }
1100 return true;
1101 }
1102
parseCSharpGenericTypeConstraint()1103 void parseCSharpGenericTypeConstraint() {
1104 int OpenAngleBracketsCount = 0;
1105 while (CurrentToken) {
1106 if (CurrentToken->is(tok::less)) {
1107 // parseAngle is too greedy and will consume the whole line.
1108 CurrentToken->setType(TT_TemplateOpener);
1109 ++OpenAngleBracketsCount;
1110 next();
1111 } else if (CurrentToken->is(tok::greater)) {
1112 CurrentToken->setType(TT_TemplateCloser);
1113 --OpenAngleBracketsCount;
1114 next();
1115 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1116 // We allow line breaks after GenericTypeConstraintComma's
1117 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1118 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1119 next();
1120 } else if (CurrentToken->is(Keywords.kw_where)) {
1121 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1122 next();
1123 } else if (CurrentToken->is(tok::colon)) {
1124 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1125 next();
1126 } else {
1127 next();
1128 }
1129 }
1130 }
1131
parseIncludeDirective()1132 void parseIncludeDirective() {
1133 if (CurrentToken && CurrentToken->is(tok::less)) {
1134 next();
1135 while (CurrentToken) {
1136 // Mark tokens up to the trailing line comments as implicit string
1137 // literals.
1138 if (CurrentToken->isNot(tok::comment) &&
1139 !CurrentToken->TokenText.startswith("//"))
1140 CurrentToken->setType(TT_ImplicitStringLiteral);
1141 next();
1142 }
1143 }
1144 }
1145
parseWarningOrError()1146 void parseWarningOrError() {
1147 next();
1148 // We still want to format the whitespace left of the first token of the
1149 // warning or error.
1150 next();
1151 while (CurrentToken) {
1152 CurrentToken->setType(TT_ImplicitStringLiteral);
1153 next();
1154 }
1155 }
1156
parsePragma()1157 void parsePragma() {
1158 next(); // Consume "pragma".
1159 if (CurrentToken &&
1160 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
1161 bool IsMark = CurrentToken->is(Keywords.kw_mark);
1162 next(); // Consume "mark".
1163 next(); // Consume first token (so we fix leading whitespace).
1164 while (CurrentToken) {
1165 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
1166 CurrentToken->setType(TT_ImplicitStringLiteral);
1167 next();
1168 }
1169 }
1170 }
1171
parseHasInclude()1172 void parseHasInclude() {
1173 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1174 return;
1175 next(); // '('
1176 parseIncludeDirective();
1177 next(); // ')'
1178 }
1179
parsePreprocessorDirective()1180 LineType parsePreprocessorDirective() {
1181 bool IsFirstToken = CurrentToken->IsFirst;
1182 LineType Type = LT_PreprocessorDirective;
1183 next();
1184 if (!CurrentToken)
1185 return Type;
1186
1187 if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
1188 // JavaScript files can contain shebang lines of the form:
1189 // #!/usr/bin/env node
1190 // Treat these like C++ #include directives.
1191 while (CurrentToken) {
1192 // Tokens cannot be comments here.
1193 CurrentToken->setType(TT_ImplicitStringLiteral);
1194 next();
1195 }
1196 return LT_ImportStatement;
1197 }
1198
1199 if (CurrentToken->Tok.is(tok::numeric_constant)) {
1200 CurrentToken->SpacesRequiredBefore = 1;
1201 return Type;
1202 }
1203 // Hashes in the middle of a line can lead to any strange token
1204 // sequence.
1205 if (!CurrentToken->Tok.getIdentifierInfo())
1206 return Type;
1207 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1208 case tok::pp_include:
1209 case tok::pp_include_next:
1210 case tok::pp_import:
1211 next();
1212 parseIncludeDirective();
1213 Type = LT_ImportStatement;
1214 break;
1215 case tok::pp_error:
1216 case tok::pp_warning:
1217 parseWarningOrError();
1218 break;
1219 case tok::pp_pragma:
1220 parsePragma();
1221 break;
1222 case tok::pp_if:
1223 case tok::pp_elif:
1224 Contexts.back().IsExpression = true;
1225 next();
1226 parseLine();
1227 break;
1228 default:
1229 break;
1230 }
1231 while (CurrentToken) {
1232 FormatToken *Tok = CurrentToken;
1233 next();
1234 if (Tok->is(tok::l_paren))
1235 parseParens();
1236 else if (Tok->isOneOf(Keywords.kw___has_include,
1237 Keywords.kw___has_include_next))
1238 parseHasInclude();
1239 }
1240 return Type;
1241 }
1242
1243 public:
parseLine()1244 LineType parseLine() {
1245 if (!CurrentToken)
1246 return LT_Invalid;
1247 NonTemplateLess.clear();
1248 if (CurrentToken->is(tok::hash))
1249 return parsePreprocessorDirective();
1250
1251 // Directly allow to 'import <string-literal>' to support protocol buffer
1252 // definitions (github.com/google/protobuf) or missing "#" (either way we
1253 // should not break the line).
1254 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1255 if ((Style.Language == FormatStyle::LK_Java &&
1256 CurrentToken->is(Keywords.kw_package)) ||
1257 (Info && Info->getPPKeywordID() == tok::pp_import &&
1258 CurrentToken->Next &&
1259 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1260 tok::kw_static))) {
1261 next();
1262 parseIncludeDirective();
1263 return LT_ImportStatement;
1264 }
1265
1266 // If this line starts and ends in '<' and '>', respectively, it is likely
1267 // part of "#define <a/b.h>".
1268 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1269 parseIncludeDirective();
1270 return LT_ImportStatement;
1271 }
1272
1273 // In .proto files, top-level options and package statements are very
1274 // similar to import statements and should not be line-wrapped.
1275 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1276 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1277 next();
1278 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1279 while (CurrentToken)
1280 next();
1281 return LT_ImportStatement;
1282 }
1283 }
1284
1285 bool KeywordVirtualFound = false;
1286 bool ImportStatement = false;
1287
1288 // import {...} from '...';
1289 if (Style.Language == FormatStyle::LK_JavaScript &&
1290 CurrentToken->is(Keywords.kw_import))
1291 ImportStatement = true;
1292
1293 while (CurrentToken) {
1294 if (CurrentToken->is(tok::kw_virtual))
1295 KeywordVirtualFound = true;
1296 if (Style.Language == FormatStyle::LK_JavaScript) {
1297 // export {...} from '...';
1298 // An export followed by "from 'some string';" is a re-export from
1299 // another module identified by a URI and is treated as a
1300 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1301 // Just "export {...};" or "export class ..." should not be treated as
1302 // an import in this sense.
1303 if (Line.First->is(tok::kw_export) &&
1304 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1305 CurrentToken->Next->isStringLiteral())
1306 ImportStatement = true;
1307 if (isClosureImportStatement(*CurrentToken))
1308 ImportStatement = true;
1309 }
1310 if (!consumeToken())
1311 return LT_Invalid;
1312 }
1313 if (KeywordVirtualFound)
1314 return LT_VirtualFunctionDecl;
1315 if (ImportStatement)
1316 return LT_ImportStatement;
1317
1318 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
1319 if (Contexts.back().FirstObjCSelectorName)
1320 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1321 Contexts.back().LongestObjCSelectorName;
1322 return LT_ObjCMethodDecl;
1323 }
1324
1325 return LT_Other;
1326 }
1327
1328 private:
isClosureImportStatement(const FormatToken & Tok)1329 bool isClosureImportStatement(const FormatToken &Tok) {
1330 // FIXME: Closure-library specific stuff should not be hard-coded but be
1331 // configurable.
1332 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1333 Tok.Next->Next &&
1334 (Tok.Next->Next->TokenText == "module" ||
1335 Tok.Next->Next->TokenText == "provide" ||
1336 Tok.Next->Next->TokenText == "require" ||
1337 Tok.Next->Next->TokenText == "requireType" ||
1338 Tok.Next->Next->TokenText == "forwardDeclare") &&
1339 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1340 }
1341
resetTokenMetadata(FormatToken * Token)1342 void resetTokenMetadata(FormatToken *Token) {
1343 if (!Token)
1344 return;
1345
1346 // Reset token type in case we have already looked at it and then
1347 // recovered from an error (e.g. failure to find the matching >).
1348 if (!CurrentToken->isOneOf(
1349 TT_LambdaLSquare, TT_LambdaLBrace, TT_ForEachMacro,
1350 TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral,
1351 TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro,
1352 TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString,
1353 TT_ObjCStringLiteral, TT_UntouchableMacroFunc))
1354 CurrentToken->setType(TT_Unknown);
1355 CurrentToken->Role.reset();
1356 CurrentToken->MatchingParen = nullptr;
1357 CurrentToken->FakeLParens.clear();
1358 CurrentToken->FakeRParens = 0;
1359 }
1360
next()1361 void next() {
1362 if (CurrentToken) {
1363 CurrentToken->NestingLevel = Contexts.size() - 1;
1364 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
1365 modifyContext(*CurrentToken);
1366 determineTokenType(*CurrentToken);
1367 CurrentToken = CurrentToken->Next;
1368 }
1369
1370 resetTokenMetadata(CurrentToken);
1371 }
1372
1373 /// A struct to hold information valid in a specific context, e.g.
1374 /// a pair of parenthesis.
1375 struct Context {
Contextclang::format::__anon51f3827b0111::AnnotatingParser::Context1376 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1377 bool IsExpression)
1378 : ContextKind(ContextKind), BindingStrength(BindingStrength),
1379 IsExpression(IsExpression) {}
1380
1381 tok::TokenKind ContextKind;
1382 unsigned BindingStrength;
1383 bool IsExpression;
1384 unsigned LongestObjCSelectorName = 0;
1385 bool ColonIsForRangeExpr = false;
1386 bool ColonIsDictLiteral = false;
1387 bool ColonIsObjCMethodExpr = false;
1388 FormatToken *FirstObjCSelectorName = nullptr;
1389 FormatToken *FirstStartOfName = nullptr;
1390 bool CanBeExpression = true;
1391 bool InTemplateArgument = false;
1392 bool InCtorInitializer = false;
1393 bool InInheritanceList = false;
1394 bool CaretFound = false;
1395 bool IsForEachMacro = false;
1396 bool InCpp11AttributeSpecifier = false;
1397 bool InCSharpAttributeSpecifier = false;
1398 };
1399
1400 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1401 /// of each instance.
1402 struct ScopedContextCreator {
1403 AnnotatingParser &P;
1404
ScopedContextCreatorclang::format::__anon51f3827b0111::AnnotatingParser::ScopedContextCreator1405 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1406 unsigned Increase)
1407 : P(P) {
1408 P.Contexts.push_back(Context(ContextKind,
1409 P.Contexts.back().BindingStrength + Increase,
1410 P.Contexts.back().IsExpression));
1411 }
1412
~ScopedContextCreatorclang::format::__anon51f3827b0111::AnnotatingParser::ScopedContextCreator1413 ~ScopedContextCreator() { P.Contexts.pop_back(); }
1414 };
1415
modifyContext(const FormatToken & Current)1416 void modifyContext(const FormatToken &Current) {
1417 if (Current.getPrecedence() == prec::Assignment &&
1418 !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1419 // Type aliases use `type X = ...;` in TypeScript and can be exported
1420 // using `export type ...`.
1421 !(Style.Language == FormatStyle::LK_JavaScript &&
1422 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1423 Line.startsWith(tok::kw_export, Keywords.kw_type,
1424 tok::identifier))) &&
1425 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1426 Contexts.back().IsExpression = true;
1427 if (!Line.startsWith(TT_UnaryOperator)) {
1428 for (FormatToken *Previous = Current.Previous;
1429 Previous && Previous->Previous &&
1430 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1431 Previous = Previous->Previous) {
1432 if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1433 Previous = Previous->MatchingParen;
1434 if (!Previous)
1435 break;
1436 }
1437 if (Previous->opensScope())
1438 break;
1439 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1440 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1441 Previous->Previous && Previous->Previous->isNot(tok::equal))
1442 Previous->setType(TT_PointerOrReference);
1443 }
1444 }
1445 } else if (Current.is(tok::lessless) &&
1446 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1447 Contexts.back().IsExpression = true;
1448 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1449 Contexts.back().IsExpression = true;
1450 } else if (Current.is(TT_TrailingReturnArrow)) {
1451 Contexts.back().IsExpression = false;
1452 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1453 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1454 } else if (Current.Previous &&
1455 Current.Previous->is(TT_CtorInitializerColon)) {
1456 Contexts.back().IsExpression = true;
1457 Contexts.back().InCtorInitializer = true;
1458 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1459 Contexts.back().InInheritanceList = true;
1460 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1461 for (FormatToken *Previous = Current.Previous;
1462 Previous && Previous->isOneOf(tok::star, tok::amp);
1463 Previous = Previous->Previous)
1464 Previous->setType(TT_PointerOrReference);
1465 if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1466 Contexts.back().IsExpression = false;
1467 } else if (Current.is(tok::kw_new)) {
1468 Contexts.back().CanBeExpression = false;
1469 } else if (Current.is(tok::semi) ||
1470 (Current.is(tok::exclaim) && Current.Previous &&
1471 !Current.Previous->is(tok::kw_operator))) {
1472 // This should be the condition or increment in a for-loop.
1473 // But not operator !() (can't use TT_OverloadedOperator here as its not
1474 // been annotated yet).
1475 Contexts.back().IsExpression = true;
1476 }
1477 }
1478
untilMatchingParen(FormatToken * Current)1479 static FormatToken *untilMatchingParen(FormatToken *Current) {
1480 // Used when `MatchingParen` is not yet established.
1481 int ParenLevel = 0;
1482 while (Current) {
1483 if (Current->is(tok::l_paren))
1484 ParenLevel++;
1485 if (Current->is(tok::r_paren))
1486 ParenLevel--;
1487 if (ParenLevel < 1)
1488 break;
1489 Current = Current->Next;
1490 }
1491 return Current;
1492 }
1493
isDeductionGuide(FormatToken & Current)1494 static bool isDeductionGuide(FormatToken &Current) {
1495 // Look for a deduction guide template<T> A(...) -> A<...>;
1496 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1497 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1498 // Find the TemplateCloser.
1499 FormatToken *TemplateCloser = Current.Next->Next;
1500 int NestingLevel = 0;
1501 while (TemplateCloser) {
1502 // Skip over an expressions in parens A<(3 < 2)>;
1503 if (TemplateCloser->is(tok::l_paren)) {
1504 // No Matching Paren yet so skip to matching paren
1505 TemplateCloser = untilMatchingParen(TemplateCloser);
1506 }
1507 if (TemplateCloser->is(tok::less))
1508 NestingLevel++;
1509 if (TemplateCloser->is(tok::greater))
1510 NestingLevel--;
1511 if (NestingLevel < 1)
1512 break;
1513 TemplateCloser = TemplateCloser->Next;
1514 }
1515 // Assuming we have found the end of the template ensure its followed
1516 // with a semi-colon.
1517 if (TemplateCloser && TemplateCloser->Next &&
1518 TemplateCloser->Next->is(tok::semi) &&
1519 Current.Previous->MatchingParen) {
1520 // Determine if the identifier `A` prior to the A<..>; is the same as
1521 // prior to the A(..)
1522 FormatToken *LeadingIdentifier =
1523 Current.Previous->MatchingParen->Previous;
1524
1525 // Differentiate a deduction guide by seeing the
1526 // > of the template prior to the leading identifier.
1527 if (LeadingIdentifier) {
1528 FormatToken *PriorLeadingIdentifier = LeadingIdentifier->Previous;
1529 // Skip back past explicit decoration
1530 if (PriorLeadingIdentifier &&
1531 PriorLeadingIdentifier->is(tok::kw_explicit))
1532 PriorLeadingIdentifier = PriorLeadingIdentifier->Previous;
1533
1534 return (PriorLeadingIdentifier &&
1535 PriorLeadingIdentifier->is(TT_TemplateCloser) &&
1536 LeadingIdentifier->TokenText == Current.Next->TokenText);
1537 }
1538 }
1539 }
1540 return false;
1541 }
1542
determineTokenType(FormatToken & Current)1543 void determineTokenType(FormatToken &Current) {
1544 if (!Current.is(TT_Unknown))
1545 // The token type is already known.
1546 return;
1547
1548 if (Style.isCSharp() && CurrentToken->is(tok::question)) {
1549 if (CurrentToken->TokenText == "??") {
1550 Current.setType(TT_CSharpNullCoalescing);
1551 return;
1552 }
1553 if (CurrentToken->TokenText == "?.") {
1554 Current.setType(TT_CSharpNullConditional);
1555 return;
1556 }
1557 if (CurrentToken->TokenText == "?[") {
1558 Current.setType(TT_CSharpNullConditionalLSquare);
1559 return;
1560 }
1561 }
1562
1563 if (Style.Language == FormatStyle::LK_JavaScript) {
1564 if (Current.is(tok::exclaim)) {
1565 if (Current.Previous &&
1566 (Keywords.IsJavaScriptIdentifier(
1567 *Current.Previous, /* AcceptIdentifierName= */ true) ||
1568 Current.Previous->isOneOf(
1569 tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace,
1570 Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) ||
1571 Current.Previous->Tok.isLiteral())) {
1572 Current.setType(TT_JsNonNullAssertion);
1573 return;
1574 }
1575 if (Current.Next &&
1576 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1577 Current.setType(TT_JsNonNullAssertion);
1578 return;
1579 }
1580 }
1581 }
1582
1583 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1584 // function declaration have been found. In this case, 'Current' is a
1585 // trailing token of this declaration and thus cannot be a name.
1586 if (Current.is(Keywords.kw_instanceof)) {
1587 Current.setType(TT_BinaryOperator);
1588 } else if (isStartOfName(Current) &&
1589 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1590 Contexts.back().FirstStartOfName = &Current;
1591 Current.setType(TT_StartOfName);
1592 } else if (Current.is(tok::semi)) {
1593 // Reset FirstStartOfName after finding a semicolon so that a for loop
1594 // with multiple increment statements is not confused with a for loop
1595 // having multiple variable declarations.
1596 Contexts.back().FirstStartOfName = nullptr;
1597 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1598 AutoFound = true;
1599 } else if (Current.is(tok::arrow) &&
1600 Style.Language == FormatStyle::LK_Java) {
1601 Current.setType(TT_LambdaArrow);
1602 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1603 Current.NestingLevel == 0 &&
1604 !Current.Previous->is(tok::kw_operator)) {
1605 // not auto operator->() -> xxx;
1606 Current.setType(TT_TrailingReturnArrow);
1607
1608 } else if (isDeductionGuide(Current)) {
1609 // Deduction guides trailing arrow " A(...) -> A<T>;".
1610 Current.setType(TT_TrailingReturnArrow);
1611 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1612 Current.setType(determineStarAmpUsage(
1613 Current,
1614 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
1615 Contexts.back().InTemplateArgument));
1616 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1617 Current.setType(determinePlusMinusCaretUsage(Current));
1618 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1619 Contexts.back().CaretFound = true;
1620 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1621 Current.setType(determineIncrementUsage(Current));
1622 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1623 Current.setType(TT_UnaryOperator);
1624 } else if (Current.is(tok::question)) {
1625 if (Style.Language == FormatStyle::LK_JavaScript &&
1626 Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1627 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1628 // on the interface, not a ternary expression.
1629 Current.setType(TT_JsTypeOptionalQuestion);
1630 } else {
1631 Current.setType(TT_ConditionalExpr);
1632 }
1633 } else if (Current.isBinaryOperator() &&
1634 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1635 (!Current.is(tok::greater) &&
1636 Style.Language != FormatStyle::LK_TextProto)) {
1637 Current.setType(TT_BinaryOperator);
1638 } else if (Current.is(tok::comment)) {
1639 if (Current.TokenText.startswith("/*")) {
1640 if (Current.TokenText.endswith("*/"))
1641 Current.setType(TT_BlockComment);
1642 else
1643 // The lexer has for some reason determined a comment here. But we
1644 // cannot really handle it, if it isn't properly terminated.
1645 Current.Tok.setKind(tok::unknown);
1646 } else {
1647 Current.setType(TT_LineComment);
1648 }
1649 } else if (Current.is(tok::r_paren)) {
1650 if (rParenEndsCast(Current))
1651 Current.setType(TT_CastRParen);
1652 if (Current.MatchingParen && Current.Next &&
1653 !Current.Next->isBinaryOperator() &&
1654 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1655 tok::comma, tok::period, tok::arrow,
1656 tok::coloncolon))
1657 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1658 // Make sure this isn't the return type of an Obj-C block declaration
1659 if (AfterParen->Tok.isNot(tok::caret)) {
1660 if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1661 if (BeforeParen->is(tok::identifier) &&
1662 !BeforeParen->is(TT_TypenameMacro) &&
1663 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1664 (!BeforeParen->Previous ||
1665 BeforeParen->Previous->ClosesTemplateDeclaration))
1666 Current.setType(TT_FunctionAnnotationRParen);
1667 }
1668 }
1669 } else if (Current.is(tok::at) && Current.Next &&
1670 Style.Language != FormatStyle::LK_JavaScript &&
1671 Style.Language != FormatStyle::LK_Java) {
1672 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1673 // marks declarations and properties that need special formatting.
1674 switch (Current.Next->Tok.getObjCKeywordID()) {
1675 case tok::objc_interface:
1676 case tok::objc_implementation:
1677 case tok::objc_protocol:
1678 Current.setType(TT_ObjCDecl);
1679 break;
1680 case tok::objc_property:
1681 Current.setType(TT_ObjCProperty);
1682 break;
1683 default:
1684 break;
1685 }
1686 } else if (Current.is(tok::period)) {
1687 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1688 if (PreviousNoComment &&
1689 PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1690 Current.setType(TT_DesignatedInitializerPeriod);
1691 else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1692 Current.Previous->isOneOf(TT_JavaAnnotation,
1693 TT_LeadingJavaAnnotation)) {
1694 Current.setType(Current.Previous->getType());
1695 }
1696 } else if (canBeObjCSelectorComponent(Current) &&
1697 // FIXME(bug 36976): ObjC return types shouldn't use
1698 // TT_CastRParen.
1699 Current.Previous && Current.Previous->is(TT_CastRParen) &&
1700 Current.Previous->MatchingParen &&
1701 Current.Previous->MatchingParen->Previous &&
1702 Current.Previous->MatchingParen->Previous->is(
1703 TT_ObjCMethodSpecifier)) {
1704 // This is the first part of an Objective-C selector name. (If there's no
1705 // colon after this, this is the only place which annotates the identifier
1706 // as a selector.)
1707 Current.setType(TT_SelectorName);
1708 } else if (Current.isOneOf(tok::identifier, tok::kw_const,
1709 tok::kw_noexcept) &&
1710 Current.Previous &&
1711 !Current.Previous->isOneOf(tok::equal, tok::at) &&
1712 Line.MightBeFunctionDecl && Contexts.size() == 1) {
1713 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1714 // function declaration have been found.
1715 Current.setType(TT_TrailingAnnotation);
1716 } else if ((Style.Language == FormatStyle::LK_Java ||
1717 Style.Language == FormatStyle::LK_JavaScript) &&
1718 Current.Previous) {
1719 if (Current.Previous->is(tok::at) &&
1720 Current.isNot(Keywords.kw_interface)) {
1721 const FormatToken &AtToken = *Current.Previous;
1722 const FormatToken *Previous = AtToken.getPreviousNonComment();
1723 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1724 Current.setType(TT_LeadingJavaAnnotation);
1725 else
1726 Current.setType(TT_JavaAnnotation);
1727 } else if (Current.Previous->is(tok::period) &&
1728 Current.Previous->isOneOf(TT_JavaAnnotation,
1729 TT_LeadingJavaAnnotation)) {
1730 Current.setType(Current.Previous->getType());
1731 }
1732 }
1733 }
1734
1735 /// Take a guess at whether \p Tok starts a name of a function or
1736 /// variable declaration.
1737 ///
1738 /// This is a heuristic based on whether \p Tok is an identifier following
1739 /// something that is likely a type.
isStartOfName(const FormatToken & Tok)1740 bool isStartOfName(const FormatToken &Tok) {
1741 if (Tok.isNot(tok::identifier) || !Tok.Previous)
1742 return false;
1743
1744 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1745 Keywords.kw_as))
1746 return false;
1747 if (Style.Language == FormatStyle::LK_JavaScript &&
1748 Tok.Previous->is(Keywords.kw_in))
1749 return false;
1750
1751 // Skip "const" as it does not have an influence on whether this is a name.
1752 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1753 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1754 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1755
1756 if (!PreviousNotConst)
1757 return false;
1758
1759 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1760 PreviousNotConst->Previous &&
1761 PreviousNotConst->Previous->is(tok::hash);
1762
1763 if (PreviousNotConst->is(TT_TemplateCloser))
1764 return PreviousNotConst && PreviousNotConst->MatchingParen &&
1765 PreviousNotConst->MatchingParen->Previous &&
1766 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1767 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1768
1769 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1770 PreviousNotConst->MatchingParen->Previous &&
1771 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1772 return true;
1773
1774 return (!IsPPKeyword &&
1775 PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1776 PreviousNotConst->is(TT_PointerOrReference) ||
1777 PreviousNotConst->isSimpleTypeSpecifier();
1778 }
1779
1780 /// Determine whether ')' is ending a cast.
rParenEndsCast(const FormatToken & Tok)1781 bool rParenEndsCast(const FormatToken &Tok) {
1782 // C-style casts are only used in C++, C# and Java.
1783 if (!Style.isCSharp() && !Style.isCpp() &&
1784 Style.Language != FormatStyle::LK_Java)
1785 return false;
1786
1787 // Empty parens aren't casts and there are no casts at the end of the line.
1788 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1789 return false;
1790
1791 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1792 if (LeftOfParens) {
1793 // If there is a closing parenthesis left of the current parentheses,
1794 // look past it as these might be chained casts.
1795 if (LeftOfParens->is(tok::r_paren)) {
1796 if (!LeftOfParens->MatchingParen ||
1797 !LeftOfParens->MatchingParen->Previous)
1798 return false;
1799 LeftOfParens = LeftOfParens->MatchingParen->Previous;
1800 }
1801
1802 // If there is an identifier (or with a few exceptions a keyword) right
1803 // before the parentheses, this is unlikely to be a cast.
1804 if (LeftOfParens->Tok.getIdentifierInfo() &&
1805 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1806 tok::kw_delete))
1807 return false;
1808
1809 // Certain other tokens right before the parentheses are also signals that
1810 // this cannot be a cast.
1811 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1812 TT_TemplateCloser, tok::ellipsis))
1813 return false;
1814 }
1815
1816 if (Tok.Next->is(tok::question))
1817 return false;
1818
1819 // `foreach((A a, B b) in someList)` should not be seen as a cast.
1820 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
1821 return false;
1822
1823 // Functions which end with decorations like volatile, noexcept are unlikely
1824 // to be casts.
1825 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
1826 tok::kw_throw, tok::arrow, Keywords.kw_override,
1827 Keywords.kw_final) ||
1828 isCpp11AttributeSpecifier(*Tok.Next))
1829 return false;
1830
1831 // As Java has no function types, a "(" after the ")" likely means that this
1832 // is a cast.
1833 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1834 return true;
1835
1836 // If a (non-string) literal follows, this is likely a cast.
1837 if (Tok.Next->isNot(tok::string_literal) &&
1838 (Tok.Next->Tok.isLiteral() ||
1839 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1840 return true;
1841
1842 // Heuristically try to determine whether the parentheses contain a type.
1843 bool ParensAreType =
1844 !Tok.Previous ||
1845 Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1846 Tok.Previous->isSimpleTypeSpecifier();
1847 bool ParensCouldEndDecl =
1848 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1849 if (ParensAreType && !ParensCouldEndDecl)
1850 return true;
1851
1852 // At this point, we heuristically assume that there are no casts at the
1853 // start of the line. We assume that we have found most cases where there
1854 // are by the logic above, e.g. "(void)x;".
1855 if (!LeftOfParens)
1856 return false;
1857
1858 // Certain token types inside the parentheses mean that this can't be a
1859 // cast.
1860 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1861 Token = Token->Next)
1862 if (Token->is(TT_BinaryOperator))
1863 return false;
1864
1865 // If the following token is an identifier or 'this', this is a cast. All
1866 // cases where this can be something else are handled above.
1867 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
1868 return true;
1869
1870 if (!Tok.Next->Next)
1871 return false;
1872
1873 // If the next token after the parenthesis is a unary operator, assume
1874 // that this is cast, unless there are unexpected tokens inside the
1875 // parenthesis.
1876 bool NextIsUnary =
1877 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1878 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1879 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1880 return false;
1881 // Search for unexpected tokens.
1882 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1883 Prev = Prev->Previous) {
1884 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1885 return false;
1886 }
1887 return true;
1888 }
1889
1890 /// Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)1891 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1892 bool InTemplateArgument) {
1893 if (Style.Language == FormatStyle::LK_JavaScript)
1894 return TT_BinaryOperator;
1895
1896 // && in C# must be a binary operator.
1897 if (Style.isCSharp() && Tok.is(tok::ampamp))
1898 return TT_BinaryOperator;
1899
1900 const FormatToken *PrevToken = Tok.getPreviousNonComment();
1901 if (!PrevToken)
1902 return TT_UnaryOperator;
1903
1904 const FormatToken *NextToken = Tok.getNextNonComment();
1905 if (!NextToken ||
1906 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const,
1907 tok::kw_noexcept) ||
1908 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1909 return TT_PointerOrReference;
1910
1911 if (PrevToken->is(tok::coloncolon))
1912 return TT_PointerOrReference;
1913
1914 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1915 tok::comma, tok::semi, tok::kw_return, tok::colon,
1916 tok::equal, tok::kw_delete, tok::kw_sizeof,
1917 tok::kw_throw) ||
1918 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1919 TT_UnaryOperator, TT_CastRParen))
1920 return TT_UnaryOperator;
1921
1922 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1923 return TT_PointerOrReference;
1924 if (NextToken->is(tok::kw_operator) && !IsExpression)
1925 return TT_PointerOrReference;
1926 if (NextToken->isOneOf(tok::comma, tok::semi))
1927 return TT_PointerOrReference;
1928
1929 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
1930 FormatToken *TokenBeforeMatchingParen =
1931 PrevToken->MatchingParen->getPreviousNonComment();
1932 if (TokenBeforeMatchingParen &&
1933 TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
1934 TT_TypenameMacro))
1935 return TT_PointerOrReference;
1936 }
1937
1938 if (PrevToken->Tok.isLiteral() ||
1939 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1940 tok::kw_false, tok::r_brace) ||
1941 NextToken->Tok.isLiteral() ||
1942 NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1943 NextToken->isUnaryOperator() ||
1944 // If we know we're in a template argument, there are no named
1945 // declarations. Thus, having an identifier on the right-hand side
1946 // indicates a binary operator.
1947 (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1948 return TT_BinaryOperator;
1949
1950 // "&&(" is quite unlikely to be two successive unary "&".
1951 if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren))
1952 return TT_BinaryOperator;
1953
1954 // This catches some cases where evaluation order is used as control flow:
1955 // aaa && aaa->f();
1956 if (NextToken->Tok.isAnyIdentifier()) {
1957 const FormatToken *NextNextToken = NextToken->getNextNonComment();
1958 if (NextNextToken && NextNextToken->is(tok::arrow))
1959 return TT_BinaryOperator;
1960 }
1961
1962 // It is very unlikely that we are going to find a pointer or reference type
1963 // definition on the RHS of an assignment.
1964 if (IsExpression && !Contexts.back().CaretFound)
1965 return TT_BinaryOperator;
1966
1967 return TT_PointerOrReference;
1968 }
1969
determinePlusMinusCaretUsage(const FormatToken & Tok)1970 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1971 const FormatToken *PrevToken = Tok.getPreviousNonComment();
1972 if (!PrevToken)
1973 return TT_UnaryOperator;
1974
1975 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
1976 // This must be a sequence of leading unary operators.
1977 return TT_UnaryOperator;
1978
1979 // Use heuristics to recognize unary operators.
1980 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1981 tok::question, tok::colon, tok::kw_return,
1982 tok::kw_case, tok::at, tok::l_brace, tok::kw_throw,
1983 tok::kw_co_return, tok::kw_co_yield))
1984 return TT_UnaryOperator;
1985
1986 // There can't be two consecutive binary operators.
1987 if (PrevToken->is(TT_BinaryOperator))
1988 return TT_UnaryOperator;
1989
1990 // Fall back to marking the token as binary operator.
1991 return TT_BinaryOperator;
1992 }
1993
1994 /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)1995 TokenType determineIncrementUsage(const FormatToken &Tok) {
1996 const FormatToken *PrevToken = Tok.getPreviousNonComment();
1997 if (!PrevToken || PrevToken->is(TT_CastRParen))
1998 return TT_UnaryOperator;
1999 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2000 return TT_TrailingUnaryOperator;
2001
2002 return TT_UnaryOperator;
2003 }
2004
2005 SmallVector<Context, 8> Contexts;
2006
2007 const FormatStyle &Style;
2008 AnnotatedLine &Line;
2009 FormatToken *CurrentToken;
2010 bool AutoFound;
2011 const AdditionalKeywords &Keywords;
2012
// Set of "<" tokens that do not open a template parameter list. If parseAngle
// determines that a specific token can't be a template opener, it will make
// the same decision irrespective of the decisions for tokens leading up to
// it. Store this information to prevent this from causing exponential
// runtime.
2017 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2018 };
2019
// Extra precedence levels layered directly above prec::PointerToMember for
// use by ExpressionParser.
//
// Level at which ExpressionParser::parse() delegates to parseUnaryOperator().
static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
// Level reported by ExpressionParser::getCurrentPrecedence() for '.' and
// '->'; parse() returns immediately for any level above this one.
static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2022
2023 /// Parses binary expressions by inserting fake parenthesis based on
2024 /// operator precedence.
2025 class ExpressionParser {
2026 public:
ExpressionParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,AnnotatedLine & Line)2027 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
2028 AnnotatedLine &Line)
2029 : Style(Style), Keywords(Keywords), Current(Line.First) {}
2030
2031 /// Parse expressions with the given operator precedence.
parse(int Precedence=0)2032 void parse(int Precedence = 0) {
2033 // Skip 'return' and ObjC selector colons as they are not part of a binary
2034 // expression.
2035 while (Current && (Current->is(tok::kw_return) ||
2036 (Current->is(tok::colon) &&
2037 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
2038 next();
2039
2040 if (!Current || Precedence > PrecedenceArrowAndPeriod)
2041 return;
2042
2043 // Conditional expressions need to be parsed separately for proper nesting.
2044 if (Precedence == prec::Conditional) {
2045 parseConditionalExpr();
2046 return;
2047 }
2048
2049 // Parse unary operators, which all have a higher precedence than binary
2050 // operators.
2051 if (Precedence == PrecedenceUnaryOperator) {
2052 parseUnaryOperator();
2053 return;
2054 }
2055
2056 FormatToken *Start = Current;
2057 FormatToken *LatestOperator = nullptr;
2058 unsigned OperatorIndex = 0;
2059
2060 while (Current) {
2061 // Consume operators with higher precedence.
2062 parse(Precedence + 1);
2063
2064 int CurrentPrecedence = getCurrentPrecedence();
2065
2066 if (Current && Current->is(TT_SelectorName) &&
2067 Precedence == CurrentPrecedence) {
2068 if (LatestOperator)
2069 addFakeParenthesis(Start, prec::Level(Precedence));
2070 Start = Current;
2071 }
2072
2073 // At the end of the line or when an operator with higher precedence is
2074 // found, insert fake parenthesis and return.
2075 if (!Current ||
2076 (Current->closesScope() &&
2077 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
2078 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
2079 (CurrentPrecedence == prec::Conditional &&
2080 Precedence == prec::Assignment && Current->is(tok::colon))) {
2081 break;
2082 }
2083
2084 // Consume scopes: (), [], <> and {}
2085 if (Current->opensScope()) {
2086 // In fragment of a JavaScript template string can look like '}..${' and
2087 // thus close a scope and open a new one at the same time.
2088 while (Current && (!Current->closesScope() || Current->opensScope())) {
2089 next();
2090 parse();
2091 }
2092 next();
2093 } else {
2094 // Operator found.
2095 if (CurrentPrecedence == Precedence) {
2096 if (LatestOperator)
2097 LatestOperator->NextOperator = Current;
2098 LatestOperator = Current;
2099 Current->OperatorIndex = OperatorIndex;
2100 ++OperatorIndex;
2101 }
2102 next(/*SkipPastLeadingComments=*/Precedence > 0);
2103 }
2104 }
2105
2106 if (LatestOperator && (Current || Precedence > 0)) {
2107 // LatestOperator->LastOperator = true;
2108 if (Precedence == PrecedenceArrowAndPeriod) {
2109 // Call expressions don't have a binary operator precedence.
2110 addFakeParenthesis(Start, prec::Unknown);
2111 } else {
2112 addFakeParenthesis(Start, prec::Level(Precedence));
2113 }
2114 }
2115 }
2116
2117 private:
2118 /// Gets the precedence (+1) of the given token for binary operators
2119 /// and other tokens that we treat like binary operators.
getCurrentPrecedence()2120 int getCurrentPrecedence() {
2121 if (Current) {
2122 const FormatToken *NextNonComment = Current->getNextNonComment();
2123 if (Current->is(TT_ConditionalExpr))
2124 return prec::Conditional;
2125 if (NextNonComment && Current->is(TT_SelectorName) &&
2126 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
2127 ((Style.Language == FormatStyle::LK_Proto ||
2128 Style.Language == FormatStyle::LK_TextProto) &&
2129 NextNonComment->is(tok::less))))
2130 return prec::Assignment;
2131 if (Current->is(TT_JsComputedPropertyName))
2132 return prec::Assignment;
2133 if (Current->is(TT_LambdaArrow))
2134 return prec::Comma;
2135 if (Current->is(TT_JsFatArrow))
2136 return prec::Assignment;
2137 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
2138 (Current->is(tok::comment) && NextNonComment &&
2139 NextNonComment->is(TT_SelectorName)))
2140 return 0;
2141 if (Current->is(TT_RangeBasedForLoopColon))
2142 return prec::Comma;
2143 if ((Style.Language == FormatStyle::LK_Java ||
2144 Style.Language == FormatStyle::LK_JavaScript) &&
2145 Current->is(Keywords.kw_instanceof))
2146 return prec::Relational;
2147 if (Style.Language == FormatStyle::LK_JavaScript &&
2148 Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
2149 return prec::Relational;
2150 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
2151 return Current->getPrecedence();
2152 if (Current->isOneOf(tok::period, tok::arrow))
2153 return PrecedenceArrowAndPeriod;
2154 if ((Style.Language == FormatStyle::LK_Java ||
2155 Style.Language == FormatStyle::LK_JavaScript) &&
2156 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
2157 Keywords.kw_throws))
2158 return 0;
2159 }
2160 return -1;
2161 }
2162
addFakeParenthesis(FormatToken * Start,prec::Level Precedence)2163 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
2164 Start->FakeLParens.push_back(Precedence);
2165 if (Precedence > prec::Unknown)
2166 Start->StartsBinaryExpression = true;
2167 if (Current) {
2168 FormatToken *Previous = Current->Previous;
2169 while (Previous->is(tok::comment) && Previous->Previous)
2170 Previous = Previous->Previous;
2171 ++Previous->FakeRParens;
2172 if (Precedence > prec::Unknown)
2173 Previous->EndsBinaryExpression = true;
2174 }
2175 }
2176
2177 /// Parse unary operator expressions and surround them with fake
2178 /// parentheses if appropriate.
parseUnaryOperator()2179 void parseUnaryOperator() {
2180 llvm::SmallVector<FormatToken *, 2> Tokens;
2181 while (Current && Current->is(TT_UnaryOperator)) {
2182 Tokens.push_back(Current);
2183 next();
2184 }
2185 parse(PrecedenceArrowAndPeriod);
2186 for (FormatToken *Token : llvm::reverse(Tokens))
2187 // The actual precedence doesn't matter.
2188 addFakeParenthesis(Token, prec::Unknown);
2189 }
2190
parseConditionalExpr()2191 void parseConditionalExpr() {
2192 while (Current && Current->isTrailingComment()) {
2193 next();
2194 }
2195 FormatToken *Start = Current;
2196 parse(prec::LogicalOr);
2197 if (!Current || !Current->is(tok::question))
2198 return;
2199 next();
2200 parse(prec::Assignment);
2201 if (!Current || Current->isNot(TT_ConditionalExpr))
2202 return;
2203 next();
2204 parse(prec::Assignment);
2205 addFakeParenthesis(Start, prec::Conditional);
2206 }
2207
next(bool SkipPastLeadingComments=true)2208 void next(bool SkipPastLeadingComments = true) {
2209 if (Current)
2210 Current = Current->Next;
2211 while (Current &&
2212 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
2213 Current->isTrailingComment())
2214 Current = Current->Next;
2215 }
2216
2217 const FormatStyle &Style;
2218 const AdditionalKeywords &Keywords;
2219 FormatToken *Current;
2220 };
2221
2222 } // end anonymous namespace
2223
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines)2224 void TokenAnnotator::setCommentLineLevels(
2225 SmallVectorImpl<AnnotatedLine *> &Lines) {
2226 const AnnotatedLine *NextNonCommentLine = nullptr;
2227 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
2228 E = Lines.rend();
2229 I != E; ++I) {
2230 bool CommentLine = true;
2231 for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
2232 if (!Tok->is(tok::comment)) {
2233 CommentLine = false;
2234 break;
2235 }
2236 }
2237
2238 // If the comment is currently aligned with the line immediately following
2239 // it, that's probably intentional and we should keep it.
2240 if (NextNonCommentLine && CommentLine &&
2241 NextNonCommentLine->First->NewlinesBefore <= 1 &&
2242 NextNonCommentLine->First->OriginalColumn ==
2243 (*I)->First->OriginalColumn) {
2244 // Align comments for preprocessor lines with the # in column 0 if
2245 // preprocessor lines are not indented. Otherwise, align with the next
2246 // line.
2247 (*I)->Level =
2248 (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
2249 (NextNonCommentLine->Type == LT_PreprocessorDirective ||
2250 NextNonCommentLine->Type == LT_ImportStatement))
2251 ? 0
2252 : NextNonCommentLine->Level;
2253 } else {
2254 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
2255 }
2256
2257 setCommentLineLevels((*I)->Children);
2258 }
2259 }
2260
maxNestingDepth(const AnnotatedLine & Line)2261 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
2262 unsigned Result = 0;
2263 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
2264 Result = std::max(Result, Tok->NestingLevel);
2265 return Result;
2266 }
2267
annotate(AnnotatedLine & Line)2268 void TokenAnnotator::annotate(AnnotatedLine &Line) {
2269 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2270 E = Line.Children.end();
2271 I != E; ++I) {
2272 annotate(**I);
2273 }
2274 AnnotatingParser Parser(Style, Line, Keywords);
2275 Line.Type = Parser.parseLine();
2276
2277 // With very deep nesting, ExpressionParser uses lots of stack and the
2278 // formatting algorithm is very slow. We're not going to do a good job here
2279 // anyway - it's probably generated code being formatted by mistake.
2280 // Just skip the whole line.
2281 if (maxNestingDepth(Line) > 50)
2282 Line.Type = LT_Invalid;
2283
2284 if (Line.Type == LT_Invalid)
2285 return;
2286
2287 ExpressionParser ExprParser(Style, Keywords, Line);
2288 ExprParser.parse();
2289
2290 if (Line.startsWith(TT_ObjCMethodSpecifier))
2291 Line.Type = LT_ObjCMethodDecl;
2292 else if (Line.startsWith(TT_ObjCDecl))
2293 Line.Type = LT_ObjCDecl;
2294 else if (Line.startsWith(TT_ObjCProperty))
2295 Line.Type = LT_ObjCProperty;
2296
2297 Line.First->SpacesRequiredBefore = 1;
2298 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
2299 }
2300
2301 // This function heuristically determines whether 'Current' starts the name of a
2302 // function declaration.
isFunctionDeclarationName(const FormatToken & Current,const AnnotatedLine & Line)2303 static bool isFunctionDeclarationName(const FormatToken &Current,
2304 const AnnotatedLine &Line) {
2305 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
2306 for (; Next; Next = Next->Next) {
2307 if (Next->is(TT_OverloadedOperatorLParen))
2308 return Next;
2309 if (Next->is(TT_OverloadedOperator))
2310 continue;
2311 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
2312 // For 'new[]' and 'delete[]'.
2313 if (Next->Next &&
2314 Next->Next->startsSequence(tok::l_square, tok::r_square))
2315 Next = Next->Next->Next;
2316 continue;
2317 }
2318 if (Next->startsSequence(tok::l_square, tok::r_square)) {
2319 // For operator[]().
2320 Next = Next->Next;
2321 continue;
2322 }
2323 if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
2324 Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) {
2325 // For operator void*(), operator char*(), operator Foo*().
2326 Next = Next->Next;
2327 continue;
2328 }
2329 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
2330 Next = Next->MatchingParen;
2331 continue;
2332 }
2333
2334 break;
2335 }
2336 return nullptr;
2337 };
2338
2339 // Find parentheses of parameter list.
2340 const FormatToken *Next = Current.Next;
2341 if (Current.is(tok::kw_operator)) {
2342 if (Current.Previous && Current.Previous->is(tok::coloncolon))
2343 return false;
2344 Next = skipOperatorName(Next);
2345 } else {
2346 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
2347 return false;
2348 for (; Next; Next = Next->Next) {
2349 if (Next->is(TT_TemplateOpener)) {
2350 Next = Next->MatchingParen;
2351 } else if (Next->is(tok::coloncolon)) {
2352 Next = Next->Next;
2353 if (!Next)
2354 return false;
2355 if (Next->is(tok::kw_operator)) {
2356 Next = skipOperatorName(Next->Next);
2357 break;
2358 }
2359 if (!Next->is(tok::identifier))
2360 return false;
2361 } else if (Next->is(tok::l_paren)) {
2362 break;
2363 } else {
2364 return false;
2365 }
2366 }
2367 }
2368
2369 // Check whether parameter list can belong to a function declaration.
2370 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2371 return false;
2372 // If the lines ends with "{", this is likely an function definition.
2373 if (Line.Last->is(tok::l_brace))
2374 return true;
2375 if (Next->Next == Next->MatchingParen)
2376 return true; // Empty parentheses.
2377 // If there is an &/&& after the r_paren, this is likely a function.
2378 if (Next->MatchingParen->Next &&
2379 Next->MatchingParen->Next->is(TT_PointerOrReference))
2380 return true;
2381 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2382 Tok = Tok->Next) {
2383 if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
2384 Tok = Tok->MatchingParen;
2385 continue;
2386 }
2387 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2388 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
2389 return true;
2390 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2391 Tok->Tok.isLiteral())
2392 return false;
2393 }
2394 return false;
2395 }
2396
mustBreakForReturnType(const AnnotatedLine & Line) const2397 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2398 assert(Line.MightBeFunctionDecl);
2399
2400 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2401 Style.AlwaysBreakAfterReturnType ==
2402 FormatStyle::RTBS_TopLevelDefinitions) &&
2403 Line.Level > 0)
2404 return false;
2405
2406 switch (Style.AlwaysBreakAfterReturnType) {
2407 case FormatStyle::RTBS_None:
2408 return false;
2409 case FormatStyle::RTBS_All:
2410 case FormatStyle::RTBS_TopLevel:
2411 return true;
2412 case FormatStyle::RTBS_AllDefinitions:
2413 case FormatStyle::RTBS_TopLevelDefinitions:
2414 return Line.mightBeFunctionDefinition();
2415 }
2416
2417 return false;
2418 }
2419
calculateFormattingInformation(AnnotatedLine & Line)2420 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
2421 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2422 E = Line.Children.end();
2423 I != E; ++I) {
2424 calculateFormattingInformation(**I);
2425 }
2426
2427 Line.First->TotalLength =
2428 Line.First->IsMultiline ? Style.ColumnLimit
2429 : Line.FirstStartColumn + Line.First->ColumnWidth;
2430 FormatToken *Current = Line.First->Next;
2431 bool InFunctionDecl = Line.MightBeFunctionDecl;
2432 while (Current) {
2433 if (isFunctionDeclarationName(*Current, Line))
2434 Current->setType(TT_FunctionDeclarationName);
2435 if (Current->is(TT_LineComment)) {
2436 if (Current->Previous->BlockKind == BK_BracedInit &&
2437 Current->Previous->opensScope())
2438 Current->SpacesRequiredBefore =
2439 (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 0 : 1;
2440 else
2441 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
2442
2443 // If we find a trailing comment, iterate backwards to determine whether
2444 // it seems to relate to a specific parameter. If so, break before that
2445 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2446 // to the previous line in:
2447 // SomeFunction(a,
2448 // b, // comment
2449 // c);
2450 if (!Current->HasUnescapedNewline) {
2451 for (FormatToken *Parameter = Current->Previous; Parameter;
2452 Parameter = Parameter->Previous) {
2453 if (Parameter->isOneOf(tok::comment, tok::r_brace))
2454 break;
2455 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
2456 if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2457 Parameter->HasUnescapedNewline)
2458 Parameter->MustBreakBefore = true;
2459 break;
2460 }
2461 }
2462 }
2463 } else if (Current->SpacesRequiredBefore == 0 &&
2464 spaceRequiredBefore(Line, *Current)) {
2465 Current->SpacesRequiredBefore = 1;
2466 }
2467
2468 Current->MustBreakBefore =
2469 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2470
2471 if (!Current->MustBreakBefore && InFunctionDecl &&
2472 Current->is(TT_FunctionDeclarationName))
2473 Current->MustBreakBefore = mustBreakForReturnType(Line);
2474
2475 Current->CanBreakBefore =
2476 Current->MustBreakBefore || canBreakBefore(Line, *Current);
2477 unsigned ChildSize = 0;
2478 if (Current->Previous->Children.size() == 1) {
2479 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
2480 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2481 : LastOfChild.TotalLength + 1;
2482 }
2483 const FormatToken *Prev = Current->Previous;
2484 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2485 (Prev->Children.size() == 1 &&
2486 Prev->Children[0]->First->MustBreakBefore) ||
2487 Current->IsMultiline)
2488 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2489 else
2490 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2491 ChildSize + Current->SpacesRequiredBefore;
2492
2493 if (Current->is(TT_CtorInitializerColon))
2494 InFunctionDecl = false;
2495
2496 // FIXME: Only calculate this if CanBreakBefore is true once static
2497 // initializers etc. are sorted out.
2498 // FIXME: Move magic numbers to a better place.
2499
2500 // Reduce penalty for aligning ObjC method arguments using the colon
2501 // alignment as this is the canonical way (still prefer fitting everything
2502 // into one line if possible). Trying to fit a whole expression into one
2503 // line should not force other line breaks (e.g. when ObjC method
2504 // expression is a part of other expression).
2505 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2506 if (Style.Language == FormatStyle::LK_ObjC &&
2507 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2508 if (Current->ParameterIndex == 1)
2509 Current->SplitPenalty += 5 * Current->BindingStrength;
2510 } else {
2511 Current->SplitPenalty += 20 * Current->BindingStrength;
2512 }
2513
2514 Current = Current->Next;
2515 }
2516
2517 calculateUnbreakableTailLengths(Line);
2518 unsigned IndentLevel = Line.Level;
2519 for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2520 if (Current->Role)
2521 Current->Role->precomputeFormattingInfos(Current);
2522 if (Current->MatchingParen &&
2523 Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
2524 assert(IndentLevel > 0);
2525 --IndentLevel;
2526 }
2527 Current->IndentLevel = IndentLevel;
2528 if (Current->opensBlockOrBlockTypeList(Style))
2529 ++IndentLevel;
2530 }
2531
2532 LLVM_DEBUG({ printDebugInfo(Line); });
2533 }
2534
calculateUnbreakableTailLengths(AnnotatedLine & Line)2535 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2536 unsigned UnbreakableTailLength = 0;
2537 FormatToken *Current = Line.Last;
2538 while (Current) {
2539 Current->UnbreakableTailLength = UnbreakableTailLength;
2540 if (Current->CanBreakBefore ||
2541 Current->isOneOf(tok::comment, tok::string_literal)) {
2542 UnbreakableTailLength = 0;
2543 } else {
2544 UnbreakableTailLength +=
2545 Current->ColumnWidth + Current->SpacesRequiredBefore;
2546 }
2547 Current = Current->Previous;
2548 }
2549 }
2550
/// Computes the split penalty for \p Tok by looking at the token pair formed
/// by \c Tok.Previous (called \c Left below) and \p Tok itself (\c Right).
///
/// The rules are evaluated strictly in order and the first one that matches
/// determines the result, so the relative order of the checks is significant.
///
/// \param Line The annotated line that contains \p Tok.
/// \param Tok The token the penalty is computed for. \c Tok.Previous is
///        dereferenced unconditionally, so it must not be null.
/// \param InFunctionDecl Whether the caller considers \p Tok to be part of a
///        function declaration; several rules only apply in that case.
/// \returns The penalty value. Some results are fixed constants, others come
///          from the corresponding \c Style.Penalty* options or from the
///          operator precedence of the neighboring tokens.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;

  // Language-specific rules for Java and JavaScript come first; if none of
  // them matches, the generic rules below are applied as well.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.Language == FormatStyle::LK_JavaScript) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
      return 100;
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  }

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Style.Language == FormatStyle::LK_Proto)
      return 1;
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    // Any '[' that is not one of the specially annotated kinds listed here
    // gets a high penalty; the listed kinds fall through to the rules below.
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare))
      return 500;
  }

  if (Left.is(tok::coloncolon) ||
      (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
    return 500;
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    // At nesting level 0 of a function declaration the configurable
    // PenaltyReturnTypeOnItsOwnLine is used instead of a fixed value.
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_LambdaArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon))
    return 2;

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if it is not a call, as that might lead to
    // a wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // Use a slightly higher penalty after ")" so that annotations like
    // "const override" are kept together.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  // In Objective-C type declarations, avoid breaking after the category's
  // open paren (we'll prefer breaking after the protocol list's opening
  // angle bracket, if present).
  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
      Left.Previous->isOneOf(tok::identifier, tok::greater))
    return 500;

  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
    return 100;
  // High penalty directly after the '(' that follows "for" or an if keyword.
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->is(tok::kw_for) || Left.Previous->isIf()))
    return 1000;
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  if (Left.opensScope()) {
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
      return 0;
    if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
      return 19;
    // With more than one parameter the configurable
    // PenaltyBreakBeforeFirstCallParameter applies; otherwise a fixed 19.
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  if (Left.is(TT_UnaryOperator))
    return 60;
  // The following rules deal with operators next to "label strings" (as
  // reported by isLabelString()). Note that the '<<' rule further down
  // compares OperatorIndex against 1 while the '+' rules compare against 0.
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0))
    return 50;
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0))
    return 25;
  if (Left.is(tok::comma))
    return 1;
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1))
    return 25;
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    return 1;
  }
  if (Left.ClosesTemplateDeclaration)
    return Style.PenaltyBreakTemplateDeclaration;
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;
  // Fall back to operator precedence: use Left's precedence and, if that is
  // unknown, Right's. Assignments use the configurable PenaltyBreakAssignment;
  // any other known precedence level is returned as the penalty itself.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  // Default penalty when no rule above matched.
  return 3;
}
2742
spaceRequiredBeforeParens(const FormatToken & Right) const2743 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
2744 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always ||
2745 (Style.SpaceBeforeParens == FormatStyle::SBPO_NonEmptyParentheses &&
2746 Right.ParameterCount > 0);
2747 }
2748
/// Decides whether a space is required between the two tokens \p Left and
/// \p Right, based on their token kinds, their annotated types and the
/// active \c Style options.
///
/// The rules are evaluated strictly in order and the first one that matches
/// determines the result, so the relative order of the checks is significant.
/// If no rule matches, a space is required.
///
/// \param Line The annotated line the tokens belong to; its \c Type,
///        \c MightBeFunctionDecl, \c IsMultiVariableDeclStmt and \c First
///        members are consulted by some rules.
/// \param Left The left-hand token of the pair.
/// \param Right The right-hand token of the pair.
/// \returns \c true if a space is required between the tokens.
bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                          const FormatToken &Left,
                                          const FormatToken &Right) {
  if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
    return true;
  if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
    return true;
  if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
      Left.Tok.getObjCKeywordID() == tok::objc_property)
    return true;
  // Around '#' and '##': a space is only required between two hash tokens.
  if (Right.is(tok::hashhash))
    return Left.is(tok::hash);
  if (Left.isOneOf(tok::hashhash, tok::hash))
    return Right.is(tok::hash);
  // Empty "()" and empty non-block "{}".
  if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
      (Left.is(tok::l_brace) && Left.BlockKind != BK_Block &&
       Right.is(tok::r_brace) && Right.BlockKind != BK_Block))
    return Style.SpaceInEmptyParentheses;
  // Inside the parentheses that follow a keyword accepted by
  // isKeywordWithCondition().
  if (Style.SpacesInConditionalStatement) {
    if (Left.is(tok::l_paren) && Left.Previous &&
        isKeywordWithCondition(*Left.Previous))
      return true;
    if (Right.is(tok::r_paren) && Right.MatchingParen &&
        Right.MatchingParen->Previous &&
        isKeywordWithCondition(*Right.MatchingParen->Previous))
      return true;
  }
  // Directly inside any other parentheses: cast parentheses follow
  // SpacesInCStyleCastParentheses, all others follow SpacesInParentheses.
  if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
    return (Right.is(TT_CastRParen) ||
            (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
               ? Style.SpacesInCStyleCastParentheses
               : Style.SpacesInParentheses;
  if (Right.isOneOf(tok::semi, tok::comma))
    return false;
  if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
    // A '<...>' that is directly followed by ':' is treated as a lightweight
    // generic and never gets a space before it.
    bool IsLightweightGeneric = Right.MatchingParen &&
                                Right.MatchingParen->Next &&
                                Right.MatchingParen->Next->is(tok::colon);
    return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
  }
  if (Right.is(tok::less) && Left.is(tok::kw_template))
    return Style.SpaceAfterTemplateKeyword;
  if (Left.isOneOf(tok::exclaim, tok::tilde))
    return false;
  if (Left.is(tok::at) &&
      Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
                    tok::numeric_constant, tok::l_paren, tok::l_brace,
                    tok::kw_true, tok::kw_false))
    return false;
  if (Left.is(tok::colon))
    return !Left.is(TT_ObjCMethodExpr);
  if (Left.is(tok::coloncolon))
    return false;
  if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
    if (Style.Language == FormatStyle::LK_TextProto ||
        (Style.Language == FormatStyle::LK_Proto &&
         (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
      // Format empty list as `<>`.
      if (Left.is(tok::less) && Right.is(tok::greater))
        return false;
      return !Style.Cpp11BracedListStyle;
    }
    return false;
  }
  if (Right.is(tok::ellipsis))
    return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
                                    Left.Previous->is(tok::kw_case));
  if (Left.is(tok::l_square) && Right.is(tok::amp))
    return Style.SpacesInSquareBrackets;
  if (Right.is(TT_PointerOrReference)) {
    if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
      if (!Left.MatchingParen)
        return true;
      // After ')' a space is required unless the parentheses belong to
      // typeof, decltype or a typename macro.
      FormatToken *TokenBeforeMatchingParen =
          Left.MatchingParen->getPreviousNonComment();
      if (!TokenBeforeMatchingParen ||
          !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
                                             TT_TypenameMacro))
        return true;
    }
    return (Left.Tok.isLiteral() ||
            (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
             (Style.PointerAlignment != FormatStyle::PAS_Left ||
              (Line.IsMultiVariableDeclStmt &&
               (Left.NestingLevel == 0 ||
                (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
  }
  if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
      (!Left.is(TT_PointerOrReference) ||
       (Style.PointerAlignment != FormatStyle::PAS_Right &&
        !Line.IsMultiVariableDeclStmt)))
    return true;
  if (Left.is(TT_PointerOrReference))
    return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
           (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
            !Right.is(TT_StartOfName)) ||
           (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
           (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
                           tok::l_paren) &&
            (Style.PointerAlignment != FormatStyle::PAS_Right &&
             !Line.IsMultiVariableDeclStmt) &&
            Left.Previous &&
            !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon,
                                    tok::l_square));
  // Ensure right pointer alignment with ellipsis e.g. int *...P
  if (Left.is(tok::ellipsis) && Left.Previous &&
      Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp))
    return Style.PointerAlignment != FormatStyle::PAS_Right;

  if (Right.is(tok::star) && Left.is(tok::l_paren))
    return false;
  if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
    return false;
  if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) {
    // Walk backwards over the tokens of a (possibly qualified and templated)
    // type name, skipping comments, until either the "operator" keyword or a
    // token that cannot be part of such a name is reached.
    const FormatToken *Previous = &Left;
    while (Previous && !Previous->is(tok::kw_operator)) {
      if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
        Previous = Previous->getPreviousNonComment();
        continue;
      }
      if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
        Previous = Previous->MatchingParen->getPreviousNonComment();
        continue;
      }
      if (Previous->is(tok::coloncolon)) {
        Previous = Previous->getPreviousNonComment();
        continue;
      }
      break;
    }
    // Space between the type and the * in:
    //   operator void*()
    //   operator char*()
    //   operator /*comment*/ const char*()
    //   operator volatile /*comment*/ char*()
    //   operator Foo*()
    //   operator C<T>*()
    //   operator std::Foo*()
    //   operator C<T>::D<U>*()
    // dependent on PointerAlignment style.
    if (Previous &&
        (Previous->endsSequence(tok::kw_operator) ||
         Previous->endsSequence(tok::kw_const, tok::kw_operator) ||
         Previous->endsSequence(tok::kw_volatile, tok::kw_operator)))
      return (Style.PointerAlignment != FormatStyle::PAS_Left);
  }
  // Helper shared by the '[' and ']' rules below: whether an array
  // initializer '[' gets spaces inside, either because of
  // SpacesInContainerLiterals or because of the Proto/TextProto-specific
  // condition.
  const auto SpaceRequiredForArrayInitializerLSquare =
      [](const FormatToken &LSquareTok, const FormatStyle &Style) {
        return Style.SpacesInContainerLiterals ||
               ((Style.Language == FormatStyle::LK_Proto ||
                 Style.Language == FormatStyle::LK_TextProto) &&
                !Style.Cpp11BracedListStyle &&
                LSquareTok.endsSequence(tok::l_square, tok::colon,
                                        TT_SelectorName));
      };
  if (Left.is(tok::l_square))
    return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
            SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
           (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
                         TT_LambdaLSquare) &&
            Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
  if (Right.is(tok::r_square))
    return Right.MatchingParen &&
           ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
             SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
                                                     Style)) ||
            (Style.SpacesInSquareBrackets &&
             Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
                                          TT_StructuredBindingLSquare,
                                          TT_LambdaLSquare)) ||
            Right.MatchingParen->is(TT_AttributeParen));
  if (Right.is(tok::l_square) &&
      !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                     TT_DesignatedInitializerLSquare,
                     TT_StructuredBindingLSquare, TT_AttributeSquare) &&
      !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
      !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
        Right.is(TT_ArraySubscriptLSquare)))
    return false;
  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
    return !Left.Children.empty(); // No spaces in "{}".
  // Directly inside the braces of anything that is not a block.
  if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
      (Right.is(tok::r_brace) && Right.MatchingParen &&
       Right.MatchingParen->BlockKind != BK_Block))
    return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true;
  if (Left.is(TT_BlockComment))
    // No whitespace in x(/*foo=*/1), except for JavaScript.
    return Style.Language == FormatStyle::LK_JavaScript ||
           !Left.TokenText.endswith("=*/");

  // Space between template and attribute.
  // e.g. template <typename T> [[nodiscard]] ...
  if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
    return true;
  if (Right.is(tok::l_paren)) {
    if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
        (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
      return true;
    if (Style.SpaceBeforeParens ==
            FormatStyle::SBPO_ControlStatementsExceptForEachMacros &&
        Left.is(TT_ForEachMacro))
      return false;
    // A space before '(' is required in ObjC declarations, after ';', after
    // the listed control-statement-like keywords (unless SpaceBeforeParens is
    // SBPO_Never), or, outside of preprocessor directives, after
    // identifier-like tokens when spaceRequiredBeforeParens() asks for it.
    return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
           (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
            (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while,
                          tok::kw_switch, tok::kw_case, TT_ForEachMacro,
                          TT_ObjCForIn) ||
             Left.isIf(Line.Type != LT_PreprocessorDirective) ||
             (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
                           tok::kw_new, tok::kw_delete) &&
              (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
           (spaceRequiredBeforeParens(Right) &&
            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
             Left.is(tok::r_paren) || Left.isSimpleTypeSpecifier() ||
             (Left.is(tok::r_square) && Left.MatchingParen &&
              Left.MatchingParen->is(TT_LambdaLSquare))) &&
            Line.Type != LT_PreprocessorDirective);
  }
  if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
    return false;
  if (Right.is(TT_UnaryOperator))
    return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
           (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
  if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
                    tok::r_paren) ||
       Left.isSimpleTypeSpecifier()) &&
      Right.is(tok::l_brace) && Right.getNextNonComment() &&
      Right.BlockKind != BK_Block)
    return false;
  if (Left.is(tok::period) || Right.is(tok::period))
    return false;
  if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
    return false;
  if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
      Left.MatchingParen->Previous &&
      (Left.MatchingParen->Previous->is(tok::period) ||
       Left.MatchingParen->Previous->is(tok::coloncolon)))
    // Java call to generic function with explicit type:
    // A.<B<C<...>>>DoSomething();
    // A::<B<C<...>>>DoSomething();  // With a Java 8 method reference.
    return false;
  if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
    return false;
  if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
    // Objective-C dictionary literal -> no space after opening brace.
    return false;
  if (Right.is(tok::r_brace) && Right.MatchingParen &&
      Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
    // Objective-C dictionary literal -> no space before closing brace.
    return false;
  if (Right.getType() == TT_TrailingAnnotation &&
      Right.isOneOf(tok::amp, tok::ampamp) &&
      Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
      (!Right.Next || Right.Next->is(tok::semi)))
    // Match const and volatile ref-qualifiers without any additional
    // qualifiers such as
    // void Fn() const &;
    return Style.PointerAlignment != FormatStyle::PAS_Left;
  // Default: no rule above matched, so a space is required.
  return true;
}
3009
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right)3010 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
3011 const FormatToken &Right) {
3012 const FormatToken &Left = *Right.Previous;
3013 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
3014 return true; // Never ever merge two identifiers.
3015 if (Style.isCpp()) {
3016 if (Left.is(tok::kw_operator))
3017 return Right.is(tok::coloncolon);
3018 if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit &&
3019 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList)
3020 return true;
3021 } else if (Style.Language == FormatStyle::LK_Proto ||
3022 Style.Language == FormatStyle::LK_TextProto) {
3023 if (Right.is(tok::period) &&
3024 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
3025 Keywords.kw_repeated, Keywords.kw_extend))
3026 return true;
3027 if (Right.is(tok::l_paren) &&
3028 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
3029 return true;
3030 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
3031 return true;
3032 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
3033 if (Left.is(tok::slash) || Right.is(tok::slash))
3034 return false;
3035 if (Left.MatchingParen &&
3036 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
3037 Right.isOneOf(tok::l_brace, tok::less))
3038 return !Style.Cpp11BracedListStyle;
3039 // A percent is probably part of a formatting specification, such as %lld.
3040 if (Left.is(tok::percent))
3041 return false;
3042 // Preserve the existence of a space before a percent for cases like 0x%04x
3043 // and "%d %d"
3044 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
3045 return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin();
3046 } else if (Style.isCSharp()) {
3047 // Require spaces around '{' and before '}' unless they appear in
3048 // interpolated strings. Interpolated strings are merged into a single token
3049 // so cannot have spaces inserted by this function.
3050
3051 // No space between 'this' and '['
3052 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
3053 return false;
3054
3055 // No space between 'new' and '('
3056 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
3057 return false;
3058
3059 // Space before { (including space within '{ {').
3060 if (Right.is(tok::l_brace))
3061 return true;
3062
3063 // Spaces inside braces.
3064 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
3065 return true;
3066
3067 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
3068 return true;
3069
3070 // Spaces around '=>'.
3071 if (Left.is(TT_JsFatArrow) || Right.is(TT_JsFatArrow))
3072 return true;
3073
3074 // No spaces around attribute target colons
3075 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
3076 return false;
3077
3078 // space between type and variable e.g. Dictionary<string,string> foo;
3079 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
3080 return true;
3081
3082 // spaces inside square brackets.
3083 if (Left.is(tok::l_square) || Right.is(tok::r_square))
3084 return Style.SpacesInSquareBrackets;
3085
3086 // No space before ? in nullable types.
3087 if (Right.is(TT_CSharpNullable))
3088 return false;
3089
3090 // Require space after ? in nullable types except in generics and casts.
3091 if (Left.is(TT_CSharpNullable))
3092 return !Right.isOneOf(TT_TemplateCloser, tok::r_paren);
3093
3094 // No space before or after '?.'.
3095 if (Left.is(TT_CSharpNullConditional) || Right.is(TT_CSharpNullConditional))
3096 return false;
3097
3098 // Space before and after '??'.
3099 if (Left.is(TT_CSharpNullCoalescing) || Right.is(TT_CSharpNullCoalescing))
3100 return true;
3101
3102 // No space before '?['.
3103 if (Right.is(TT_CSharpNullConditionalLSquare))
3104 return false;
3105
3106 // No space between consecutive commas '[,,]'.
3107 if (Left.is(tok::comma) && Right.is(tok::comma))
3108 return false;
3109
3110 // Possible space inside `?[ 0 ]`.
3111 if (Left.is(TT_CSharpNullConditionalLSquare))
3112 return Style.SpacesInSquareBrackets;
3113
3114 // space after var in `var (key, value)`
3115 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
3116 return true;
3117
3118 // space between keywords and paren e.g. "using ("
3119 if (Right.is(tok::l_paren))
3120 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
3121 Keywords.kw_lock))
3122 return Style.SpaceBeforeParens == FormatStyle::SBPO_ControlStatements ||
3123 spaceRequiredBeforeParens(Right);
3124 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3125 if (Left.is(TT_JsFatArrow))
3126 return true;
3127 // for await ( ...
3128 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
3129 Left.Previous->is(tok::kw_for))
3130 return true;
3131 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
3132 Right.MatchingParen) {
3133 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
3134 // An async arrow function, for example: `x = async () => foo();`,
3135 // as opposed to calling a function called async: `x = async();`
3136 if (Next && Next->is(TT_JsFatArrow))
3137 return true;
3138 }
3139 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
3140 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
3141 return false;
3142 // In tagged template literals ("html`bar baz`"), there is no space between
3143 // the tag identifier and the template string.
3144 if (Keywords.IsJavaScriptIdentifier(Left,
3145 /* AcceptIdentifierName= */ false) &&
3146 Right.is(TT_TemplateString))
3147 return false;
3148 if (Right.is(tok::star) &&
3149 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
3150 return false;
3151 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
3152 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
3153 Keywords.kw_extends, Keywords.kw_implements))
3154 return true;
3155 if (Right.is(tok::l_paren)) {
3156 // JS methods can use some keywords as names (e.g. `delete()`).
3157 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
3158 return false;
3159 // Valid JS method names can include keywords, e.g. `foo.delete()` or
3160 // `bar.instanceof()`. Recognize call positions by preceding period.
3161 if (Left.Previous && Left.Previous->is(tok::period) &&
3162 Left.Tok.getIdentifierInfo())
3163 return false;
3164 // Additional unary JavaScript operators that need a space after.
3165 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
3166 tok::kw_void))
3167 return true;
3168 }
3169 // `foo as const;` casts into a const type.
3170 if (Left.endsSequence(tok::kw_const, Keywords.kw_as)) {
3171 return false;
3172 }
3173 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
3174 tok::kw_const) ||
3175 // "of" is only a keyword if it appears after another identifier
3176 // (e.g. as "const x of y" in a for loop), or after a destructuring
3177 // operation (const [x, y] of z, const {a, b} of c).
3178 (Left.is(Keywords.kw_of) && Left.Previous &&
3179 (Left.Previous->Tok.is(tok::identifier) ||
3180 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
3181 (!Left.Previous || !Left.Previous->is(tok::period)))
3182 return true;
3183 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
3184 Left.Previous->is(tok::period) && Right.is(tok::l_paren))
3185 return false;
3186 if (Left.is(Keywords.kw_as) &&
3187 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
3188 return true;
3189 if (Left.is(tok::kw_default) && Left.Previous &&
3190 Left.Previous->is(tok::kw_export))
3191 return true;
3192 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
3193 return true;
3194 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
3195 return false;
3196 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
3197 return false;
3198 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
3199 Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
3200 return false;
3201 if (Left.is(tok::ellipsis))
3202 return false;
3203 if (Left.is(TT_TemplateCloser) &&
3204 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
3205 Keywords.kw_implements, Keywords.kw_extends))
3206 // Type assertions ('<type>expr') are not followed by whitespace. Other
3207 // locations that should have whitespace following are identified by the
3208 // above set of follower tokens.
3209 return false;
3210 if (Right.is(TT_JsNonNullAssertion))
3211 return false;
3212 if (Left.is(TT_JsNonNullAssertion) &&
3213 Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
3214 return true; // "x! as string", "x! in y"
3215 } else if (Style.Language == FormatStyle::LK_Java) {
3216 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
3217 return true;
3218 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
3219 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
3220 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
3221 tok::kw_protected) ||
3222 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
3223 Keywords.kw_native)) &&
3224 Right.is(TT_TemplateOpener))
3225 return true;
3226 }
3227 if (Left.is(TT_ImplicitStringLiteral))
3228 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
3229 if (Line.Type == LT_ObjCMethodDecl) {
3230 if (Left.is(TT_ObjCMethodSpecifier))
3231 return true;
3232 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
3233 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
3234 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
3235 // method declaration.
3236 return false;
3237 }
3238 if (Line.Type == LT_ObjCProperty &&
3239 (Right.is(tok::equal) || Left.is(tok::equal)))
3240 return false;
3241
3242 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
3243 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
3244 return true;
3245 if (Right.is(TT_OverloadedOperatorLParen))
3246 return spaceRequiredBeforeParens(Right);
3247 if (Left.is(tok::comma))
3248 return true;
3249 if (Right.is(tok::comma))
3250 return false;
3251 if (Right.is(TT_ObjCBlockLParen))
3252 return true;
3253 if (Right.is(TT_CtorInitializerColon))
3254 return Style.SpaceBeforeCtorInitializerColon;
3255 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
3256 return false;
3257 if (Right.is(TT_RangeBasedForLoopColon) &&
3258 !Style.SpaceBeforeRangeBasedForLoopColon)
3259 return false;
3260 if (Right.is(tok::colon)) {
3261 if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
3262 !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
3263 return false;
3264 if (Right.is(TT_ObjCMethodExpr))
3265 return false;
3266 if (Left.is(tok::question))
3267 return false;
3268 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
3269 return false;
3270 if (Right.is(TT_DictLiteral))
3271 return Style.SpacesInContainerLiterals;
3272 if (Right.is(TT_AttributeColon))
3273 return false;
3274 if (Right.is(TT_CSharpNamedArgumentColon))
3275 return false;
3276 return true;
3277 }
3278 if (Left.is(TT_UnaryOperator)) {
3279 if (!Right.is(tok::l_paren)) {
3280 // The alternative operators for ~ and ! are "compl" and "not".
3281 // If they are used instead, we do not want to combine them with
3282 // the token to the right, unless that is a left paren.
3283 if (Left.is(tok::exclaim) && Left.TokenText == "not")
3284 return true;
3285 if (Left.is(tok::tilde) && Left.TokenText == "compl")
3286 return true;
3287 // Lambda captures allow for a lone &, so "&]" needs to be properly
3288 // handled.
3289 if (Left.is(tok::amp) && Right.is(tok::r_square))
3290 return Style.SpacesInSquareBrackets;
3291 }
3292 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
3293 Right.is(TT_BinaryOperator);
3294 }
3295
3296 // If the next token is a binary operator or a selector name, we have
3297 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
3298 if (Left.is(TT_CastRParen))
3299 return Style.SpaceAfterCStyleCast ||
3300 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
3301
3302 if (Left.is(tok::greater) && Right.is(tok::greater)) {
3303 if (Style.Language == FormatStyle::LK_TextProto ||
3304 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
3305 return !Style.Cpp11BracedListStyle;
3306 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
3307 (Style.Standard < FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
3308 }
3309 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
3310 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
3311 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
3312 return false;
3313 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
3314 Right.getPrecedence() == prec::Assignment)
3315 return false;
3316 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
3317 (Left.is(tok::identifier) || Left.is(tok::kw_this)))
3318 return false;
3319 if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
3320 // Generally don't remove existing spaces between an identifier and "::".
3321 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
3322 // this turns out to be too lenient, add analysis of the identifier itself.
3323 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
3324 if (Right.is(tok::coloncolon) &&
3325 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren))
3326 // Put a space between < and :: in vector< ::std::string >
3327 return (Left.is(TT_TemplateOpener) &&
3328 (Style.Standard < FormatStyle::LS_Cpp11 || Style.SpacesInAngles)) ||
3329 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
3330 tok::kw___super, TT_TemplateOpener,
3331 TT_TemplateCloser)) ||
3332 (Left.is(tok::l_paren) && Style.SpacesInParentheses);
3333 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
3334 return Style.SpacesInAngles;
3335 // Space before TT_StructuredBindingLSquare.
3336 if (Right.is(TT_StructuredBindingLSquare))
3337 return !Left.isOneOf(tok::amp, tok::ampamp) ||
3338 Style.PointerAlignment != FormatStyle::PAS_Right;
3339 // Space before & or && following a TT_StructuredBindingLSquare.
3340 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
3341 Right.isOneOf(tok::amp, tok::ampamp))
3342 return Style.PointerAlignment != FormatStyle::PAS_Left;
3343 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
3344 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
3345 !Right.is(tok::r_paren)))
3346 return true;
3347 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
3348 Right.isNot(TT_FunctionTypeLParen))
3349 return spaceRequiredBeforeParens(Right);
3350 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
3351 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
3352 return false;
3353 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
3354 Line.startsWith(tok::hash))
3355 return true;
3356 if (Right.is(TT_TrailingUnaryOperator))
3357 return false;
3358 if (Left.is(TT_RegexLiteral))
3359 return false;
3360 return spaceRequiredBetween(Line, Left, Right);
3361 }
3362
3363 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)3364 static bool isAllmanBrace(const FormatToken &Tok) {
3365 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
3366 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
3367 }
3368
3369 // Returns 'true' if 'Tok' is an function argument.
IsFunctionArgument(const FormatToken & Tok)3370 static bool IsFunctionArgument(const FormatToken &Tok) {
3371 return Tok.MatchingParen && Tok.MatchingParen->Next &&
3372 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
3373 }
3374
3375 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)3376 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
3377 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
3378 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
3379 }
3380
3381 static bool
isItAInlineLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)3382 isItAInlineLambdaAllowed(const FormatToken &Tok,
3383 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
3384 return (ShortLambdaOption == FormatStyle::SLS_Inline &&
3385 IsFunctionArgument(Tok)) ||
3386 (ShortLambdaOption == FormatStyle::SLS_All);
3387 }
3388
isOneChildWithoutMustBreakBefore(const FormatToken & Tok)3389 static bool isOneChildWithoutMustBreakBefore(const FormatToken &Tok) {
3390 if (Tok.Children.size() != 1)
3391 return false;
3392 FormatToken *curElt = Tok.Children[0]->First;
3393 while (curElt) {
3394 if (curElt->MustBreakBefore)
3395 return false;
3396 curElt = curElt->Next;
3397 }
3398 return true;
3399 }
isAllmanLambdaBrace(const FormatToken & Tok)3400 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
3401 return (Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
3402 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral));
3403 }
3404
isAllmanBraceIncludedBreakableLambda(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)3405 static bool isAllmanBraceIncludedBreakableLambda(
3406 const FormatToken &Tok, FormatStyle::ShortLambdaStyle ShortLambdaOption) {
3407 if (!isAllmanLambdaBrace(Tok))
3408 return false;
3409
3410 if (isItAnEmptyLambdaAllowed(Tok, ShortLambdaOption))
3411 return false;
3412
3413 return !isItAInlineLambdaAllowed(Tok, ShortLambdaOption) ||
3414 !isOneChildWithoutMustBreakBefore(Tok);
3415 }
3416
mustBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)3417 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
3418 const FormatToken &Right) {
3419 const FormatToken &Left = *Right.Previous;
3420 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
3421 return true;
3422
3423 if (Style.isCSharp()) {
3424 if (Right.is(TT_CSharpNamedArgumentColon) ||
3425 Left.is(TT_CSharpNamedArgumentColon))
3426 return false;
3427 if (Right.is(TT_CSharpGenericTypeConstraint))
3428 return true;
3429 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3430 // FIXME: This might apply to other languages and token kinds.
3431 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
3432 Left.Previous->is(tok::string_literal))
3433 return true;
3434 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
3435 Left.Previous && Left.Previous->is(tok::equal) &&
3436 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
3437 tok::kw_const) &&
3438 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
3439 // above.
3440 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
3441 // Object literals on the top level of a file are treated as "enum-style".
3442 // Each key/value pair is put on a separate line, instead of bin-packing.
3443 return true;
3444 if (Left.is(tok::l_brace) && Line.Level == 0 &&
3445 (Line.startsWith(tok::kw_enum) ||
3446 Line.startsWith(tok::kw_const, tok::kw_enum) ||
3447 Line.startsWith(tok::kw_export, tok::kw_enum) ||
3448 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
3449 // JavaScript top-level enum key/value pairs are put on separate lines
3450 // instead of bin-packing.
3451 return true;
3452 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
3453 Left.Previous->is(TT_JsFatArrow)) {
3454 // JS arrow function (=> {...}).
3455 switch (Style.AllowShortLambdasOnASingleLine) {
3456 case FormatStyle::SLS_All:
3457 return false;
3458 case FormatStyle::SLS_None:
3459 return true;
3460 case FormatStyle::SLS_Empty:
3461 return !Left.Children.empty();
3462 case FormatStyle::SLS_Inline:
3463 // allow one-lining inline (e.g. in function call args) and empty arrow
3464 // functions.
3465 return (Left.NestingLevel == 0 && Line.Level == 0) &&
3466 !Left.Children.empty();
3467 }
3468 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
3469 }
3470
3471 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
3472 !Left.Children.empty())
3473 // Support AllowShortFunctionsOnASingleLine for JavaScript.
3474 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
3475 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
3476 (Left.NestingLevel == 0 && Line.Level == 0 &&
3477 Style.AllowShortFunctionsOnASingleLine &
3478 FormatStyle::SFS_InlineOnly);
3479 } else if (Style.Language == FormatStyle::LK_Java) {
3480 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
3481 Right.Next->is(tok::string_literal))
3482 return true;
3483 } else if (Style.Language == FormatStyle::LK_Cpp ||
3484 Style.Language == FormatStyle::LK_ObjC ||
3485 Style.Language == FormatStyle::LK_Proto ||
3486 Style.Language == FormatStyle::LK_TableGen ||
3487 Style.Language == FormatStyle::LK_TextProto) {
3488 if (Left.isStringLiteral() && Right.isStringLiteral())
3489 return true;
3490 }
3491
3492 // If the last token before a '}', ']', or ')' is a comma or a trailing
3493 // comment, the intention is to insert a line break after it in order to make
3494 // shuffling around entries easier. Import statements, especially in
3495 // JavaScript, can be an exception to this rule.
3496 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
3497 const FormatToken *BeforeClosingBrace = nullptr;
3498 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
3499 (Style.Language == FormatStyle::LK_JavaScript &&
3500 Left.is(tok::l_paren))) &&
3501 Left.BlockKind != BK_Block && Left.MatchingParen)
3502 BeforeClosingBrace = Left.MatchingParen->Previous;
3503 else if (Right.MatchingParen &&
3504 (Right.MatchingParen->isOneOf(tok::l_brace,
3505 TT_ArrayInitializerLSquare) ||
3506 (Style.Language == FormatStyle::LK_JavaScript &&
3507 Right.MatchingParen->is(tok::l_paren))))
3508 BeforeClosingBrace = &Left;
3509 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
3510 BeforeClosingBrace->isTrailingComment()))
3511 return true;
3512 }
3513
3514 if (Right.is(tok::comment))
3515 return Left.BlockKind != BK_BracedInit &&
3516 Left.isNot(TT_CtorInitializerColon) &&
3517 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
3518 if (Left.isTrailingComment())
3519 return true;
3520 if (Right.Previous->IsUnterminatedLiteral)
3521 return true;
3522 if (Right.is(tok::lessless) && Right.Next &&
3523 Right.Previous->is(tok::string_literal) &&
3524 Right.Next->is(tok::string_literal))
3525 return true;
3526 if (Right.Previous->ClosesTemplateDeclaration &&
3527 Right.Previous->MatchingParen &&
3528 Right.Previous->MatchingParen->NestingLevel == 0 &&
3529 Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes)
3530 return true;
3531 if (Right.is(TT_CtorInitializerComma) &&
3532 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
3533 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
3534 return true;
3535 if (Right.is(TT_CtorInitializerColon) &&
3536 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
3537 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
3538 return true;
3539 // Break only if we have multiple inheritance.
3540 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
3541 Right.is(TT_InheritanceComma))
3542 return true;
3543 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
3544 // Multiline raw string literals are special wrt. line breaks. The author
3545 // has made a deliberate choice and might have aligned the contents of the
3546 // string literal accordingly. Thus, we try keep existing line breaks.
3547 return Right.IsMultiline && Right.NewlinesBefore > 0;
3548 if ((Right.Previous->is(tok::l_brace) ||
3549 (Right.Previous->is(tok::less) && Right.Previous->Previous &&
3550 Right.Previous->Previous->is(tok::equal))) &&
3551 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
3552 // Don't put enums or option definitions onto single lines in protocol
3553 // buffers.
3554 return true;
3555 }
3556 if (Right.is(TT_InlineASMBrace))
3557 return Right.HasUnescapedNewline;
3558
3559 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
3560 if (Style.BraceWrapping.BeforeLambdaBody &&
3561 (isAllmanBraceIncludedBreakableLambda(Left, ShortLambdaOption) ||
3562 isAllmanBraceIncludedBreakableLambda(Right, ShortLambdaOption))) {
3563 return true;
3564 }
3565
3566 if (isAllmanBrace(Left) || isAllmanBrace(Right))
3567 return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
3568 (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
3569 Style.BraceWrapping.AfterEnum) ||
3570 (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
3571 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
3572 if (Left.is(TT_ObjCBlockLBrace) &&
3573 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
3574 return true;
3575
3576 if (Left.is(TT_LambdaLBrace)) {
3577 if (IsFunctionArgument(Left) &&
3578 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline)
3579 return false;
3580
3581 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
3582 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
3583 (!Left.Children.empty() &&
3584 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty))
3585 return true;
3586 }
3587
3588 // Put multiple Java annotation on a new line.
3589 if ((Style.Language == FormatStyle::LK_Java ||
3590 Style.Language == FormatStyle::LK_JavaScript) &&
3591 Left.is(TT_LeadingJavaAnnotation) &&
3592 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
3593 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
3594 return true;
3595
3596 if (Right.is(TT_ProtoExtensionLSquare))
3597 return true;
3598
3599 // In text proto instances if a submessage contains at least 2 entries and at
3600 // least one of them is a submessage, like A { ... B { ... } ... },
3601 // put all of the entries of A on separate lines by forcing the selector of
3602 // the submessage B to be put on a newline.
3603 //
3604 // Example: these can stay on one line:
3605 // a { scalar_1: 1 scalar_2: 2 }
3606 // a { b { key: value } }
3607 //
3608 // and these entries need to be on a new line even if putting them all in one
3609 // line is under the column limit:
3610 // a {
3611 // scalar: 1
3612 // b { key: value }
3613 // }
3614 //
3615 // We enforce this by breaking before a submessage field that has previous
3616 // siblings, *and* breaking before a field that follows a submessage field.
3617 //
3618 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
3619 // the TT_SelectorName there, but we don't want to break inside the brackets.
3620 //
3621 // Another edge case is @submessage { key: value }, which is a common
3622 // substitution placeholder. In this case we want to keep `@` and `submessage`
3623 // together.
3624 //
3625 // We ensure elsewhere that extensions are always on their own line.
3626 if ((Style.Language == FormatStyle::LK_Proto ||
3627 Style.Language == FormatStyle::LK_TextProto) &&
3628 Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
3629 // Keep `@submessage` together in:
3630 // @submessage { key: value }
3631 if (Right.Previous && Right.Previous->is(tok::at))
3632 return false;
3633 // Look for the scope opener after selector in cases like:
3634 // selector { ...
3635 // selector: { ...
3636 // selector: @base { ...
3637 FormatToken *LBrace = Right.Next;
3638 if (LBrace && LBrace->is(tok::colon)) {
3639 LBrace = LBrace->Next;
3640 if (LBrace && LBrace->is(tok::at)) {
3641 LBrace = LBrace->Next;
3642 if (LBrace)
3643 LBrace = LBrace->Next;
3644 }
3645 }
3646 if (LBrace &&
3647 // The scope opener is one of {, [, <:
3648 // selector { ... }
3649 // selector [ ... ]
3650 // selector < ... >
3651 //
3652 // In case of selector { ... }, the l_brace is TT_DictLiteral.
3653 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
3654 // so we check for immediately following r_brace.
3655 ((LBrace->is(tok::l_brace) &&
3656 (LBrace->is(TT_DictLiteral) ||
3657 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
3658 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
3659 // If Left.ParameterCount is 0, then this submessage entry is not the
3660 // first in its parent submessage, and we want to break before this entry.
3661 // If Left.ParameterCount is greater than 0, then its parent submessage
3662 // might contain 1 or more entries and we want to break before this entry
3663 // if it contains at least 2 entries. We deal with this case later by
3664 // detecting and breaking before the next entry in the parent submessage.
3665 if (Left.ParameterCount == 0)
3666 return true;
3667 // However, if this submessage is the first entry in its parent
3668 // submessage, Left.ParameterCount might be 1 in some cases.
3669 // We deal with this case later by detecting an entry
3670 // following a closing paren of this submessage.
3671 }
3672
3673 // If this is an entry immediately following a submessage, it will be
3674 // preceded by a closing paren of that submessage, like in:
3675 // left---. .---right
3676 // v v
3677 // sub: { ... } key: value
3678 // If there was a comment between `}` an `key` above, then `key` would be
3679 // put on a new line anyways.
3680 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
3681 return true;
3682 }
3683
3684 // Deal with lambda arguments in C++ - we want consistent line breaks whether
3685 // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
3686 // as aggressive line breaks are placed when the lambda is not the last arg.
3687 if ((Style.Language == FormatStyle::LK_Cpp ||
3688 Style.Language == FormatStyle::LK_ObjC) &&
3689 Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
3690 !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
3691 // Multiple lambdas in the same function call force line breaks.
3692 if (Left.BlockParameterCount > 1)
3693 return true;
3694
3695 // A lambda followed by another arg forces a line break.
3696 if (!Left.Role)
3697 return false;
3698 auto Comma = Left.Role->lastComma();
3699 if (!Comma)
3700 return false;
3701 auto Next = Comma->getNextNonComment();
3702 if (!Next)
3703 return false;
3704 if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
3705 return true;
3706 }
3707
3708 return false;
3709 }
3710
canBreakBefore(const AnnotatedLine & Line,const FormatToken & Right)3711 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
3712 const FormatToken &Right) {
3713 const FormatToken &Left = *Right.Previous;
3714 // Language-specific stuff.
3715 if (Style.isCSharp()) {
3716 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
3717 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon))
3718 return false;
3719 // Only break after commas for generic type constraints.
3720 if (Line.First->is(TT_CSharpGenericTypeConstraint))
3721 return Left.is(TT_CSharpGenericTypeConstraintComma);
3722 } else if (Style.Language == FormatStyle::LK_Java) {
3723 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3724 Keywords.kw_implements))
3725 return false;
3726 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3727 Keywords.kw_implements))
3728 return true;
3729 } else if (Style.Language == FormatStyle::LK_JavaScript) {
3730 const FormatToken *NonComment = Right.getPreviousNonComment();
3731 if (NonComment &&
3732 NonComment->isOneOf(
3733 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
3734 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
3735 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
3736 Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
3737 Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
3738 return false; // Otherwise automatic semicolon insertion would trigger.
3739 if (Right.NestingLevel == 0 &&
3740 (Left.Tok.getIdentifierInfo() ||
3741 Left.isOneOf(tok::r_square, tok::r_paren)) &&
3742 Right.isOneOf(tok::l_square, tok::l_paren))
3743 return false; // Otherwise automatic semicolon insertion would trigger.
3744 if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
3745 return false;
3746 if (Left.is(TT_JsTypeColon))
3747 return true;
3748 // Don't wrap between ":" and "!" of a strict prop init ("field!: type;").
3749 if (Left.is(tok::exclaim) && Right.is(tok::colon))
3750 return false;
3751 // Look for is type annotations like:
3752 // function f(): a is B { ... }
3753 // Do not break before is in these cases.
3754 if (Right.is(Keywords.kw_is)) {
3755 const FormatToken *Next = Right.getNextNonComment();
3756 // If `is` is followed by a colon, it's likely that it's a dict key, so
3757 // ignore it for this check.
3758 // For example this is common in Polymer:
3759 // Polymer({
3760 // is: 'name',
3761 // ...
3762 // });
3763 if (!Next || !Next->is(tok::colon))
3764 return false;
3765 }
3766 if (Left.is(Keywords.kw_in))
3767 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
3768 if (Right.is(Keywords.kw_in))
3769 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
3770 if (Right.is(Keywords.kw_as))
3771 return false; // must not break before as in 'x as type' casts
3772 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
3773 // extends and infer can appear as keywords in conditional types:
3774 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
3775 // do not break before them, as the expressions are subject to ASI.
3776 return false;
3777 }
3778 if (Left.is(Keywords.kw_as))
3779 return true;
3780 if (Left.is(TT_JsNonNullAssertion))
3781 return true;
3782 if (Left.is(Keywords.kw_declare) &&
3783 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
3784 Keywords.kw_function, tok::kw_class, tok::kw_enum,
3785 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
3786 Keywords.kw_let, tok::kw_const))
3787 // See grammar for 'declare' statements at:
3788 // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
3789 return false;
3790 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
3791 Right.isOneOf(tok::identifier, tok::string_literal))
3792 return false; // must not break in "module foo { ...}"
3793 if (Right.is(TT_TemplateString) && Right.closesScope())
3794 return false;
3795 // Don't split tagged template literal so there is a break between the tag
3796 // identifier and template string.
3797 if (Left.is(tok::identifier) && Right.is(TT_TemplateString)) {
3798 return false;
3799 }
3800 if (Left.is(TT_TemplateString) && Left.opensScope())
3801 return true;
3802 }
3803
3804 if (Left.is(tok::at))
3805 return false;
3806 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
3807 return false;
3808 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
3809 return !Right.is(tok::l_paren);
3810 if (Right.is(TT_PointerOrReference))
3811 return Line.IsMultiVariableDeclStmt ||
3812 (Style.PointerAlignment == FormatStyle::PAS_Right &&
3813 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
3814 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
3815 Right.is(tok::kw_operator))
3816 return true;
3817 if (Left.is(TT_PointerOrReference))
3818 return false;
3819 if (Right.isTrailingComment())
3820 // We rely on MustBreakBefore being set correctly here as we should not
3821 // change the "binding" behavior of a comment.
3822 // The first comment in a braced lists is always interpreted as belonging to
3823 // the first list element. Otherwise, it should be placed outside of the
3824 // list.
3825 return Left.BlockKind == BK_BracedInit ||
3826 (Left.is(TT_CtorInitializerColon) &&
3827 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
3828 if (Left.is(tok::question) && Right.is(tok::colon))
3829 return false;
3830 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
3831 return Style.BreakBeforeTernaryOperators;
3832 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
3833 return !Style.BreakBeforeTernaryOperators;
3834 if (Left.is(TT_InheritanceColon))
3835 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
3836 if (Right.is(TT_InheritanceColon))
3837 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
3838 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
3839 Left.isNot(TT_SelectorName))
3840 return true;
3841
3842 if (Right.is(tok::colon) &&
3843 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
3844 return false;
3845 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
3846 if (Style.Language == FormatStyle::LK_Proto ||
3847 Style.Language == FormatStyle::LK_TextProto) {
3848 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
3849 return false;
3850 // Prevent cases like:
3851 //
3852 // submessage:
3853 // { key: valueeeeeeeeeeee }
3854 //
3855 // when the snippet does not fit into one line.
3856 // Prefer:
3857 //
3858 // submessage: {
3859 // key: valueeeeeeeeeeee
3860 // }
3861 //
3862 // instead, even if it is longer by one line.
3863 //
3864 // Note that this allows allows the "{" to go over the column limit
3865 // when the column limit is just between ":" and "{", but that does
3866 // not happen too often and alternative formattings in this case are
3867 // not much better.
3868 //
3869 // The code covers the cases:
3870 //
3871 // submessage: { ... }
3872 // submessage: < ... >
3873 // repeated: [ ... ]
3874 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
3875 Right.is(TT_DictLiteral)) ||
3876 Right.is(TT_ArrayInitializerLSquare))
3877 return false;
3878 }
3879 return true;
3880 }
3881 if (Right.is(tok::r_square) && Right.MatchingParen &&
3882 Right.MatchingParen->is(TT_ProtoExtensionLSquare))
3883 return false;
3884 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
3885 Right.Next->is(TT_ObjCMethodExpr)))
3886 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
3887 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
3888 return true;
3889 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
3890 return true;
3891 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
3892 TT_OverloadedOperator))
3893 return false;
3894 if (Left.is(TT_RangeBasedForLoopColon))
3895 return true;
3896 if (Right.is(TT_RangeBasedForLoopColon))
3897 return false;
3898 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
3899 return true;
3900 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
3901 Left.is(tok::kw_operator))
3902 return false;
3903 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
3904 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
3905 return false;
3906 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
3907 !Style.Cpp11BracedListStyle)
3908 return false;
3909 if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
3910 return false;
3911 if (Left.is(tok::l_paren) && Left.Previous &&
3912 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
3913 return false;
3914 if (Right.is(TT_ImplicitStringLiteral))
3915 return false;
3916
3917 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
3918 return false;
3919 if (Right.is(tok::r_square) && Right.MatchingParen &&
3920 Right.MatchingParen->is(TT_LambdaLSquare))
3921 return false;
3922
3923 // We only break before r_brace if there was a corresponding break before
3924 // the l_brace, which is tracked by BreakBeforeClosingBrace.
3925 if (Right.is(tok::r_brace))
3926 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
3927
3928 // Allow breaking after a trailing annotation, e.g. after a method
3929 // declaration.
3930 if (Left.is(TT_TrailingAnnotation))
3931 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
3932 tok::less, tok::coloncolon);
3933
3934 if (Right.is(tok::kw___attribute) ||
3935 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
3936 return !Left.is(TT_AttributeSquare);
3937
3938 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
3939 return true;
3940
3941 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
3942 return true;
3943
3944 if (Left.is(TT_CtorInitializerColon))
3945 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
3946 if (Right.is(TT_CtorInitializerColon))
3947 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
3948 if (Left.is(TT_CtorInitializerComma) &&
3949 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
3950 return false;
3951 if (Right.is(TT_CtorInitializerComma) &&
3952 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
3953 return true;
3954 if (Left.is(TT_InheritanceComma) &&
3955 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
3956 return false;
3957 if (Right.is(TT_InheritanceComma) &&
3958 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
3959 return true;
3960 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
3961 (Left.is(tok::less) && Right.is(tok::less)))
3962 return false;
3963 if (Right.is(TT_BinaryOperator) &&
3964 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
3965 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
3966 Right.getPrecedence() != prec::Assignment))
3967 return true;
3968 if (Left.is(TT_ArrayInitializerLSquare))
3969 return true;
3970 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
3971 return true;
3972 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
3973 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
3974 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
3975 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
3976 Left.getPrecedence() == prec::Assignment))
3977 return true;
3978 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
3979 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
3980 return false;
3981
3982 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
3983 if (Style.BraceWrapping.BeforeLambdaBody) {
3984 if (isAllmanLambdaBrace(Left))
3985 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
3986 if (isAllmanLambdaBrace(Right))
3987 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
3988 }
3989
3990 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
3991 tok::kw_class, tok::kw_struct, tok::comment) ||
3992 Right.isMemberAccess() ||
3993 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
3994 tok::colon, tok::l_square, tok::at) ||
3995 (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) ||
3996 (Left.is(tok::r_paren) &&
3997 Right.isOneOf(tok::identifier, tok::kw_const)) ||
3998 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
3999 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
4000 }
4001
printDebugInfo(const AnnotatedLine & Line)4002 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
4003 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
4004 const FormatToken *Tok = Line.First;
4005 while (Tok) {
4006 llvm::errs() << " M=" << Tok->MustBreakBefore
4007 << " C=" << Tok->CanBreakBefore
4008 << " T=" << getTokenTypeName(Tok->getType())
4009 << " S=" << Tok->SpacesRequiredBefore
4010 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
4011 << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
4012 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
4013 << " PPK=" << Tok->PackingKind << " FakeLParens=";
4014 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
4015 llvm::errs() << Tok->FakeLParens[i] << "/";
4016 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
4017 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
4018 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
4019 if (!Tok->Next)
4020 assert(Tok == Line.Last);
4021 Tok = Tok->Next;
4022 }
4023 llvm::errs() << "----\n";
4024 }
4025
4026 } // namespace format
4027 } // namespace clang
4028