1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
21
22 #define DEBUG_TYPE "format-token-annotator"
23
24 namespace clang {
25 namespace format {
26
27 namespace {
28
29 /// Returns \c true if the line starts with a token that can start a statement
30 /// with an initializer.
startsWithInitStatement(const AnnotatedLine & Line)31 static bool startsWithInitStatement(const AnnotatedLine &Line) {
32 return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
33 Line.startsWith(tok::kw_switch);
34 }
35
36 /// Returns \c true if the token can be used as an identifier in
37 /// an Objective-C \c \@selector, \c false otherwise.
38 ///
39 /// Because getFormattingLangOpts() always lexes source code as
40 /// Objective-C++, C++ keywords like \c new and \c delete are
41 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
42 ///
43 /// For Objective-C and Objective-C++, both identifiers and keywords
44 /// are valid inside @selector(...) (or a macro which
45 /// invokes @selector(...)). So, we allow treat any identifier or
46 /// keyword as a potential Objective-C selector component.
canBeObjCSelectorComponent(const FormatToken & Tok)47 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
48 return Tok.Tok.getIdentifierInfo() != nullptr;
49 }
50
51 /// With `Left` being '(', check if we're at either `[...](` or
52 /// `[...]<...>(`, where the [ opens a lambda capture list.
isLambdaParameterList(const FormatToken * Left)53 static bool isLambdaParameterList(const FormatToken *Left) {
54 // Skip <...> if present.
55 if (Left->Previous && Left->Previous->is(tok::greater) &&
56 Left->Previous->MatchingParen &&
57 Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
58 Left = Left->Previous->MatchingParen;
59 }
60
61 // Check for `[...]`.
62 return Left->Previous && Left->Previous->is(tok::r_square) &&
63 Left->Previous->MatchingParen &&
64 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
65 }
66
67 /// Returns \c true if the token is followed by a boolean condition, \c false
68 /// otherwise.
isKeywordWithCondition(const FormatToken & Tok)69 static bool isKeywordWithCondition(const FormatToken &Tok) {
70 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
71 tok::kw_constexpr, tok::kw_catch);
72 }
73
74 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
isCppAttribute(bool IsCpp,const FormatToken & Tok)75 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
76 if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
77 return false;
78 // The first square bracket is part of an ObjC array literal
79 if (Tok.Previous && Tok.Previous->is(tok::at))
80 return false;
81 const FormatToken *AttrTok = Tok.Next->Next;
82 if (!AttrTok)
83 return false;
84 // C++17 '[[using ns: foo, bar(baz, blech)]]'
85 // We assume nobody will name an ObjC variable 'using'.
86 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
87 return true;
88 if (AttrTok->isNot(tok::identifier))
89 return false;
90 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
91 // ObjC message send. We assume nobody will use : in a C++11 attribute
92 // specifier parameter, although this is technically valid:
93 // [[foo(:)]].
94 if (AttrTok->is(tok::colon) ||
95 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
96 AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
97 return false;
98 }
99 if (AttrTok->is(tok::ellipsis))
100 return true;
101 AttrTok = AttrTok->Next;
102 }
103 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
104 }
105
106 /// A parser that gathers additional information about tokens.
107 ///
108 /// The \c TokenAnnotator tries to match parentheses and square brackets and
109 /// to store the parenthesis levels. It also tries to resolve matching "<" and ">"
110 /// into template parameter lists.
111 class AnnotatingParser {
112 public:
/// Prepares the parser to annotate \p Line.
///
/// Parsing starts at \c Line.First with \c AutoFound cleared. A single root
/// context of kind \c tok::unknown that is not an expression context is
/// pushed, and the token metadata is reset.
///
/// \param Style     Formatting style consulted by the parse routines.
/// \param Line      The line whose tokens are annotated.
/// \param Keywords  Additional (non-C++) keywords used for token checks.
AnnotatingParser(const FormatStyle & Style,AnnotatedLine & Line,const AdditionalKeywords & Keywords)113 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
114 const AdditionalKeywords &Keywords)
115 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
116 Keywords(Keywords) {
117 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
118 resetTokenMetadata();
119 }
120
121 private:
/// Tries to parse a '<' ... '>' pair as template-style angle brackets.
///
/// The '<' is expected to have just been consumed, i.e. it is
/// \c CurrentToken->Previous. On success the two brackets are linked through
/// \c MatchingParen, the closing '>' is typed as \c TT_TemplateCloser (or as
/// \c TT_DictLiteral in the proto cases handled below) and is consumed.
///
/// \returns \c true if a matching '>' was found; \c false if the '<' is
/// rejected up front, a token that rules out angle brackets is met, consuming
/// a token fails, or the tokens run out.
parseAngle()122 bool parseAngle() {
123 if (!CurrentToken || !CurrentToken->Previous)
124 return false;
// A '<' that is already recorded in NonTemplateLess is rejected immediately.
125 if (NonTemplateLess.count(CurrentToken->Previous))
126 return false;
127
128 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
129 if (Previous.Previous) {
// A '<' directly after a literal is rejected.
130 if (Previous.Previous->Tok.isLiteral())
131 return false;
// So is a '<' after ')' in a nested context, unless that ')' closes the
// parameter list of an overloaded operator.
132 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
133 (!Previous.Previous->MatchingParen ||
134 !Previous.Previous->MatchingParen->is(
135 TT_OverloadedOperatorLParen))) {
136 return false;
137 }
138 }
139
140 FormatToken *Left = CurrentToken->Previous;
141 Left->ParentBracket = Contexts.back().ContextKind;
142 ScopedContextCreator ContextCreator(*this, tok::less, 12);
143
144 // If this angle is in the context of an expression, we need to be more
145 // hesitant to detect it as opening template parameters.
146 bool InExprContext = Contexts.back().IsExpression;
147
148 Contexts.back().IsExpression = false;
149 // If there's a template keyword before the opening angle bracket, this is a
150 // template parameter, not an argument.
151 if (Left->Previous && Left->Previous->isNot(tok::kw_template))
152 Contexts.back().ContextType = Context::TemplateArgument;
153
// In Java, step over a '?' that directly follows the '<'.
// NOTE(review): presumably a wildcard type argument — confirm.
154 if (Style.Language == FormatStyle::LK_Java &&
155 CurrentToken->is(tok::question)) {
156 next();
157 }
158
159 while (CurrentToken) {
160 if (CurrentToken->is(tok::greater)) {
161 // Try to do a better job at looking for ">>" within the condition of
162 // a statement. Conservatively insert spaces between consecutive ">"
163 // tokens to prevent splitting right bitshift operators and potentially
164 // altering program semantics. This check is overly conservative and
165 // will prevent spaces from being inserted in select nested template
166 // parameter cases, but should not alter program semantics.
167 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
168 Left->ParentBracket != tok::less &&
169 CurrentToken->getStartOfNonWhitespace() ==
170 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
171 -1)) {
172 return false;
173 }
174 Left->MatchingParen = CurrentToken;
175 CurrentToken->MatchingParen = Left;
176 // In TT_Proto, we must distinguish between:
177 // map<key, value>
178 // msg < item: data >
179 // msg: < item: data >
180 // In TT_TextProto, map<key, value> does not occur.
181 if (Style.Language == FormatStyle::LK_TextProto ||
182 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
183 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
184 CurrentToken->setType(TT_DictLiteral);
185 } else {
186 CurrentToken->setType(TT_TemplateCloser);
187 }
188 next();
189 return true;
190 }
// In Java, '?' tokens inside the brackets are skipped without analysis.
191 if (CurrentToken->is(tok::question) &&
192 Style.Language == FormatStyle::LK_Java) {
193 next();
194 continue;
195 }
// A closing bracket of another kind ends the attempt, as does ':' or '?'
// when the '<' was seen in an expression context (except for C# and the
// proto languages).
196 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
197 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
198 !Style.isCSharp() && Style.Language != FormatStyle::LK_Proto &&
199 Style.Language != FormatStyle::LK_TextProto)) {
200 return false;
201 }
202 // If a && or || is found and interpreted as a binary operator, this set
203 // of angles is likely part of something like "a < b && c > d". If the
204 // angles are inside an expression, the ||/&& might also be a binary
205 // operator that was misinterpreted because we are parsing template
206 // parameters.
207 // FIXME: This is getting out of hand, write a decent parser.
// NOTE(review): Contexts[size() - 2] is presumably the context that encloses
// the angle brackets (the one active before ContextCreator) — confirm.
208 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
209 CurrentToken->Previous->is(TT_BinaryOperator) &&
210 Contexts[Contexts.size() - 2].IsExpression &&
211 !Line.startsWith(tok::kw_template)) {
212 return false;
213 }
214 updateParameterCount(Left, CurrentToken);
// In proto files, the token before a ':' is typed as a selector name; the
// same applies to the token before '{' or '<' unless that token is a ':'.
215 if (Style.Language == FormatStyle::LK_Proto) {
216 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
217 if (CurrentToken->is(tok::colon) ||
218 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
219 Previous->isNot(tok::colon))) {
220 Previous->setType(TT_SelectorName);
221 }
222 }
223 }
224 if (!consumeToken())
225 return false;
226 }
// Ran out of tokens without finding the closing '>'.
227 return false;
228 }
229
parseUntouchableParens()230 bool parseUntouchableParens() {
231 while (CurrentToken) {
232 CurrentToken->Finalized = true;
233 switch (CurrentToken->Tok.getKind()) {
234 case tok::l_paren:
235 next();
236 if (!parseUntouchableParens())
237 return false;
238 continue;
239 case tok::r_paren:
240 next();
241 return true;
242 default:
243 // no-op
244 break;
245 }
246 next();
247 }
248 return false;
249 }
250
/// Parses a parenthesized token range whose '(' has just been consumed.
///
/// Decides whether the contents are treated as an expression, types the '('
/// where possible (attribute, type-declaration, ObjC method expression,
/// function-type or ObjC block parenthesis), links '(' and ')' through
/// \c MatchingParen, propagates selected types to the ')' and records a
/// parameter packing kind on the '('.
///
/// \param LookForDecls  When set, the first `identifier */&/&& identifier`
/// sequence that is not followed by '=' has its operator typed as
/// \c TT_BinaryOperator; the flag is cleared afterwards.
/// \returns \c true once the matching ')' has been consumed. \c false if
/// there is no current token, a stray ']' or '}' is met, \c consumeToken()
/// fails, or the tokens run out. For parentheses following an untouchable
/// macro the result of \c parseUntouchableParens() is returned.
parseParens(bool LookForDecls=false)251 bool parseParens(bool LookForDecls = false) {
252 if (!CurrentToken)
253 return false;
254 assert(CurrentToken->Previous && "Unknown previous token");
255 FormatToken &OpeningParen = *CurrentToken->Previous;
256 assert(OpeningParen.is(tok::l_paren));
257 FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
258 OpeningParen.ParentBracket = Contexts.back().ContextKind;
259 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
260
261 // FIXME: This is a bit of a hack. Do better.
262 Contexts.back().ColonIsForRangeExpr =
263 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
264
// Parentheses after an untouchable macro are only marked as finalized; their
// contents are not analysed any further.
265 if (OpeningParen.Previous &&
266 OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
267 OpeningParen.Finalized = true;
268 return parseUntouchableParens();
269 }
270
271 bool StartsObjCMethodExpr = false;
272 if (!Style.isVerilog()) {
273 if (FormatToken *MaybeSel = OpeningParen.Previous) {
274 // @selector( starts a selector.
275 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
276 MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
277 StartsObjCMethodExpr = true;
278 }
279 }
280 }
281
// Decide whether the parenthesized tokens are treated as an expression. The
// first matching branch wins, so the order of the checks matters.
282 if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
283 // Find the previous kw_operator token.
284 FormatToken *Prev = &OpeningParen;
285 while (!Prev->is(tok::kw_operator)) {
286 Prev = Prev->Previous;
287 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
288 }
289
290 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
291 // i.e. the operator is called as a member function,
292 // then the argument must be an expression.
293 bool OperatorCalledAsMemberFunction =
294 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
295 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
296 } else if (Style.isJavaScript() &&
297 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
298 Line.startsWith(tok::kw_export, Keywords.kw_type,
299 tok::identifier))) {
300 // type X = (...);
301 // export type X = (...);
302 Contexts.back().IsExpression = false;
303 } else if (OpeningParen.Previous &&
304 (OpeningParen.Previous->isOneOf(tok::kw_static_assert,
305 tok::kw_while, tok::l_paren,
306 tok::comma, TT_BinaryOperator) ||
307 OpeningParen.Previous->isIf())) {
308 // static_assert, if and while usually contain expressions.
309 Contexts.back().IsExpression = true;
310 } else if (Style.isJavaScript() && OpeningParen.Previous &&
311 (OpeningParen.Previous->is(Keywords.kw_function) ||
312 (OpeningParen.Previous->endsSequence(tok::identifier,
313 Keywords.kw_function)))) {
314 // function(...) or function f(...)
315 Contexts.back().IsExpression = false;
316 } else if (Style.isJavaScript() && OpeningParen.Previous &&
317 OpeningParen.Previous->is(TT_JsTypeColon)) {
318 // let x: (SomeType);
319 Contexts.back().IsExpression = false;
320 } else if (isLambdaParameterList(&OpeningParen)) {
321 // This is a parameter list of a lambda expression.
322 Contexts.back().IsExpression = false;
323 } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
324 Contexts.back().IsExpression = false;
325 } else if (OpeningParen.Previous &&
326 OpeningParen.Previous->is(tok::kw__Generic)) {
327 Contexts.back().ContextType = Context::C11GenericSelection;
328 Contexts.back().IsExpression = true;
329 } else if (Line.InPPDirective &&
330 (!OpeningParen.Previous ||
331 !OpeningParen.Previous->is(tok::identifier))) {
332 Contexts.back().IsExpression = true;
// NOTE(review): Contexts[size() - 2] is presumably the context enclosing
// these parentheses (the one active before ContextCreator) — confirm.
333 } else if (Contexts[Contexts.size() - 2].CaretFound) {
334 // This is the parameter list of an ObjC block.
335 Contexts.back().IsExpression = false;
336 } else if (OpeningParen.Previous &&
337 OpeningParen.Previous->is(TT_ForEachMacro)) {
338 // The first argument to a foreach macro is a declaration.
339 Contexts.back().ContextType = Context::ForEachMacro;
340 Contexts.back().IsExpression = false;
341 } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
342 OpeningParen.Previous->MatchingParen->isOneOf(
343 TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
344 Contexts.back().IsExpression = false;
345 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
346 bool IsForOrCatch =
347 OpeningParen.Previous &&
348 OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
349 Contexts.back().IsExpression = !IsForOrCatch;
350 }
351
352 // Infer the role of the l_paren based on the previous token if we haven't
353 // detected one yet.
354 if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
355 if (PrevNonComment->is(tok::kw___attribute)) {
356 OpeningParen.setType(TT_AttributeParen);
357 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
358 tok::kw_typeof,
// The .def include expands to one `tok::kw___<Trait>,` argument per entry.
359 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
360 #include "clang/Basic/TransformTypeTraits.def"
361 tok::kw__Atomic)) {
362 OpeningParen.setType(TT_TypeDeclarationParen);
363 // decltype() and typeof() usually contain expressions.
364 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
365 Contexts.back().IsExpression = true;
366 }
367 }
368
369 if (StartsObjCMethodExpr) {
370 Contexts.back().ColonIsObjCMethodExpr = true;
371 OpeningParen.setType(TT_ObjCMethodExpr);
372 }
373
374 // MightBeFunctionType and ProbablyFunctionType are used for
375 // function pointer and reference types as well as Objective-C
376 // block types:
377 //
378 // void (*FunctionPointer)(void);
379 // void (&FunctionReference)(void);
380 // void (&&FunctionReference)(void);
381 // void (^ObjCBlock)(void);
382 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
383 bool ProbablyFunctionType =
384 CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret);
385 bool HasMultipleLines = false;
386 bool HasMultipleParametersOnALine = false;
387 bool MightBeObjCForRangeLoop =
388 OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
389 FormatToken *PossibleObjCForInToken = nullptr;
// Walk the tokens up to the matching ')'.
390 while (CurrentToken) {
391 // LookForDecls is set when "if (" has been seen. Check for
392 // 'identifier' '*' 'identifier' followed by not '=' -- this
393 // '*' has to be a binary operator but determineStarAmpUsage() will
394 // categorize it as an unary operator, so set the right type here.
395 if (LookForDecls && CurrentToken->Next) {
396 FormatToken *Prev = CurrentToken->getPreviousNonComment();
397 if (Prev) {
398 FormatToken *PrevPrev = Prev->getPreviousNonComment();
399 FormatToken *Next = CurrentToken->Next;
400 if (PrevPrev && PrevPrev->is(tok::identifier) &&
401 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
402 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
403 Prev->setType(TT_BinaryOperator);
404 LookForDecls = false;
405 }
406 }
407 }
408
// A pointer/reference token directly after '(' or '::' suggests a function
// type, e.g. `(*name)` or `(Class::*name)`.
409 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
410 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
411 tok::coloncolon)) {
412 ProbablyFunctionType = true;
413 }
414 if (CurrentToken->is(tok::comma))
415 MightBeFunctionType = false;
416 if (CurrentToken->Previous->is(TT_BinaryOperator))
417 Contexts.back().IsExpression = true;
418 if (CurrentToken->is(tok::r_paren)) {
// `(...)` followed by '(' (or by '[' on a declaration line) is typed as a
// function-type paren, or as an ObjC block paren if it starts with '^'.
419 if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
420 ProbablyFunctionType && CurrentToken->Next &&
421 (CurrentToken->Next->is(tok::l_paren) ||
422 (CurrentToken->Next->is(tok::l_square) &&
423 Line.MustBeDeclaration))) {
424 OpeningParen.setType(OpeningParen.Next->is(tok::caret)
425 ? TT_ObjCBlockLParen
426 : TT_FunctionTypeLParen);
427 }
428 OpeningParen.MatchingParen = CurrentToken;
429 CurrentToken->MatchingParen = &OpeningParen;
430
431 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
432 OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
433 // Detect the case where macros are used to generate lambdas or
434 // function bodies, e.g.:
435 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
436 for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
437 Tok = Tok->Next) {
438 if (Tok->is(TT_BinaryOperator) &&
439 Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) {
440 Tok->setType(TT_PointerOrReference);
441 }
442 }
443 }
444
445 if (StartsObjCMethodExpr) {
446 CurrentToken->setType(TT_ObjCMethodExpr);
447 if (Contexts.back().FirstObjCSelectorName) {
448 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
449 Contexts.back().LongestObjCSelectorName;
450 }
451 }
452
// Give the ')' the same type as its '(' or as the token before the '(' for
// the kinds listed below.
453 if (OpeningParen.is(TT_AttributeParen))
454 CurrentToken->setType(TT_AttributeParen);
455 if (OpeningParen.is(TT_TypeDeclarationParen))
456 CurrentToken->setType(TT_TypeDeclarationParen);
457 if (OpeningParen.Previous &&
458 OpeningParen.Previous->is(TT_JavaAnnotation)) {
459 CurrentToken->setType(TT_JavaAnnotation);
460 }
461 if (OpeningParen.Previous &&
462 OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
463 CurrentToken->setType(TT_LeadingJavaAnnotation);
464 }
465 if (OpeningParen.Previous &&
466 OpeningParen.Previous->is(TT_AttributeSquare)) {
467 CurrentToken->setType(TT_AttributeSquare);
468 }
469
// Record how the contents were laid out in the input: all on one line,
// several items sharing a line, or one item per line.
470 if (!HasMultipleLines)
471 OpeningParen.setPackingKind(PPK_Inconclusive);
472 else if (HasMultipleParametersOnALine)
473 OpeningParen.setPackingKind(PPK_BinPacked);
474 else
475 OpeningParen.setPackingKind(PPK_OnePerLine);
476
477 next();
478 return true;
479 }
// A closing bracket of another kind means the parentheses are unbalanced.
480 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
481 return false;
482
483 if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
484 OpeningParen.setType(TT_Unknown);
485 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
486 !CurrentToken->Next->HasUnescapedNewline &&
487 !CurrentToken->Next->isTrailingComment()) {
488 HasMultipleParametersOnALine = true;
489 }
490 bool ProbablyFunctionTypeLParen =
491 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
492 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
// After `const`, `auto` or a simple type specifier the contents are treated
// as a declaration, unless a '{' or a '(' that does not look like a function
// type follows.
493 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
494 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
495 !(CurrentToken->is(tok::l_brace) ||
496 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
497 Contexts.back().IsExpression = false;
498 }
// A ';' or ':' rules out an ObjC `for (... in ...)` loop; undo a tentative
// TT_ObjCForIn marking.
499 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
500 MightBeObjCForRangeLoop = false;
501 if (PossibleObjCForInToken) {
502 PossibleObjCForInToken->setType(TT_Unknown);
503 PossibleObjCForInToken = nullptr;
504 }
505 }
506 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
507 PossibleObjCForInToken = CurrentToken;
508 PossibleObjCForInToken->setType(TT_ObjCForIn);
509 }
510 // When we discover a 'new', we set CanBeExpression to 'false' in order to
511 // parse the type correctly. Reset that after a comma.
512 if (CurrentToken->is(tok::comma))
513 Contexts.back().CanBeExpression = true;
514
// Keep a pointer to the token being consumed; the parameter count is updated
// with it after consumeToken() has run.
515 FormatToken *Tok = CurrentToken;
516 if (!consumeToken())
517 return false;
518 updateParameterCount(&OpeningParen, Tok);
519 if (CurrentToken && CurrentToken->HasUnescapedNewline)
520 HasMultipleLines = true;
521 }
// Ran out of tokens without finding the closing ')'.
522 return false;
523 }
524
isCSharpAttributeSpecifier(const FormatToken & Tok)525 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
526 if (!Style.isCSharp())
527 return false;
528
529 // `identifier[i]` is not an attribute.
530 if (Tok.Previous && Tok.Previous->is(tok::identifier))
531 return false;
532
533 // Chains of [] in `identifier[i][j][k]` are not attributes.
534 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
535 auto *MatchingParen = Tok.Previous->MatchingParen;
536 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
537 return false;
538 }
539
540 const FormatToken *AttrTok = Tok.Next;
541 if (!AttrTok)
542 return false;
543
544 // Just an empty declaration e.g. string [].
545 if (AttrTok->is(tok::r_square))
546 return false;
547
548 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
549 while (AttrTok && AttrTok->isNot(tok::r_square))
550 AttrTok = AttrTok->Next;
551
552 if (!AttrTok)
553 return false;
554
555 // Allow an attribute to be the only content of a file.
556 AttrTok = AttrTok->Next;
557 if (!AttrTok)
558 return true;
559
560 // Limit this to being an access modifier that follows.
561 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
562 tok::comment, tok::kw_class, tok::kw_static,
563 tok::l_square, Keywords.kw_internal)) {
564 return true;
565 }
566
567 // incase its a [XXX] retval func(....
568 if (AttrTok->Next &&
569 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
570 return true;
571 }
572
573 return false;
574 }
575
isCpp11AttributeSpecifier(const FormatToken & Tok)576 bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
577 return isCppAttribute(Style.isCpp(), Tok);
578 }
579
/// Parses a bracketed token range whose '[' has just been consumed and
/// decides what kind of bracket it is.
///
/// Unless the '[' already has a type, it is typed as one of: structured
/// binding, ObjC method expression, inline-asm symbolic name, C++ or C#
/// attribute, JavaScript computed property name, designated initializer,
/// proto extension, array initializer or array subscript. The brackets are
/// linked through \c MatchingParen, and ObjC selector bookkeeping is
/// completed when the closing ']' is reached.
///
/// \returns \c true once the matching ']' has been consumed; \c false if
/// there is no current token, a stray ')' or '}' is met, \c consumeToken()
/// fails, or the tokens run out.
parseSquare()580 bool parseSquare() {
581 if (!CurrentToken)
582 return false;
583
584 // A '[' could be an index subscript (after an identifier or after
585 // ')' or ']'), it could be the start of an Objective-C method
586 // expression, it could the start of an Objective-C array literal,
587 // or it could be a C++ attribute specifier [[foo::bar]].
588 FormatToken *Left = CurrentToken->Previous;
589 Left->ParentBracket = Contexts.back().ContextKind;
590 FormatToken *Parent = Left->getPreviousNonComment();
591
592 // Cases where '>' is followed by '['.
593 // In C++, this can happen either in array of templates (foo<int>[10])
594 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
595 bool CppArrayTemplates =
596 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
597 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
598 Contexts.back().ContextType == Context::TemplateArgument);
599
// IsInnerSquare: the enclosing context is already inside a C++ attribute, so
// this '[' is treated as part of that attribute as well.
600 const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
601 const bool IsCpp11AttributeSpecifier =
602 isCpp11AttributeSpecifier(*Left) || IsInnerSquare;
603
604 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
605 bool IsCSharpAttributeSpecifier =
606 isCSharpAttributeSpecifier(*Left) ||
607 Contexts.back().InCSharpAttributeSpecifier;
608
609 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
610 bool IsCppStructuredBinding = Left->isCppStructuredBinding(Style);
// An ObjC method expression is assumed only when every other interpretation
// has been ruled out and the preceding token allows an expression to start.
611 bool StartsObjCMethodExpr =
612 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
613 Style.isCpp() && !IsCpp11AttributeSpecifier &&
614 !IsCSharpAttributeSpecifier && Contexts.back().CanBeExpression &&
615 Left->isNot(TT_LambdaLSquare) &&
616 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
617 (!Parent ||
618 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
619 tok::kw_return, tok::kw_throw) ||
620 Parent->isUnaryOperator() ||
621 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
622 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
623 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
624 prec::Unknown));
625 bool ColonFound = false;
626
// Value passed to the ScopedContextCreator for the bracket contents; raised
// to 10 for proto extensions and array subscripts.
627 unsigned BindingIncrease = 1;
628 if (IsCppStructuredBinding) {
629 Left->setType(TT_StructuredBindingLSquare);
630 } else if (Left->is(TT_Unknown)) {
// The first matching branch wins, so the order of the checks matters.
631 if (StartsObjCMethodExpr) {
632 Left->setType(TT_ObjCMethodExpr);
633 } else if (InsideInlineASM) {
634 Left->setType(TT_InlineASMSymbolicNameLSquare);
635 } else if (IsCpp11AttributeSpecifier) {
636 Left->setType(TT_AttributeSquare);
637 if (!IsInnerSquare && Left->Previous)
638 Left->Previous->EndsCppAttributeGroup = false;
639 } else if (Style.isJavaScript() && Parent &&
640 Contexts.back().ContextKind == tok::l_brace &&
641 Parent->isOneOf(tok::l_brace, tok::comma)) {
642 Left->setType(TT_JsComputedPropertyName);
643 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
644 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
645 Left->setType(TT_DesignatedInitializerLSquare);
646 } else if (IsCSharpAttributeSpecifier) {
647 Left->setType(TT_AttributeSquare);
648 } else if (CurrentToken->is(tok::r_square) && Parent &&
649 Parent->is(TT_TemplateCloser)) {
650 Left->setType(TT_ArraySubscriptLSquare);
651 } else if (Style.Language == FormatStyle::LK_Proto ||
652 Style.Language == FormatStyle::LK_TextProto) {
653 // Square braces in LK_Proto can either be message field attributes:
654 //
655 // optional Aaa aaa = 1 [
656 // (aaa) = aaa
657 // ];
658 //
659 // extensions 123 [
660 // (aaa) = aaa
661 // ];
662 //
663 // or text proto extensions (in options):
664 //
665 // option (Aaa.options) = {
666 // [type.type/type] {
667 // key: value
668 // }
669 // }
670 //
671 // or repeated fields (in options):
672 //
673 // option (Aaa.options) = {
674 // keys: [ 1, 2, 3 ]
675 // }
676 //
677 // In the first and the third case we want to spread the contents inside
678 // the square braces; in the second we want to keep them inline.
679 Left->setType(TT_ArrayInitializerLSquare);
680 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
681 tok::equal) &&
682 !Left->endsSequence(tok::l_square, tok::numeric_constant,
683 tok::identifier) &&
684 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
685 Left->setType(TT_ProtoExtensionLSquare);
686 BindingIncrease = 10;
687 }
688 } else if (!CppArrayTemplates && Parent &&
689 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
690 tok::comma, tok::l_paren, tok::l_square,
691 tok::question, tok::colon, tok::kw_return,
692 // Should only be relevant to JavaScript:
693 tok::kw_default)) {
694 Left->setType(TT_ArrayInitializerLSquare);
695 } else {
696 BindingIncrease = 10;
697 Left->setType(TT_ArraySubscriptLSquare);
698 }
699 }
700
701 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
702 Contexts.back().IsExpression = true;
703 if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
704 Contexts.back().IsExpression = false;
705
706 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
707 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
708 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
709
// Walk the tokens up to the matching ']'.
710 while (CurrentToken) {
711 if (CurrentToken->is(tok::r_square)) {
712 if (IsCpp11AttributeSpecifier) {
713 CurrentToken->setType(TT_AttributeSquare);
714 if (!IsInnerSquare)
715 CurrentToken->EndsCppAttributeGroup = true;
716 }
717 if (IsCSharpAttributeSpecifier) {
718 CurrentToken->setType(TT_AttributeSquare);
719 } else if (((CurrentToken->Next &&
720 CurrentToken->Next->is(tok::l_paren)) ||
721 (CurrentToken->Previous &&
722 CurrentToken->Previous->Previous == Left)) &&
723 Left->is(TT_ObjCMethodExpr)) {
724 // An ObjC method call is rarely followed by an open parenthesis. It
725 // also can't be composed of just one token, unless it's a macro that
726 // will be expanded to more tokens.
727 // FIXME: Do we incorrectly label ":" with this?
728 StartsObjCMethodExpr = false;
729 Left->setType(TT_Unknown);
730 }
731 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
732 CurrentToken->setType(TT_ObjCMethodExpr);
733 // If we haven't seen a colon yet, make sure the last identifier
734 // before the r_square is tagged as a selector name component.
735 if (!ColonFound && CurrentToken->Previous &&
736 CurrentToken->Previous->is(TT_Unknown) &&
737 canBeObjCSelectorComponent(*CurrentToken->Previous)) {
738 CurrentToken->Previous->setType(TT_SelectorName);
739 }
740 // determineStarAmpUsage() thinks that '*' '[' is allocating an
741 // array of pointers, but if '[' starts a selector then '*' is a
742 // binary operator.
743 if (Parent && Parent->is(TT_PointerOrReference))
744 Parent->overwriteFixedType(TT_BinaryOperator);
745 }
746 // An arrow after an ObjC method expression is not a lambda arrow.
747 if (CurrentToken->getType() == TT_ObjCMethodExpr &&
748 CurrentToken->Next && CurrentToken->Next->is(TT_LambdaArrow)) {
749 CurrentToken->Next->overwriteFixedType(TT_Unknown);
750 }
751 Left->MatchingParen = CurrentToken;
752 CurrentToken->MatchingParen = Left;
753 // FirstObjCSelectorName is set when a colon is found. This does
754 // not work, however, when the method has no parameters.
755 // Here, we set FirstObjCSelectorName when the end of the method call is
756 // reached, in case it was not set already.
757 if (!Contexts.back().FirstObjCSelectorName) {
758 FormatToken *Previous = CurrentToken->getPreviousNonComment();
759 if (Previous && Previous->is(TT_SelectorName)) {
760 Previous->ObjCSelectorNameParts = 1;
761 Contexts.back().FirstObjCSelectorName = Previous;
762 }
763 } else {
764 Left->ParameterCount =
765 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
766 }
// Store the longest selector name seen on the first selector token; it is
// reset to 0 when the '[' has more than one block parameter.
767 if (Contexts.back().FirstObjCSelectorName) {
768 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
769 Contexts.back().LongestObjCSelectorName;
770 if (Left->BlockParameterCount > 1)
771 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
772 }
773 next();
774 return true;
775 }
// A closing bracket of another kind means the brackets are unbalanced.
776 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
777 return false;
778 if (CurrentToken->is(tok::colon)) {
779 if (IsCpp11AttributeSpecifier &&
780 CurrentToken->endsSequence(tok::colon, tok::identifier,
781 tok::kw_using)) {
782 // Remember that this is a [[using ns: foo]] C++ attribute, so we
783 // don't add a space before the colon (unlike other colons).
784 CurrentToken->setType(TT_AttributeColon);
785 } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
786 Left->isOneOf(TT_ArraySubscriptLSquare,
787 TT_DesignatedInitializerLSquare)) {
// A ':' inside what was typed as a subscript or designated initializer
// switches the interpretation to an ObjC method expression.
788 Left->setType(TT_ObjCMethodExpr);
789 StartsObjCMethodExpr = true;
790 Contexts.back().ColonIsObjCMethodExpr = true;
791 if (Parent && Parent->is(tok::r_paren)) {
792 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
793 Parent->setType(TT_CastRParen);
794 }
795 }
796 ColonFound = true;
797 }
// A ',' before any ':' means this is an array initializer rather than an
// ObjC method expression.
798 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
799 !ColonFound) {
800 Left->setType(TT_ArrayInitializerLSquare);
801 }
// Keep a pointer to the token being consumed; the parameter count is updated
// with it after consumeToken() has run.
802 FormatToken *Tok = CurrentToken;
803 if (!consumeToken())
804 return false;
805 updateParameterCount(Left, Tok);
806 }
// Ran out of tokens without finding the closing ']'.
807 return false;
808 }
809
couldBeInStructArrayInitializer() const810 bool couldBeInStructArrayInitializer() const {
811 if (Contexts.size() < 2)
812 return false;
813 // We want to back up no more then 2 context levels i.e.
814 // . { { <-
815 const auto End = std::next(Contexts.rbegin(), 2);
816 auto Last = Contexts.rbegin();
817 unsigned Depth = 0;
818 for (; Last != End; ++Last)
819 if (Last->ContextKind == tok::l_brace)
820 ++Depth;
821 return Depth == 2 && Last->ContextKind != tok::l_brace;
822 }
823
/// Parses a brace-delimited scope whose opening '{' has already been consumed
/// (it is \c CurrentToken->Previous), up to and including the matching '}'.
///
/// While scanning, the opening brace may be retyped (\c TT_ObjCBlockLBrace,
/// \c TT_DictLiteral) and its parameter counts are updated for every token
/// seen directly inside the braces.
///
/// \returns \c true if the matching '}' was consumed or the tokens ran out
/// (including when there is no current token on entry); \c false if a ')' or
/// ']' is encountered inside the braces or a nested \c consumeToken() fails.
bool parseBrace() {
  if (!CurrentToken)
    return true;

  assert(CurrentToken->Previous);
  FormatToken &OpeningBrace = *CurrentToken->Previous;
  assert(OpeningBrace.is(tok::l_brace));
  // Remember which kind of bracket (if any) encloses this brace.
  OpeningBrace.ParentBracket = Contexts.back().ContextKind;

  // A caret recorded in the enclosing context turns this brace into an ObjC
  // block brace; the flag is cleared either way.
  if (Contexts.back().CaretFound)
    OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
  Contexts.back().CaretFound = false;

  // From here on Contexts.back() is the new brace context; it is popped when
  // ContextCreator goes out of scope.
  ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
  Contexts.back().ColonIsDictLiteral = true;
  if (OpeningBrace.is(BK_BracedInit))
    Contexts.back().IsExpression = true;
  if (Style.isJavaScript() && OpeningBrace.Previous &&
      OpeningBrace.Previous->is(TT_JsTypeColon)) {
    Contexts.back().IsExpression = false;
  }

  // Number of commas seen directly inside this brace pair.
  unsigned CommaCount = 0;
  while (CurrentToken) {
    if (CurrentToken->is(tok::r_brace)) {
      assert(OpeningBrace.Optional == CurrentToken->Optional);
      OpeningBrace.MatchingParen = CurrentToken;
      CurrentToken->MatchingParen = &OpeningBrace;
      if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
        // A comma-containing brace list nested directly in another brace list
        // is flagged as a struct array initializer.
        if (OpeningBrace.ParentBracket == tok::l_brace &&
            couldBeInStructArrayInitializer() && CommaCount > 0) {
          Contexts.back().ContextType = Context::StructArrayInitializer;
        }
      }
      next();
      return true;
    }
    // A closing ')' or ']' here means the brackets are unbalanced.
    if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
      return false;
    updateParameterCount(&OpeningBrace, CurrentToken);
    if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
      FormatToken *Previous = CurrentToken->getPreviousNonComment();
      // Skip over a JavaScript optional marker to reach the token before it.
      if (Previous->is(TT_JsTypeOptionalQuestion))
        Previous = Previous->getPreviousNonComment();
      if ((CurrentToken->is(tok::colon) &&
           (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
          Style.Language == FormatStyle::LK_Proto ||
          Style.Language == FormatStyle::LK_TextProto) {
        OpeningBrace.setType(TT_DictLiteral);
        // An identifier-like token or string literal before the separator is
        // the entry's key.
        if (Previous->Tok.getIdentifierInfo() ||
            Previous->is(tok::string_literal)) {
          Previous->setType(TT_SelectorName);
        }
      }
      if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown))
        OpeningBrace.setType(TT_DictLiteral);
      else if (Style.isJavaScript())
        OpeningBrace.overwriteFixedType(TT_DictLiteral);
    }
    if (CurrentToken->is(tok::comma)) {
      if (Style.isJavaScript())
        OpeningBrace.overwriteFixedType(TT_DictLiteral);
      ++CommaCount;
    }
    if (!consumeToken())
      return false;
  }
  return true;
}
893
updateParameterCount(FormatToken * Left,FormatToken * Current)894 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
895 // For ObjC methods, the number of parameters is calculated differently as
896 // method declarations have a different structure (the parameters are not
897 // inside a bracket scope).
898 if (Current->is(tok::l_brace) && Current->is(BK_Block))
899 ++Left->BlockParameterCount;
900 if (Current->is(tok::comma)) {
901 ++Left->ParameterCount;
902 if (!Left->Role)
903 Left->Role.reset(new CommaSeparatedList(Style));
904 Left->Role->CommaFound(Current);
905 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
906 Left->ParameterCount = 1;
907 }
908 }
909
parseConditional()910 bool parseConditional() {
911 while (CurrentToken) {
912 if (CurrentToken->is(tok::colon)) {
913 CurrentToken->setType(TT_ConditionalExpr);
914 next();
915 return true;
916 }
917 if (!consumeToken())
918 return false;
919 }
920 return false;
921 }
922
parseTemplateDeclaration()923 bool parseTemplateDeclaration() {
924 if (CurrentToken && CurrentToken->is(tok::less)) {
925 CurrentToken->setType(TT_TemplateOpener);
926 next();
927 if (!parseAngle())
928 return false;
929 if (CurrentToken)
930 CurrentToken->Previous->ClosesTemplateDeclaration = true;
931 return true;
932 }
933 return false;
934 }
935
/// Consumes \c CurrentToken (which must not be null) and annotates it based on
/// its kind, recursing into nested constructs such as parentheses, square
/// brackets, braces, angle brackets, conditional expressions and template
/// declarations.
///
/// \returns \c false if the token cannot be handled at this point (a closing
/// ')' or ']', a '}' that is not the first token of the line, a ':' without a
/// preceding token, a 'for' that is not followed by '(') or if a nested parse
/// fails; \c true otherwise.
bool consumeToken() {
  // Tok is the token being handled; CurrentToken already points past it.
  FormatToken *Tok = CurrentToken;
  next();
  // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
  // operators.
  if (Tok->is(TT_VerilogTableItem))
    return true;
  switch (Tok->Tok.getKind()) {
  case tok::plus:
  case tok::minus:
    // A '+' or '-' that starts a declaration line is an ObjC method
    // specifier.
    if (!Tok->Previous && Line.MustBeDeclaration)
      Tok->setType(TT_ObjCMethodSpecifier);
    break;
  case tok::colon:
    if (!Tok->Previous)
      return false;
    // Colons from ?: are handled in parseConditional().
    if (Style.isJavaScript()) {
      if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
          (Contexts.size() == 1 &&               // switch/case labels
           !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
          Contexts.back().ContextKind == tok::l_paren ||  // function params
          Contexts.back().ContextKind == tok::l_square || // array type
          (!Contexts.back().IsExpression &&
           Contexts.back().ContextKind == tok::l_brace) || // object type
          (Contexts.size() == 1 &&
           Line.MustBeDeclaration)) { // method/property declaration
        Contexts.back().IsExpression = false;
        Tok->setType(TT_JsTypeColon);
        break;
      }
    } else if (Style.isCSharp()) {
      if (Contexts.back().InCSharpAttributeSpecifier) {
        Tok->setType(TT_AttributeColon);
        break;
      }
      if (Contexts.back().ContextKind == tok::l_paren) {
        Tok->setType(TT_CSharpNamedArgumentColon);
        break;
      }
    } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
      // The distribution weight operators are labeled
      // TT_BinaryOperator by the lexer.
      if (Keywords.isVerilogEnd(*Tok->Previous) ||
          Keywords.isVerilogBegin(*Tok->Previous)) {
        Tok->setType(TT_VerilogBlockLabelColon);
      } else if (Contexts.back().ContextKind == tok::l_square) {
        Tok->setType(TT_BitFieldColon);
      } else if (Contexts.back().ColonIsDictLiteral) {
        Tok->setType(TT_DictLiteral);
      } else if (Contexts.size() == 1) {
        // In Verilog a case label doesn't have the case keyword. We
        // assume a colon following an expression is a case label.
        // Colons from ?: are annotated in parseConditional().
        Tok->setType(TT_GotoLabelColon);
        if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
          --Line.Level;
      }
      break;
    }
    // Remaining cases: the language-specific branches above did not settle
    // the colon's type.
    if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
        Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
        Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
      Tok->setType(TT_ModulePartitionColon);
    } else if (Contexts.back().ColonIsDictLiteral ||
               Style.Language == FormatStyle::LK_Proto ||
               Style.Language == FormatStyle::LK_TextProto) {
      Tok->setType(TT_DictLiteral);
      if (Style.Language == FormatStyle::LK_TextProto) {
        if (FormatToken *Previous = Tok->getPreviousNonComment())
          Previous->setType(TT_SelectorName);
      }
    } else if (Contexts.back().ColonIsObjCMethodExpr ||
               Line.startsWith(TT_ObjCMethodSpecifier)) {
      Tok->setType(TT_ObjCMethodExpr);
      const FormatToken *BeforePrevious = Tok->Previous->Previous;
      // Ensure we tag all identifiers in method declarations as
      // TT_SelectorName.
      bool UnknownIdentifierInMethodDeclaration =
          Line.startsWith(TT_ObjCMethodSpecifier) &&
          Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
      if (!BeforePrevious ||
          // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
          !(BeforePrevious->is(TT_CastRParen) ||
            (BeforePrevious->is(TT_ObjCMethodExpr) &&
             BeforePrevious->is(tok::colon))) ||
          BeforePrevious->is(tok::r_square) ||
          Contexts.back().LongestObjCSelectorName == 0 ||
          UnknownIdentifierInMethodDeclaration) {
        Tok->Previous->setType(TT_SelectorName);
        // The first selector name part anchors the bookkeeping; later parts
        // only contribute to the longest-name width.
        if (!Contexts.back().FirstObjCSelectorName) {
          Contexts.back().FirstObjCSelectorName = Tok->Previous;
        } else if (Tok->Previous->ColumnWidth >
                   Contexts.back().LongestObjCSelectorName) {
          Contexts.back().LongestObjCSelectorName =
              Tok->Previous->ColumnWidth;
        }
        Tok->Previous->ParameterIndex =
            Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
        ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
      }
    } else if (Contexts.back().ColonIsForRangeExpr) {
      Tok->setType(TT_RangeBasedForLoopColon);
    } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
      Tok->setType(TT_GenericSelectionColon);
    } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
      Tok->setType(TT_BitFieldColon);
    } else if (Contexts.size() == 1 &&
               !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
                                    tok::kw_default)) {
      FormatToken *Prev = Tok->getPreviousNonComment();
      if (!Prev)
        break;
      if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
          Prev->ClosesRequiresClause) {
        Tok->setType(TT_CtorInitializerColon);
      } else if (Prev->is(tok::kw_try)) {
        // Member initializer list within function try block.
        FormatToken *PrevPrev = Prev->getPreviousNonComment();
        if (!PrevPrev)
          break;
        if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
          Tok->setType(TT_CtorInitializerColon);
      } else {
        Tok->setType(TT_InheritanceColon);
      }
    } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
               (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
                (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
                 Tok->Next->Next->is(tok::colon)))) {
      // This handles a special macro in ObjC code where selectors including
      // the colon are passed as macro arguments.
      Tok->setType(TT_ObjCMethodExpr);
    } else if (Contexts.back().ContextKind == tok::l_paren &&
               !Line.InPragmaDirective) {
      Tok->setType(TT_InlineASMColon);
    }
    break;
  case tok::pipe:
  case tok::amp:
    // | and & in declarations/type expressions represent union and
    // intersection types, respectively.
    if (Style.isJavaScript() && !Contexts.back().IsExpression)
      Tok->setType(TT_JsTypeOperator);
    break;
  case tok::kw_if:
    // Skip a 'constexpr' or an identifier between 'if' and '('.
    if (CurrentToken &&
        CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
      next();
    }
    [[fallthrough]];
  case tok::kw_while:
    if (CurrentToken && CurrentToken->is(tok::l_paren)) {
      next();
      if (!parseParens(/*LookForDecls=*/true))
        return false;
    }
    break;
  case tok::kw_for:
    if (Style.isJavaScript()) {
      // x.for and {for: ...}
      if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
          (Tok->Next && Tok->Next->is(tok::colon))) {
        break;
      }
      // JS' for await ( ...
      if (CurrentToken && CurrentToken->is(Keywords.kw_await))
        next();
    }
    if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
      next();
    Contexts.back().ColonIsForRangeExpr = true;
    if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
      return false;
    next();
    if (!parseParens())
      return false;
    break;
  case tok::l_paren:
    // When faced with 'operator()()', the kw_operator handler incorrectly
    // marks the first l_paren as an OverloadedOperatorLParen. Here, we make
    // the first two parens OverloadedOperators and the second l_paren an
    // OverloadedOperatorLParen.
    if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
        Tok->Previous->MatchingParen &&
        Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
      Tok->Previous->setType(TT_OverloadedOperator);
      Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
      Tok->setType(TT_OverloadedOperatorLParen);
    }

    if (!parseParens())
      return false;
    // A top-level parenthesis on a declaration line suggests a function
    // declaration, unless one of the exclusions below applies.
    if (Line.MustBeDeclaration && Contexts.size() == 1 &&
        !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
        !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen) &&
        (!Tok->Previous ||
         !Tok->Previous->isOneOf(tok::kw___attribute, TT_RequiresClause,
                                 TT_LeadingJavaAnnotation))) {
      Line.MightBeFunctionDecl = true;
    }
    break;
  case tok::l_square:
    if (!parseSquare())
      return false;
    break;
  case tok::l_brace:
    if (Style.Language == FormatStyle::LK_TextProto) {
      // In text protos the token in front of '{' names the field.
      FormatToken *Previous = Tok->getPreviousNonComment();
      if (Previous && Previous->getType() != TT_DictLiteral)
        Previous->setType(TT_SelectorName);
    }
    if (!parseBrace())
      return false;
    break;
  case tok::less:
    if (parseAngle()) {
      Tok->setType(TT_TemplateOpener);
      // In LK_Proto, we must distinguish between:
      //   map<key, value>
      //   msg < item: data >
      //   msg: < item: data >
      // In LK_TextProto, map<key, value> does not occur.
      if (Style.Language == FormatStyle::LK_TextProto ||
          (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
           Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
        Tok->setType(TT_DictLiteral);
        FormatToken *Previous = Tok->getPreviousNonComment();
        if (Previous && Previous->getType() != TT_DictLiteral)
          Previous->setType(TT_SelectorName);
      }
    } else {
      // Not a template: treat '<' as a binary operator, remember that, and
      // restart right after it.
      Tok->setType(TT_BinaryOperator);
      NonTemplateLess.insert(Tok);
      CurrentToken = Tok;
      next();
    }
    break;
  case tok::r_paren:
  case tok::r_square:
    // Closing brackets are consumed by the parser of the matching opener;
    // seeing one here means it is unmatched.
    return false;
  case tok::r_brace:
    // Lines can start with '}'.
    if (Tok->Previous)
      return false;
    break;
  case tok::greater:
    if (Style.Language != FormatStyle::LK_TextProto)
      Tok->setType(TT_BinaryOperator);
    if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
      Tok->SpacesRequiredBefore = 1;
    break;
  case tok::kw_operator:
    if (Style.Language == FormatStyle::LK_TextProto ||
        Style.Language == FormatStyle::LK_Proto) {
      break;
    }
    // Consume the tokens that make up the operator's name, up to the
    // parameter list.
    while (CurrentToken &&
           !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
      if (CurrentToken->isOneOf(tok::star, tok::amp))
        CurrentToken->setType(TT_PointerOrReference);
      consumeToken();
      if (!CurrentToken)
        continue;
      if (CurrentToken->is(tok::comma) &&
          CurrentToken->Previous->isNot(tok::kw_operator)) {
        break;
      }
      if (CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
                                          tok::comma, tok::star, tok::arrow,
                                          tok::amp, tok::ampamp) ||
          // User defined literal.
          CurrentToken->Previous->TokenText.startswith("\"\"")) {
        CurrentToken->Previous->setType(TT_OverloadedOperator);
      }
    }
    if (CurrentToken && CurrentToken->is(tok::l_paren))
      CurrentToken->setType(TT_OverloadedOperatorLParen);
    if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
      CurrentToken->Previous->setType(TT_OverloadedOperator);
    break;
  case tok::question:
    if (Style.isJavaScript() && Tok->Next &&
        Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
                           tok::r_brace)) {
      // Question marks before semicolons, colons, etc. indicate optional
      // types (fields, parameters), e.g.
      //   function(x?: string, y?) {...}
      //   class X { y?; }
      Tok->setType(TT_JsTypeOptionalQuestion);
      break;
    }
    // Declarations cannot be conditional expressions, this can only be part
    // of a type declaration.
    if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
        Style.isJavaScript()) {
      break;
    }
    if (Style.isCSharp()) {
      // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
      // nullable types.
      // Line.MustBeDeclaration will be true for `Type? name;`.
      if ((!Contexts.back().IsExpression && Line.MustBeDeclaration) ||
          (Tok->Next && Tok->Next->isOneOf(tok::r_paren, tok::greater)) ||
          (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
           Tok->Next->Next->is(tok::equal))) {
        Tok->setType(TT_CSharpNullable);
        break;
      }
    }
    // The result is not checked: a missing ':' does not fail this token.
    parseConditional();
    break;
  case tok::kw_template:
    // The result is not checked here either.
    parseTemplateDeclaration();
    break;
  case tok::comma:
    switch (Contexts.back().ContextType) {
    case Context::CtorInitializer:
      Tok->setType(TT_CtorInitializerComma);
      break;
    case Context::InheritanceList:
      Tok->setType(TT_InheritanceComma);
      break;
    default:
      // A comma after a declared name, at top level or in the header of a
      // statement with an initializer, marks a multi-variable declaration.
      if (Contexts.back().FirstStartOfName &&
          (Contexts.size() == 1 || startsWithInitStatement(Line))) {
        Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
        Line.IsMultiVariableDeclStmt = true;
      }
      break;
    }
    if (Contexts.back().ContextType == Context::ForEachMacro)
      Contexts.back().IsExpression = true;
    break;
  case tok::kw_default:
    // Unindent case labels.
    if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
        (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
      --Line.Level;
    }
    break;
  case tok::identifier:
    if (Tok->isOneOf(Keywords.kw___has_include,
                     Keywords.kw___has_include_next)) {
      parseHasInclude();
    }
    if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
        Tok->Next->isNot(tok::l_paren)) {
      Tok->setType(TT_CSharpGenericTypeConstraint);
      parseCSharpGenericTypeConstraint();
      // A line that starts with `where` continues the previous line.
      if (Tok->getPreviousNonComment() == nullptr)
        Line.IsContinuation = true;
    }
    break;
  case tok::arrow:
    if (Tok->isNot(TT_LambdaArrow) && Tok->Previous &&
        Tok->Previous->is(tok::kw_noexcept)) {
      Tok->setType(TT_TrailingReturnArrow);
    }
    break;
  case tok::eof:
    if (Style.InsertNewlineAtEOF && Tok->NewlinesBefore == 0)
      Tok->NewlinesBefore = 1;
    break;
  default:
    break;
  }
  return true;
}
1305
parseCSharpGenericTypeConstraint()1306 void parseCSharpGenericTypeConstraint() {
1307 int OpenAngleBracketsCount = 0;
1308 while (CurrentToken) {
1309 if (CurrentToken->is(tok::less)) {
1310 // parseAngle is too greedy and will consume the whole line.
1311 CurrentToken->setType(TT_TemplateOpener);
1312 ++OpenAngleBracketsCount;
1313 next();
1314 } else if (CurrentToken->is(tok::greater)) {
1315 CurrentToken->setType(TT_TemplateCloser);
1316 --OpenAngleBracketsCount;
1317 next();
1318 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1319 // We allow line breaks after GenericTypeConstraintComma's
1320 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1321 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1322 next();
1323 } else if (CurrentToken->is(Keywords.kw_where)) {
1324 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1325 next();
1326 } else if (CurrentToken->is(tok::colon)) {
1327 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1328 next();
1329 } else {
1330 next();
1331 }
1332 }
1333 }
1334
parseIncludeDirective()1335 void parseIncludeDirective() {
1336 if (CurrentToken && CurrentToken->is(tok::less)) {
1337 next();
1338 while (CurrentToken) {
1339 // Mark tokens up to the trailing line comments as implicit string
1340 // literals.
1341 if (CurrentToken->isNot(tok::comment) &&
1342 !CurrentToken->TokenText.startswith("//")) {
1343 CurrentToken->setType(TT_ImplicitStringLiteral);
1344 }
1345 next();
1346 }
1347 }
1348 }
1349
parseWarningOrError()1350 void parseWarningOrError() {
1351 next();
1352 // We still want to format the whitespace left of the first token of the
1353 // warning or error.
1354 next();
1355 while (CurrentToken) {
1356 CurrentToken->setType(TT_ImplicitStringLiteral);
1357 next();
1358 }
1359 }
1360
parsePragma()1361 void parsePragma() {
1362 next(); // Consume "pragma".
1363 if (CurrentToken &&
1364 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1365 Keywords.kw_region)) {
1366 bool IsMarkOrRegion =
1367 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1368 next();
1369 next(); // Consume first token (so we fix leading whitespace).
1370 while (CurrentToken) {
1371 if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1372 CurrentToken->setType(TT_ImplicitStringLiteral);
1373 next();
1374 }
1375 }
1376 }
1377
parseHasInclude()1378 void parseHasInclude() {
1379 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
1380 return;
1381 next(); // '('
1382 parseIncludeDirective();
1383 next(); // ')'
1384 }
1385
/// Parses a line whose current token introduces a preprocessor directive
/// (the caller checks for \c tok::hash). \c CurrentToken must not be null.
///
/// \returns \c LT_ImportStatement for include/import directives and for
/// JavaScript lines where the hash is the first token; \c LT_Invalid in
/// Verilog when the word after the introducer is not a preprocessor directive;
/// \c LT_PreprocessorDirective otherwise.
LineType parsePreprocessorDirective() {
  bool IsFirstToken = CurrentToken->IsFirst;
  LineType Type = LT_PreprocessorDirective;
  // Step over the directive introducer.
  next();
  if (!CurrentToken)
    return Type;

  if (Style.isJavaScript() && IsFirstToken) {
    // JavaScript files can contain shebang lines of the form:
    // #!/usr/bin/env node
    // Treat these like C++ #include directives.
    while (CurrentToken) {
      // Tokens cannot be comments here.
      CurrentToken->setType(TT_ImplicitStringLiteral);
      next();
    }
    return LT_ImportStatement;
  }

  // A number right after the introducer: only make sure it is separated by a
  // space.
  if (CurrentToken->is(tok::numeric_constant)) {
    CurrentToken->SpacesRequiredBefore = 1;
    return Type;
  }
  // Hashes in the middle of a line can lead to any strange token
  // sequence.
  if (!CurrentToken->Tok.getIdentifierInfo())
    return Type;
  // In Verilog macro expansions start with a backtick just like preprocessor
  // directives. Thus we stop if the word is not a preprocessor directive.
  if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
    return LT_Invalid;
  switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_include:
  case tok::pp_include_next:
  case tok::pp_import:
    next();
    parseIncludeDirective();
    Type = LT_ImportStatement;
    break;
  case tok::pp_error:
  case tok::pp_warning:
    parseWarningOrError();
    break;
  case tok::pp_pragma:
    parsePragma();
    break;
  case tok::pp_if:
  case tok::pp_elif:
    // The condition is annotated as an expression; the line type returned by
    // parseLine() is not used.
    Contexts.back().IsExpression = true;
    next();
    parseLine();
    break;
  default:
    break;
  }
  // Consume whatever is left on the line, still handling parentheses and
  // __has_include so that their contents get annotated.
  while (CurrentToken) {
    FormatToken *Tok = CurrentToken;
    next();
    if (Tok->is(tok::l_paren)) {
      parseParens();
    } else if (Tok->isOneOf(Keywords.kw___has_include,
                            Keywords.kw___has_include_next)) {
      parseHasInclude();
    }
  }
  return Type;
}
1453
1454 public:
/// Annotates the tokens of the current line, starting at \c CurrentToken, and
/// classifies the line.
///
/// \returns \c LT_Invalid if there is no token to parse or a token could not
/// be consumed; otherwise the detected line type (preprocessor directive,
/// import statement, virtual function declaration, ObjC method declaration,
/// array-of-struct initializer, or \c LT_Other).
LineType parseLine() {
  if (!CurrentToken)
    return LT_Invalid;
  NonTemplateLess.clear();
  if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
    // We were not yet allowed to use C++17 optional when this was being
    // written. So we used LT_Invalid to mark that the line is not a
    // preprocessor directive.
    auto Type = parsePreprocessorDirective();
    if (Type != LT_Invalid)
      return Type;
  }

  // Directly allow to 'import <string-literal>' to support protocol buffer
  // definitions (github.com/google/protobuf) or missing "#" (either way we
  // should not break the line).
  IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
  if ((Style.Language == FormatStyle::LK_Java &&
       CurrentToken->is(Keywords.kw_package)) ||
      (!Style.isVerilog() && Info &&
       Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
       CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
                                   tok::kw_static))) {
    next();
    parseIncludeDirective();
    return LT_ImportStatement;
  }

  // If this line starts and ends in '<' and '>', respectively, it is likely
  // part of "#define <a/b.h>".
  if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
    parseIncludeDirective();
    return LT_ImportStatement;
  }

  // In .proto files, top-level options and package statements are very
  // similar to import statements and should not be line-wrapped.
  if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
      CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
    next();
    if (CurrentToken && CurrentToken->is(tok::identifier)) {
      // Skip the rest of the line without further annotation.
      while (CurrentToken)
        next();
      return LT_ImportStatement;
    }
  }

  bool KeywordVirtualFound = false;
  bool ImportStatement = false;

  // import {...} from '...';
  if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
    ImportStatement = true;

  // Main loop: consume every token of the line, noting `virtual` and
  // JavaScript import-like constructs along the way.
  while (CurrentToken) {
    if (CurrentToken->is(tok::kw_virtual))
      KeywordVirtualFound = true;
    if (Style.isJavaScript()) {
      // export {...} from '...';
      // An export followed by "from 'some string';" is a re-export from
      // another module identified by a URI and is treated as a
      // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
      // Just "export {...};" or "export class ..." should not be treated as
      // an import in this sense.
      if (Line.First->is(tok::kw_export) &&
          CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
          CurrentToken->Next->isStringLiteral()) {
        ImportStatement = true;
      }
      if (isClosureImportStatement(*CurrentToken))
        ImportStatement = true;
    }
    if (!consumeToken())
      return LT_Invalid;
  }
  // `virtual` takes precedence over the import classification.
  if (KeywordVirtualFound)
    return LT_VirtualFunctionDecl;
  if (ImportStatement)
    return LT_ImportStatement;

  if (Line.startsWith(TT_ObjCMethodSpecifier)) {
    // Publish the longest selector name width on the first selector name
    // token.
    if (Contexts.back().FirstObjCSelectorName) {
      Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
          Contexts.back().LongestObjCSelectorName;
    }
    return LT_ObjCMethodDecl;
  }

  // A context still flagged as struct array initializer classifies the whole
  // line.
  for (const auto &ctx : Contexts)
    if (ctx.ContextType == Context::StructArrayInitializer)
      return LT_ArrayOfStructInitializer;

  return LT_Other;
}
1549
1550 private:
isClosureImportStatement(const FormatToken & Tok)1551 bool isClosureImportStatement(const FormatToken &Tok) {
1552 // FIXME: Closure-library specific stuff should not be hard-coded but be
1553 // configurable.
1554 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1555 Tok.Next->Next &&
1556 (Tok.Next->Next->TokenText == "module" ||
1557 Tok.Next->Next->TokenText == "provide" ||
1558 Tok.Next->Next->TokenText == "require" ||
1559 Tok.Next->Next->TokenText == "requireType" ||
1560 Tok.Next->Next->TokenText == "forwardDeclare") &&
1561 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
1562 }
1563
resetTokenMetadata()1564 void resetTokenMetadata() {
1565 if (!CurrentToken)
1566 return;
1567
1568 // Reset token type in case we have already looked at it and then
1569 // recovered from an error (e.g. failure to find the matching >).
1570 if (!CurrentToken->isTypeFinalized() &&
1571 !CurrentToken->isOneOf(
1572 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
1573 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
1574 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
1575 TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
1576 TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
1577 TT_UntouchableMacroFunc, TT_StatementAttributeLikeMacro,
1578 TT_FunctionLikeOrFreestandingMacro, TT_ClassLBrace, TT_EnumLBrace,
1579 TT_RecordLBrace, TT_StructLBrace, TT_UnionLBrace, TT_RequiresClause,
1580 TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
1581 TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
1582 TT_CompoundRequirementLBrace, TT_BracedListLBrace)) {
1583 CurrentToken->setType(TT_Unknown);
1584 }
1585 CurrentToken->Role.reset();
1586 CurrentToken->MatchingParen = nullptr;
1587 CurrentToken->FakeLParens.clear();
1588 CurrentToken->FakeRParens = 0;
1589 }
1590
/// Finishes processing \c CurrentToken and advances to the token after it.
/// Does nothing if there is no current token.
///
/// The token being left receives its nesting level and binding strength from
/// the context stack and is passed to \c modifyContext() and
/// \c determineTokenType(). The new current token then has its annotation
/// metadata reset.
void next() {
  if (!CurrentToken)
    return;

  // The outermost context corresponds to nesting level 0.
  CurrentToken->NestingLevel = Contexts.size() - 1;
  CurrentToken->BindingStrength = Contexts.back().BindingStrength;
  // The context is updated for this token before its type is determined.
  modifyContext(*CurrentToken);
  determineTokenType(*CurrentToken);
  CurrentToken = CurrentToken->Next;

  // Prepare the new current token (if any) for annotation.
  resetTokenMetadata();
}
1603
/// A struct to hold information valid in a specific context, e.g.
/// a pair of parenthesis.
struct Context {
  /// Creates a context of kind \p ContextKind with the given
  /// \p BindingStrength and initial \p IsExpression state; all other members
  /// start with their default values.
  Context(tok::TokenKind ContextKind, unsigned BindingStrength,
          bool IsExpression)
      : ContextKind(ContextKind), BindingStrength(BindingStrength),
        IsExpression(IsExpression) {}

  // Kind of the token that opened this context, e.g. tok::l_brace for
  // contexts created while parsing braces.
  tok::TokenKind ContextKind;
  // Copied onto each token consumed in this context; nested contexts add an
  // increment to the enclosing context's value.
  unsigned BindingStrength;
  // Whether tokens in this context are treated as part of an expression.
  // Nested contexts start with the enclosing context's value.
  bool IsExpression;
  // Width (ColumnWidth) of the widest ObjC selector name part recorded in
  // this context so far.
  unsigned LongestObjCSelectorName = 0;
  // Set after a `for` keyword: a ':' in this context belongs to a for loop.
  bool ColonIsForRangeExpr = false;
  // Set for brace contexts: a ':' in this context separates a dictionary
  // key from its value.
  bool ColonIsDictLiteral = false;
  // Set once square brackets are recognized as an ObjC method expression:
  // a ':' in this context belongs to that expression.
  bool ColonIsObjCMethodExpr = false;
  // First token typed as selector name in this context; it carries the
  // selector part count.
  FormatToken *FirstObjCSelectorName = nullptr;
  // When set, a following ',' marks this token as part of a multi-variable
  // declaration statement.
  FormatToken *FirstStartOfName = nullptr;
  bool CanBeExpression = true;
  // When set, the next '{' parsed in this context is typed as an ObjC block
  // brace; cleared when that brace is parsed.
  bool CaretFound = false;
  bool InCpp11AttributeSpecifier = false;
  // When set, a ':' in this context is typed as an attribute colon (C#).
  bool InCSharpAttributeSpecifier = false;
  enum {
    Unknown,
    // Like the part after `:` in a constructor.
    //   Context(...) : IsExpression(IsExpression)
    CtorInitializer,
    // Like in the parentheses in a foreach.
    ForEachMacro,
    // Like the inheritance list in a class declaration.
    //   class Input : public IO
    InheritanceList,
    // Like in the braced list.
    //   int x[] = {};
    StructArrayInitializer,
    // Like in `static_cast<int>`.
    TemplateArgument,
    // C11 _Generic selection.
    C11GenericSelection,
  } ContextType = Unknown;
};
1644
1645 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1646 /// of each instance.
1647 struct ScopedContextCreator {
1648 AnnotatingParser &P;
1649
ScopedContextCreatorclang::format::__anon04924c560111::AnnotatingParser::ScopedContextCreator1650 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1651 unsigned Increase)
1652 : P(P) {
1653 P.Contexts.push_back(Context(ContextKind,
1654 P.Contexts.back().BindingStrength + Increase,
1655 P.Contexts.back().IsExpression));
1656 }
1657
~ScopedContextCreatorclang::format::__anon04924c560111::AnnotatingParser::ScopedContextCreator1658 ~ScopedContextCreator() {
1659 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1660 if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
1661 P.Contexts.pop_back();
1662 P.Contexts.back().ContextType = Context::StructArrayInitializer;
1663 return;
1664 }
1665 }
1666 P.Contexts.pop_back();
1667 }
1668 };
1669
modifyContext(const FormatToken & Current)1670 void modifyContext(const FormatToken &Current) {
1671 auto AssignmentStartsExpression = [&]() {
1672 if (Current.getPrecedence() != prec::Assignment)
1673 return false;
1674
1675 if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
1676 return false;
1677 if (Line.First->is(tok::kw_template)) {
1678 assert(Current.Previous);
1679 if (Current.Previous->is(tok::kw_operator)) {
1680 // `template ... operator=` cannot be an expression.
1681 return false;
1682 }
1683
1684 // `template` keyword can start a variable template.
1685 const FormatToken *Tok = Line.First->getNextNonComment();
1686 assert(Tok); // Current token is on the same line.
1687 if (Tok->isNot(TT_TemplateOpener)) {
1688 // Explicit template instantiations do not have `<>`.
1689 return false;
1690 }
1691
1692 Tok = Tok->MatchingParen;
1693 if (!Tok)
1694 return false;
1695 Tok = Tok->getNextNonComment();
1696 if (!Tok)
1697 return false;
1698
1699 if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
1700 tok::kw_using)) {
1701 return false;
1702 }
1703
1704 return true;
1705 }
1706
1707 // Type aliases use `type X = ...;` in TypeScript and can be exported
1708 // using `export type ...`.
1709 if (Style.isJavaScript() &&
1710 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1711 Line.startsWith(tok::kw_export, Keywords.kw_type,
1712 tok::identifier))) {
1713 return false;
1714 }
1715
1716 return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
1717 };
1718
1719 if (AssignmentStartsExpression()) {
1720 Contexts.back().IsExpression = true;
1721 if (!Line.startsWith(TT_UnaryOperator)) {
1722 for (FormatToken *Previous = Current.Previous;
1723 Previous && Previous->Previous &&
1724 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1725 Previous = Previous->Previous) {
1726 if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
1727 Previous = Previous->MatchingParen;
1728 if (!Previous)
1729 break;
1730 }
1731 if (Previous->opensScope())
1732 break;
1733 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1734 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1735 Previous->Previous && Previous->Previous->isNot(tok::equal)) {
1736 Previous->setType(TT_PointerOrReference);
1737 }
1738 }
1739 }
1740 } else if (Current.is(tok::lessless) &&
1741 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1742 Contexts.back().IsExpression = true;
1743 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1744 Contexts.back().IsExpression = true;
1745 } else if (Current.is(TT_TrailingReturnArrow)) {
1746 Contexts.back().IsExpression = false;
1747 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1748 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1749 } else if (Current.Previous &&
1750 Current.Previous->is(TT_CtorInitializerColon)) {
1751 Contexts.back().IsExpression = true;
1752 Contexts.back().ContextType = Context::CtorInitializer;
1753 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1754 Contexts.back().ContextType = Context::InheritanceList;
1755 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1756 for (FormatToken *Previous = Current.Previous;
1757 Previous && Previous->isOneOf(tok::star, tok::amp);
1758 Previous = Previous->Previous) {
1759 Previous->setType(TT_PointerOrReference);
1760 }
1761 if (Line.MustBeDeclaration &&
1762 Contexts.front().ContextType != Context::CtorInitializer) {
1763 Contexts.back().IsExpression = false;
1764 }
1765 } else if (Current.is(tok::kw_new)) {
1766 Contexts.back().CanBeExpression = false;
1767 } else if (Current.is(tok::semi) ||
1768 (Current.is(tok::exclaim) && Current.Previous &&
1769 !Current.Previous->is(tok::kw_operator))) {
1770 // This should be the condition or increment in a for-loop.
1771 // But not operator !() (can't use TT_OverloadedOperator here as its not
1772 // been annotated yet).
1773 Contexts.back().IsExpression = true;
1774 }
1775 }
1776
untilMatchingParen(FormatToken * Current)1777 static FormatToken *untilMatchingParen(FormatToken *Current) {
1778 // Used when `MatchingParen` is not yet established.
1779 int ParenLevel = 0;
1780 while (Current) {
1781 if (Current->is(tok::l_paren))
1782 ++ParenLevel;
1783 if (Current->is(tok::r_paren))
1784 --ParenLevel;
1785 if (ParenLevel < 1)
1786 break;
1787 Current = Current->Next;
1788 }
1789 return Current;
1790 }
1791
isDeductionGuide(FormatToken & Current)1792 static bool isDeductionGuide(FormatToken &Current) {
1793 // Look for a deduction guide template<T> A(...) -> A<...>;
1794 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
1795 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
1796 // Find the TemplateCloser.
1797 FormatToken *TemplateCloser = Current.Next->Next;
1798 int NestingLevel = 0;
1799 while (TemplateCloser) {
1800 // Skip over an expressions in parens A<(3 < 2)>;
1801 if (TemplateCloser->is(tok::l_paren)) {
1802 // No Matching Paren yet so skip to matching paren
1803 TemplateCloser = untilMatchingParen(TemplateCloser);
1804 if (!TemplateCloser)
1805 break;
1806 }
1807 if (TemplateCloser->is(tok::less))
1808 ++NestingLevel;
1809 if (TemplateCloser->is(tok::greater))
1810 --NestingLevel;
1811 if (NestingLevel < 1)
1812 break;
1813 TemplateCloser = TemplateCloser->Next;
1814 }
1815 // Assuming we have found the end of the template ensure its followed
1816 // with a semi-colon.
1817 if (TemplateCloser && TemplateCloser->Next &&
1818 TemplateCloser->Next->is(tok::semi) &&
1819 Current.Previous->MatchingParen) {
1820 // Determine if the identifier `A` prior to the A<..>; is the same as
1821 // prior to the A(..)
1822 FormatToken *LeadingIdentifier =
1823 Current.Previous->MatchingParen->Previous;
1824
1825 return LeadingIdentifier &&
1826 LeadingIdentifier->TokenText == Current.Next->TokenText;
1827 }
1828 }
1829 return false;
1830 }
1831
determineTokenType(FormatToken & Current)1832 void determineTokenType(FormatToken &Current) {
1833 if (!Current.is(TT_Unknown)) {
1834 // The token type is already known.
1835 return;
1836 }
1837
1838 if ((Style.isJavaScript() || Style.isCSharp()) &&
1839 Current.is(tok::exclaim)) {
1840 if (Current.Previous) {
1841 bool IsIdentifier =
1842 Style.isJavaScript()
1843 ? Keywords.IsJavaScriptIdentifier(
1844 *Current.Previous, /* AcceptIdentifierName= */ true)
1845 : Current.Previous->is(tok::identifier);
1846 if (IsIdentifier ||
1847 Current.Previous->isOneOf(
1848 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
1849 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
1850 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
1851 Current.Previous->Tok.isLiteral()) {
1852 Current.setType(TT_NonNullAssertion);
1853 return;
1854 }
1855 }
1856 if (Current.Next &&
1857 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1858 Current.setType(TT_NonNullAssertion);
1859 return;
1860 }
1861 }
1862
1863 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1864 // function declaration have been found. In this case, 'Current' is a
1865 // trailing token of this declaration and thus cannot be a name.
1866 if (Current.is(Keywords.kw_instanceof)) {
1867 Current.setType(TT_BinaryOperator);
1868 } else if (isStartOfName(Current) &&
1869 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1870 Contexts.back().FirstStartOfName = &Current;
1871 Current.setType(TT_StartOfName);
1872 } else if (Current.is(tok::semi)) {
1873 // Reset FirstStartOfName after finding a semicolon so that a for loop
1874 // with multiple increment statements is not confused with a for loop
1875 // having multiple variable declarations.
1876 Contexts.back().FirstStartOfName = nullptr;
1877 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1878 AutoFound = true;
1879 } else if (Current.is(tok::arrow) &&
1880 Style.Language == FormatStyle::LK_Java) {
1881 Current.setType(TT_LambdaArrow);
1882 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1883 Current.NestingLevel == 0 &&
1884 !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
1885 // not auto operator->() -> xxx;
1886 Current.setType(TT_TrailingReturnArrow);
1887 } else if (Current.is(tok::arrow) && Current.Previous &&
1888 Current.Previous->is(tok::r_brace)) {
1889 // Concept implicit conversion constraint needs to be treated like
1890 // a trailing return type ... } -> <type>.
1891 Current.setType(TT_TrailingReturnArrow);
1892 } else if (isDeductionGuide(Current)) {
1893 // Deduction guides trailing arrow " A(...) -> A<T>;".
1894 Current.setType(TT_TrailingReturnArrow);
1895 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
1896 Current.setType(determineStarAmpUsage(
1897 Current,
1898 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
1899 Contexts.back().ContextType == Context::TemplateArgument));
1900 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
1901 (Style.isVerilog() && Current.is(tok::pipe))) {
1902 Current.setType(determinePlusMinusCaretUsage(Current));
1903 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1904 Contexts.back().CaretFound = true;
1905 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1906 Current.setType(determineIncrementUsage(Current));
1907 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1908 Current.setType(TT_UnaryOperator);
1909 } else if (Current.is(tok::question)) {
1910 if (Style.isJavaScript() && Line.MustBeDeclaration &&
1911 !Contexts.back().IsExpression) {
1912 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1913 // on the interface, not a ternary expression.
1914 Current.setType(TT_JsTypeOptionalQuestion);
1915 } else {
1916 Current.setType(TT_ConditionalExpr);
1917 }
1918 } else if (Current.isBinaryOperator() &&
1919 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1920 (!Current.is(tok::greater) &&
1921 Style.Language != FormatStyle::LK_TextProto)) {
1922 Current.setType(TT_BinaryOperator);
1923 } else if (Current.is(tok::comment)) {
1924 if (Current.TokenText.startswith("/*")) {
1925 if (Current.TokenText.endswith("*/")) {
1926 Current.setType(TT_BlockComment);
1927 } else {
1928 // The lexer has for some reason determined a comment here. But we
1929 // cannot really handle it, if it isn't properly terminated.
1930 Current.Tok.setKind(tok::unknown);
1931 }
1932 } else {
1933 Current.setType(TT_LineComment);
1934 }
1935 } else if (Current.is(tok::l_paren)) {
1936 if (lParenStartsCppCast(Current))
1937 Current.setType(TT_CppCastLParen);
1938 } else if (Current.is(tok::r_paren)) {
1939 if (rParenEndsCast(Current))
1940 Current.setType(TT_CastRParen);
1941 if (Current.MatchingParen && Current.Next &&
1942 !Current.Next->isBinaryOperator() &&
1943 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1944 tok::comma, tok::period, tok::arrow,
1945 tok::coloncolon, tok::kw_noexcept)) {
1946 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1947 // Make sure this isn't the return type of an Obj-C block declaration
1948 if (AfterParen->isNot(tok::caret)) {
1949 if (FormatToken *BeforeParen = Current.MatchingParen->Previous) {
1950 if (BeforeParen->is(tok::identifier) &&
1951 !BeforeParen->is(TT_TypenameMacro) &&
1952 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1953 (!BeforeParen->Previous ||
1954 BeforeParen->Previous->ClosesTemplateDeclaration)) {
1955 Current.setType(TT_FunctionAnnotationRParen);
1956 }
1957 }
1958 }
1959 }
1960 }
1961 } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
1962 Style.Language != FormatStyle::LK_Java) {
1963 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1964 // marks declarations and properties that need special formatting.
1965 switch (Current.Next->Tok.getObjCKeywordID()) {
1966 case tok::objc_interface:
1967 case tok::objc_implementation:
1968 case tok::objc_protocol:
1969 Current.setType(TT_ObjCDecl);
1970 break;
1971 case tok::objc_property:
1972 Current.setType(TT_ObjCProperty);
1973 break;
1974 default:
1975 break;
1976 }
1977 } else if (Current.is(tok::period)) {
1978 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1979 if (PreviousNoComment &&
1980 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
1981 Current.setType(TT_DesignatedInitializerPeriod);
1982 } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1983 Current.Previous->isOneOf(TT_JavaAnnotation,
1984 TT_LeadingJavaAnnotation)) {
1985 Current.setType(Current.Previous->getType());
1986 }
1987 } else if (canBeObjCSelectorComponent(Current) &&
1988 // FIXME(bug 36976): ObjC return types shouldn't use
1989 // TT_CastRParen.
1990 Current.Previous && Current.Previous->is(TT_CastRParen) &&
1991 Current.Previous->MatchingParen &&
1992 Current.Previous->MatchingParen->Previous &&
1993 Current.Previous->MatchingParen->Previous->is(
1994 TT_ObjCMethodSpecifier)) {
1995 // This is the first part of an Objective-C selector name. (If there's no
1996 // colon after this, this is the only place which annotates the identifier
1997 // as a selector.)
1998 Current.setType(TT_SelectorName);
1999 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2000 tok::kw_requires) &&
2001 Current.Previous &&
2002 !Current.Previous->isOneOf(tok::equal, tok::at,
2003 TT_CtorInitializerComma,
2004 TT_CtorInitializerColon) &&
2005 Line.MightBeFunctionDecl && Contexts.size() == 1) {
2006 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2007 // function declaration have been found.
2008 Current.setType(TT_TrailingAnnotation);
2009 } else if ((Style.Language == FormatStyle::LK_Java ||
2010 Style.isJavaScript()) &&
2011 Current.Previous) {
2012 if (Current.Previous->is(tok::at) &&
2013 Current.isNot(Keywords.kw_interface)) {
2014 const FormatToken &AtToken = *Current.Previous;
2015 const FormatToken *Previous = AtToken.getPreviousNonComment();
2016 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2017 Current.setType(TT_LeadingJavaAnnotation);
2018 else
2019 Current.setType(TT_JavaAnnotation);
2020 } else if (Current.Previous->is(tok::period) &&
2021 Current.Previous->isOneOf(TT_JavaAnnotation,
2022 TT_LeadingJavaAnnotation)) {
2023 Current.setType(Current.Previous->getType());
2024 }
2025 }
2026 }
2027
2028 /// Take a guess at whether \p Tok starts a name of a function or
2029 /// variable declaration.
2030 ///
2031 /// This is a heuristic based on whether \p Tok is an identifier following
2032 /// something that is likely a type.
isStartOfName(const FormatToken & Tok)2033 bool isStartOfName(const FormatToken &Tok) {
2034 if (Tok.isNot(tok::identifier) || !Tok.Previous)
2035 return false;
2036
2037 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2038 Keywords.kw_as)) {
2039 return false;
2040 }
2041 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2042 return false;
2043
2044 // Skip "const" as it does not have an influence on whether this is a name.
2045 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2046
2047 // For javascript const can be like "let" or "var"
2048 if (!Style.isJavaScript())
2049 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2050 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2051
2052 if (!PreviousNotConst)
2053 return false;
2054
2055 if (PreviousNotConst->ClosesRequiresClause)
2056 return false;
2057
2058 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2059 PreviousNotConst->Previous &&
2060 PreviousNotConst->Previous->is(tok::hash);
2061
2062 if (PreviousNotConst->is(TT_TemplateCloser)) {
2063 return PreviousNotConst && PreviousNotConst->MatchingParen &&
2064 PreviousNotConst->MatchingParen->Previous &&
2065 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2066 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2067 }
2068
2069 if (PreviousNotConst->is(tok::r_paren) &&
2070 PreviousNotConst->is(TT_TypeDeclarationParen)) {
2071 return true;
2072 }
2073
2074 // If is a preprocess keyword like #define.
2075 if (IsPPKeyword)
2076 return false;
2077
2078 // int a or auto a.
2079 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
2080 return true;
2081
2082 // *a or &a or &&a.
2083 if (PreviousNotConst->is(TT_PointerOrReference))
2084 return true;
2085
2086 // MyClass a;
2087 if (PreviousNotConst->isSimpleTypeSpecifier())
2088 return true;
2089
2090 // type[] a in Java
2091 if (Style.Language == FormatStyle::LK_Java &&
2092 PreviousNotConst->is(tok::r_square)) {
2093 return true;
2094 }
2095
2096 // const a = in JavaScript.
2097 return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2098 }
2099
2100 /// Determine whether '(' is starting a C++ cast.
lParenStartsCppCast(const FormatToken & Tok)2101 bool lParenStartsCppCast(const FormatToken &Tok) {
2102 // C-style casts are only used in C++.
2103 if (!Style.isCpp())
2104 return false;
2105
2106 FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2107 if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2108 LeftOfParens->MatchingParen) {
2109 auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2110 if (Prev &&
2111 Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2112 tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2113 // FIXME: Maybe we should handle identifiers ending with "_cast",
2114 // e.g. any_cast?
2115 return true;
2116 }
2117 }
2118 return false;
2119 }
2120
2121 /// Determine whether ')' is ending a cast.
rParenEndsCast(const FormatToken & Tok)2122 bool rParenEndsCast(const FormatToken &Tok) {
2123 // C-style casts are only used in C++, C# and Java.
2124 if (!Style.isCSharp() && !Style.isCpp() &&
2125 Style.Language != FormatStyle::LK_Java) {
2126 return false;
2127 }
2128
2129 // Empty parens aren't casts and there are no casts at the end of the line.
2130 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
2131 return false;
2132
2133 if (Tok.MatchingParen->is(TT_OverloadedOperatorLParen))
2134 return false;
2135
2136 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
2137 if (LeftOfParens) {
2138 // If there is a closing parenthesis left of the current
2139 // parentheses, look past it as these might be chained casts.
2140 if (LeftOfParens->is(tok::r_paren) &&
2141 LeftOfParens->isNot(TT_CastRParen)) {
2142 if (!LeftOfParens->MatchingParen ||
2143 !LeftOfParens->MatchingParen->Previous) {
2144 return false;
2145 }
2146 LeftOfParens = LeftOfParens->MatchingParen->Previous;
2147 }
2148
2149 if (LeftOfParens->is(tok::r_square)) {
2150 // delete[] (void *)ptr;
2151 auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
2152 if (Tok->isNot(tok::r_square))
2153 return nullptr;
2154
2155 Tok = Tok->getPreviousNonComment();
2156 if (!Tok || Tok->isNot(tok::l_square))
2157 return nullptr;
2158
2159 Tok = Tok->getPreviousNonComment();
2160 if (!Tok || Tok->isNot(tok::kw_delete))
2161 return nullptr;
2162 return Tok;
2163 };
2164 if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
2165 LeftOfParens = MaybeDelete;
2166 }
2167
2168 // The Condition directly below this one will see the operator arguments
2169 // as a (void *foo) cast.
2170 // void operator delete(void *foo) ATTRIB;
2171 if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
2172 LeftOfParens->Previous->is(tok::kw_operator)) {
2173 return false;
2174 }
2175
2176 // If there is an identifier (or with a few exceptions a keyword) right
2177 // before the parentheses, this is unlikely to be a cast.
2178 if (LeftOfParens->Tok.getIdentifierInfo() &&
2179 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
2180 tok::kw_delete, tok::kw_throw)) {
2181 return false;
2182 }
2183
2184 // Certain other tokens right before the parentheses are also signals that
2185 // this cannot be a cast.
2186 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
2187 TT_TemplateCloser, tok::ellipsis)) {
2188 return false;
2189 }
2190 }
2191
2192 if (Tok.Next->is(tok::question))
2193 return false;
2194
2195 // `foreach((A a, B b) in someList)` should not be seen as a cast.
2196 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
2197 return false;
2198
2199 // Functions which end with decorations like volatile, noexcept are unlikely
2200 // to be casts.
2201 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
2202 tok::kw_requires, tok::kw_throw, tok::arrow,
2203 Keywords.kw_override, Keywords.kw_final) ||
2204 isCpp11AttributeSpecifier(*Tok.Next)) {
2205 return false;
2206 }
2207
2208 // As Java has no function types, a "(" after the ")" likely means that this
2209 // is a cast.
2210 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
2211 return true;
2212
2213 // If a (non-string) literal follows, this is likely a cast.
2214 if (Tok.Next->isNot(tok::string_literal) &&
2215 (Tok.Next->Tok.isLiteral() ||
2216 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) {
2217 return true;
2218 }
2219
2220 // Heuristically try to determine whether the parentheses contain a type.
2221 auto IsQualifiedPointerOrReference = [](FormatToken *T) {
2222 // This is used to handle cases such as x = (foo *const)&y;
2223 assert(!T->isSimpleTypeSpecifier() && "Should have already been checked");
2224 // Strip trailing qualifiers such as const or volatile when checking
2225 // whether the parens could be a cast to a pointer/reference type.
2226 while (T) {
2227 if (T->is(TT_AttributeParen)) {
2228 // Handle `x = (foo *__attribute__((foo)))&v;`:
2229 if (T->MatchingParen && T->MatchingParen->Previous &&
2230 T->MatchingParen->Previous->is(tok::kw___attribute)) {
2231 T = T->MatchingParen->Previous->Previous;
2232 continue;
2233 }
2234 } else if (T->is(TT_AttributeSquare)) {
2235 // Handle `x = (foo *[[clang::foo]])&v;`:
2236 if (T->MatchingParen && T->MatchingParen->Previous) {
2237 T = T->MatchingParen->Previous;
2238 continue;
2239 }
2240 } else if (T->canBePointerOrReferenceQualifier()) {
2241 T = T->Previous;
2242 continue;
2243 }
2244 break;
2245 }
2246 return T && T->is(TT_PointerOrReference);
2247 };
2248 bool ParensAreType =
2249 !Tok.Previous ||
2250 Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
2251 Tok.Previous->isSimpleTypeSpecifier() ||
2252 IsQualifiedPointerOrReference(Tok.Previous);
2253 bool ParensCouldEndDecl =
2254 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
2255 if (ParensAreType && !ParensCouldEndDecl)
2256 return true;
2257
2258 // At this point, we heuristically assume that there are no casts at the
2259 // start of the line. We assume that we have found most cases where there
2260 // are by the logic above, e.g. "(void)x;".
2261 if (!LeftOfParens)
2262 return false;
2263
2264 // Certain token types inside the parentheses mean that this can't be a
2265 // cast.
2266 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
2267 Token = Token->Next) {
2268 if (Token->is(TT_BinaryOperator))
2269 return false;
2270 }
2271
2272 // If the following token is an identifier or 'this', this is a cast. All
2273 // cases where this can be something else are handled above.
2274 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
2275 return true;
2276
2277 // Look for a cast `( x ) (`.
2278 if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
2279 if (Tok.Previous->is(tok::identifier) &&
2280 Tok.Previous->Previous->is(tok::l_paren)) {
2281 return true;
2282 }
2283 }
2284
2285 if (!Tok.Next->Next)
2286 return false;
2287
2288 // If the next token after the parenthesis is a unary operator, assume
2289 // that this is cast, unless there are unexpected tokens inside the
2290 // parenthesis.
2291 bool NextIsUnary =
2292 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
2293 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
2294 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
2295 return false;
2296 }
2297 // Search for unexpected tokens.
2298 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
2299 Prev = Prev->Previous) {
2300 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2301 return false;
2302 }
2303 return true;
2304 }
2305
2306 /// Returns true if the token is used as a unary operator.
determineUnaryOperatorByUsage(const FormatToken & Tok)2307 bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2308 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2309 if (!PrevToken)
2310 return true;
2311
2312 // These keywords are deliberately not included here because they may
2313 // precede only one of unary star/amp and plus/minus but not both. They are
2314 // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2315 //
2316 // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2317 // know how they can be followed by a star or amp.
2318 if (PrevToken->isOneOf(
2319 TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2320 tok::equal, tok::question, tok::l_square, tok::l_brace,
2321 tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2322 tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2323 return true;
2324 }
2325
2326 // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2327 // where the unary `+` operator is overloaded, it is reasonable to write
2328 // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2329 if (PrevToken->is(tok::kw_sizeof))
2330 return true;
2331
2332 // A sequence of leading unary operators.
2333 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2334 return true;
2335
2336 // There can't be two consecutive binary operators.
2337 if (PrevToken->is(TT_BinaryOperator))
2338 return true;
2339
2340 return false;
2341 }
2342
2343 /// Return the type of the given token assuming it is * or &.
determineStarAmpUsage(const FormatToken & Tok,bool IsExpression,bool InTemplateArgument)2344 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2345 bool InTemplateArgument) {
2346 if (Style.isJavaScript())
2347 return TT_BinaryOperator;
2348
2349 // && in C# must be a binary operator.
2350 if (Style.isCSharp() && Tok.is(tok::ampamp))
2351 return TT_BinaryOperator;
2352
2353 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2354 if (!PrevToken)
2355 return TT_UnaryOperator;
2356
2357 const FormatToken *NextToken = Tok.getNextNonComment();
2358
2359 if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2360 return TT_BinaryOperator;
2361
2362 if (!NextToken ||
2363 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_noexcept, tok::comma,
2364 tok::r_paren) ||
2365 NextToken->canBePointerOrReferenceQualifier() ||
2366 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2367 return TT_PointerOrReference;
2368 }
2369
2370 if (PrevToken->is(tok::coloncolon))
2371 return TT_PointerOrReference;
2372
2373 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2374 return TT_PointerOrReference;
2375
2376 if (determineUnaryOperatorByUsage(Tok))
2377 return TT_UnaryOperator;
2378
2379 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2380 return TT_PointerOrReference;
2381 if (NextToken->is(tok::kw_operator) && !IsExpression)
2382 return TT_PointerOrReference;
2383 if (NextToken->isOneOf(tok::comma, tok::semi))
2384 return TT_PointerOrReference;
2385
2386 // After right braces, star tokens are likely to be pointers to struct,
2387 // union, or class.
2388 // struct {} *ptr;
2389 // This by itself is not sufficient to distinguish from multiplication
2390 // following a brace-initialized expression, as in:
2391 // int i = int{42} * 2;
2392 // In the struct case, the part of the struct declaration until the `{` and
2393 // the `}` are put on separate unwrapped lines; in the brace-initialized
2394 // case, the matching `{` is on the same unwrapped line, so check for the
2395 // presence of the matching brace to distinguish between those.
2396 if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
2397 !PrevToken->MatchingParen) {
2398 return TT_PointerOrReference;
2399 }
2400
2401 if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
2402 return TT_UnaryOperator;
2403
2404 if (PrevToken->Tok.isLiteral() ||
2405 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2406 tok::kw_false, tok::r_brace)) {
2407 return TT_BinaryOperator;
2408 }
2409
2410 const FormatToken *NextNonParen = NextToken;
2411 while (NextNonParen && NextNonParen->is(tok::l_paren))
2412 NextNonParen = NextNonParen->getNextNonComment();
2413 if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
2414 NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
2415 NextNonParen->isUnaryOperator())) {
2416 return TT_BinaryOperator;
2417 }
2418
2419 // If we know we're in a template argument, there are no named declarations.
2420 // Thus, having an identifier on the right-hand side indicates a binary
2421 // operator.
2422 if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
2423 return TT_BinaryOperator;
2424
2425 // "&&(" is quite unlikely to be two successive unary "&".
2426 if (Tok.is(tok::ampamp) && NextToken->is(tok::l_paren))
2427 return TT_BinaryOperator;
2428
2429 // This catches some cases where evaluation order is used as control flow:
2430 // aaa && aaa->f();
2431 if (NextToken->Tok.isAnyIdentifier()) {
2432 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2433 if (NextNextToken && NextNextToken->is(tok::arrow))
2434 return TT_BinaryOperator;
2435 }
2436
2437 // It is very unlikely that we are going to find a pointer or reference type
2438 // definition on the RHS of an assignment.
2439 if (IsExpression && !Contexts.back().CaretFound)
2440 return TT_BinaryOperator;
2441
2442 return TT_PointerOrReference;
2443 }
2444
determinePlusMinusCaretUsage(const FormatToken & Tok)2445 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
2446 if (determineUnaryOperatorByUsage(Tok))
2447 return TT_UnaryOperator;
2448
2449 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2450 if (!PrevToken)
2451 return TT_UnaryOperator;
2452
2453 if (PrevToken->is(tok::at))
2454 return TT_UnaryOperator;
2455
2456 // Fall back to marking the token as binary operator.
2457 return TT_BinaryOperator;
2458 }
2459
2460 /// Determine whether ++/-- are pre- or post-increments/-decrements.
determineIncrementUsage(const FormatToken & Tok)2461 TokenType determineIncrementUsage(const FormatToken &Tok) {
2462 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2463 if (!PrevToken || PrevToken->is(TT_CastRParen))
2464 return TT_UnaryOperator;
2465 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
2466 return TT_TrailingUnaryOperator;
2467
2468 return TT_UnaryOperator;
2469 }
2470
2471 SmallVector<Context, 8> Contexts;
2472
2473 const FormatStyle &Style;
2474 AnnotatedLine &Line;
2475 FormatToken *CurrentToken;
2476 bool AutoFound;
2477 const AdditionalKeywords &Keywords;
2478
2479 // Set of "<" tokens that do not open a template parameter list. If parseAngle
2480 // determines that a specific token can't be a template opener, it will make
2481 // same decision irrespective of the decisions for tokens leading up to it.
2482 // Store this information to prevent this from causing exponential runtime.
2483 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
2484 };
2485
2486 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
2487 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
2488
/// Parses binary expressions by inserting fake parenthesis based on
/// operator precedence.
///
/// The parser walks the tokens of a single line through the \c Current cursor
/// and records the expression structure on the tokens themselves: it pushes
/// onto \c FakeLParens, increments \c FakeRParens, sets
/// \c StartsBinaryExpression / \c EndsBinaryExpression, and links operators of
/// the same level through \c NextOperator / \c OperatorIndex.
class ExpressionParser {
public:
  /// Creates a parser positioned on the first token of \p Line.
  ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
                   AnnotatedLine &Line)
      : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}

  /// Parse expressions with the given operator precedence.
  ///
  /// Recurses with \c Precedence + 1 to consume tighter-binding operators
  /// first. Levels above \c PrecedenceArrowAndPeriod are a no-op, which
  /// terminates the recursion.
  void parse(int Precedence = 0) {
    // Skip 'return' and ObjC selector colons as they are not part of a binary
    // expression.
    while (Current && (Current->is(tok::kw_return) ||
                       (Current->is(tok::colon) &&
                        Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
      next();
    }

    if (!Current || Precedence > PrecedenceArrowAndPeriod)
      return;

    // Conditional expressions need to be parsed separately for proper nesting.
    if (Precedence == prec::Conditional) {
      parseConditionalExpr();
      return;
    }

    // Parse unary operators, which all have a higher precedence than binary
    // operators.
    if (Precedence == PrecedenceUnaryOperator) {
      parseUnaryOperator();
      return;
    }

    // First token of the (sub)expression that receives the fake left paren.
    FormatToken *Start = Current;
    // Most recent operator seen at exactly this precedence level.
    FormatToken *LatestOperator = nullptr;
    // Running index assigned to the operators found at this level.
    unsigned OperatorIndex = 0;

    while (Current) {
      // Consume operators with higher precedence.
      parse(Precedence + 1);

      int CurrentPrecedence = getCurrentPrecedence();

      // A selector name at this level starts a new sub-expression; close the
      // one collected so far if it contained an operator.
      if (Precedence == CurrentPrecedence && Current &&
          Current->is(TT_SelectorName)) {
        if (LatestOperator)
          addFakeParenthesis(Start, prec::Level(Precedence));
        Start = Current;
      }

      // At the end of the line, at a closing scope token, or when an operator
      // with lower precedence is found, stop so that the fake parenthesis can
      // be inserted below.
      if (!Current ||
          (Current->closesScope() &&
           (Current->MatchingParen || Current->is(TT_TemplateString))) ||
          (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
          (CurrentPrecedence == prec::Conditional &&
           Precedence == prec::Assignment && Current->is(tok::colon))) {
        break;
      }

      // Consume scopes: (), [], <> and {}
      // In addition to that we handle require clauses as scope, so that the
      // constraints in that are correctly indented.
      if (Current->opensScope() ||
          Current->isOneOf(TT_RequiresClause,
                           TT_RequiresClauseInARequiresExpression)) {
        // A fragment of a JavaScript template string can look like '}..${' and
        // thus close a scope and open a new one at the same time.
        while (Current && (!Current->closesScope() || Current->opensScope())) {
          next();
          parse();
        }
        next();
      } else {
        // Operator found.
        if (CurrentPrecedence == Precedence) {
          if (LatestOperator)
            LatestOperator->NextOperator = Current;
          LatestOperator = Current;
          Current->OperatorIndex = OperatorIndex;
          ++OperatorIndex;
        }
        next(/*SkipPastLeadingComments=*/Precedence > 0);
      }
    }

    if (LatestOperator && (Current || Precedence > 0)) {
      // The requires clauses do not necessarily end in a semicolon or a brace,
      // but just go over to struct/class or a function declaration, we need to
      // intervene so that the fake right paren is inserted correctly.
      // The lambda walks backwards from Current (or from the last token of the
      // line when Current is null) to the token flagged ClosesRequiresClause,
      // stopping at the first token of the line if none carries the flag.
      auto End =
          (Start->Previous &&
           Start->Previous->isOneOf(TT_RequiresClause,
                                    TT_RequiresClauseInARequiresExpression))
              ? [this]() {
                  auto Ret = Current ? Current : Line.Last;
                  while (!Ret->ClosesRequiresClause && Ret->Previous)
                    Ret = Ret->Previous;
                  return Ret;
                }()
              : nullptr;

      if (Precedence == PrecedenceArrowAndPeriod) {
        // Call expressions don't have a binary operator precedence.
        addFakeParenthesis(Start, prec::Unknown, End);
      } else {
        addFakeParenthesis(Start, prec::Level(Precedence), End);
      }
    }
  }

private:
  /// Gets the precedence (+1) of the given token for binary operators
  /// and other tokens that we treat like binary operators.
  ///
  /// \returns -1 if there is no current token or it is not treated as an
  /// operator by any of the checks below.
  int getCurrentPrecedence() {
    if (Current) {
      const FormatToken *NextNonComment = Current->getNextNonComment();
      if (Current->is(TT_ConditionalExpr))
        return prec::Conditional;
      if (NextNonComment && Current->is(TT_SelectorName) &&
          (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
           ((Style.Language == FormatStyle::LK_Proto ||
             Style.Language == FormatStyle::LK_TextProto) &&
            NextNonComment->is(tok::less)))) {
        return prec::Assignment;
      }
      if (Current->is(TT_JsComputedPropertyName))
        return prec::Assignment;
      if (Current->is(TT_LambdaArrow))
        return prec::Comma;
      if (Current->is(TT_FatArrow))
        return prec::Assignment;
      // These tokens get level 0, i.e. below every level that parse() recurses
      // into.
      if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
          (Current->is(tok::comment) && NextNonComment &&
           NextNonComment->is(TT_SelectorName))) {
        return 0;
      }
      if (Current->is(TT_RangeBasedForLoopColon))
        return prec::Comma;
      if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          Current->is(Keywords.kw_instanceof)) {
        return prec::Relational;
      }
      if (Style.isJavaScript() &&
          Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
        return prec::Relational;
      }
      if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
        return Current->getPrecedence();
      if (Current->isOneOf(tok::period, tok::arrow) &&
          Current->isNot(TT_TrailingReturnArrow)) {
        return PrecedenceArrowAndPeriod;
      }
      if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
                           Keywords.kw_throws)) {
        return 0;
      }
      // In Verilog case labels are not on separate lines straight out of
      // UnwrappedLineParser. The colon is not part of an expression.
      if (Style.isVerilog() && Current->is(tok::colon))
        return 0;
    }
    return -1;
  }

  /// Records a fake parenthesis pair of level \p Precedence.
  ///
  /// The left paren is pushed onto \p Start. The right paren is counted on
  /// \p End; when \p End is null it defaults to the last non-comment token
  /// before \c Current, and no right paren is recorded if \c Current is null
  /// as well. Levels above \c prec::Unknown additionally mark the tokens as
  /// starting / ending a binary expression.
  void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
                          FormatToken *End = nullptr) {
    Start->FakeLParens.push_back(Precedence);
    if (Precedence > prec::Unknown)
      Start->StartsBinaryExpression = true;
    if (!End && Current)
      End = Current->getPreviousNonComment();
    if (End) {
      ++End->FakeRParens;
      if (Precedence > prec::Unknown)
        End->EndsBinaryExpression = true;
    }
  }

  /// Parse unary operator expressions and surround them with fake
  /// parentheses if appropriate.
  void parseUnaryOperator() {
    // Collect the run of consecutive unary operators in source order.
    llvm::SmallVector<FormatToken *, 2> Tokens;
    while (Current && Current->is(TT_UnaryOperator)) {
      Tokens.push_back(Current);
      next();
    }
    parse(PrecedenceArrowAndPeriod);
    // Add the parentheses innermost operator first.
    for (FormatToken *Token : llvm::reverse(Tokens)) {
      // The actual precedence doesn't matter.
      addFakeParenthesis(Token, prec::Unknown);
    }
  }

  /// Parses a conditional expression 'a ? b : c'.
  ///
  /// Fake parentheses are only added when all three parts are found; if the
  /// '?' or the TT_ConditionalExpr ':' is missing the function returns after
  /// having parsed the parts seen so far.
  void parseConditionalExpr() {
    while (Current && Current->isTrailingComment())
      next();
    FormatToken *Start = Current;
    parse(prec::LogicalOr);
    if (!Current || !Current->is(tok::question))
      return;
    next();
    parse(prec::Assignment);
    if (!Current || Current->isNot(TT_ConditionalExpr))
      return;
    next();
    parse(prec::Assignment);
    addFakeParenthesis(Start, prec::Conditional);
  }

  /// Advances \c Current to the next token and then past trailing comments.
  ///
  /// When \p SkipPastLeadingComments is false, only trailing comments with no
  /// newline before them are skipped.
  void next(bool SkipPastLeadingComments = true) {
    if (Current)
      Current = Current->Next;
    while (Current &&
           (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
           Current->isTrailingComment()) {
      Current = Current->Next;
    }
  }

  const FormatStyle &Style;
  const AdditionalKeywords &Keywords;
  const AnnotatedLine &Line;
  // Parse cursor; null once the end of the line has been reached.
  FormatToken *Current;
};
2717
2718 } // end anonymous namespace
2719
setCommentLineLevels(SmallVectorImpl<AnnotatedLine * > & Lines) const2720 void TokenAnnotator::setCommentLineLevels(
2721 SmallVectorImpl<AnnotatedLine *> &Lines) const {
2722 const AnnotatedLine *NextNonCommentLine = nullptr;
2723 for (AnnotatedLine *Line : llvm::reverse(Lines)) {
2724 assert(Line->First);
2725
2726 // If the comment is currently aligned with the line immediately following
2727 // it, that's probably intentional and we should keep it.
2728 if (NextNonCommentLine && Line->isComment() &&
2729 NextNonCommentLine->First->NewlinesBefore <= 1 &&
2730 NextNonCommentLine->First->OriginalColumn ==
2731 Line->First->OriginalColumn) {
2732 const bool PPDirectiveOrImportStmt =
2733 NextNonCommentLine->Type == LT_PreprocessorDirective ||
2734 NextNonCommentLine->Type == LT_ImportStatement;
2735 if (PPDirectiveOrImportStmt)
2736 Line->Type = LT_CommentAbovePPDirective;
2737 // Align comments for preprocessor lines with the # in column 0 if
2738 // preprocessor lines are not indented. Otherwise, align with the next
2739 // line.
2740 Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
2741 PPDirectiveOrImportStmt
2742 ? 0
2743 : NextNonCommentLine->Level;
2744 } else {
2745 NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
2746 }
2747
2748 setCommentLineLevels(Line->Children);
2749 }
2750 }
2751
maxNestingDepth(const AnnotatedLine & Line)2752 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
2753 unsigned Result = 0;
2754 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
2755 Result = std::max(Result, Tok->NestingLevel);
2756 return Result;
2757 }
2758
annotate(AnnotatedLine & Line) const2759 void TokenAnnotator::annotate(AnnotatedLine &Line) const {
2760 for (auto &Child : Line.Children)
2761 annotate(*Child);
2762
2763 AnnotatingParser Parser(Style, Line, Keywords);
2764 Line.Type = Parser.parseLine();
2765
2766 // With very deep nesting, ExpressionParser uses lots of stack and the
2767 // formatting algorithm is very slow. We're not going to do a good job here
2768 // anyway - it's probably generated code being formatted by mistake.
2769 // Just skip the whole line.
2770 if (maxNestingDepth(Line) > 50)
2771 Line.Type = LT_Invalid;
2772
2773 if (Line.Type == LT_Invalid)
2774 return;
2775
2776 ExpressionParser ExprParser(Style, Keywords, Line);
2777 ExprParser.parse();
2778
2779 if (Line.startsWith(TT_ObjCMethodSpecifier))
2780 Line.Type = LT_ObjCMethodDecl;
2781 else if (Line.startsWith(TT_ObjCDecl))
2782 Line.Type = LT_ObjCDecl;
2783 else if (Line.startsWith(TT_ObjCProperty))
2784 Line.Type = LT_ObjCProperty;
2785
2786 Line.First->SpacesRequiredBefore = 1;
2787 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
2788 }
2789
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
//
// IsCpp is forwarded to isCppAttribute() and gates the K&R / unnamed-parameter
// check below. Line is consulted for its last token. The checks are ordered;
// the first one that matches decides the result.
static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
                                      const AnnotatedLine &Line) {
  // Skips over the tokens that make up an operator name. Returns the token
  // tagged TT_OverloadedOperatorLParen if one is reached, nullptr otherwise.
  auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_OverloadedOperatorLParen))
        return Next;
      if (Next->is(TT_OverloadedOperator))
        continue;
      if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
        // For 'new[]' and 'delete[]'.
        if (Next->Next &&
            Next->Next->startsSequence(tok::l_square, tok::r_square)) {
          Next = Next->Next->Next;
        }
        continue;
      }
      if (Next->startsSequence(tok::l_square, tok::r_square)) {
        // For operator[]().
        Next = Next->Next;
        continue;
      }
      if ((Next->isSimpleTypeSpecifier() || Next->is(tok::identifier)) &&
          Next->Next && Next->Next->isOneOf(tok::star, tok::amp, tok::ampamp)) {
        // For operator void*(), operator char*(), operator Foo*().
        Next = Next->Next;
        continue;
      }
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        // Jump over the template argument list.
        Next = Next->MatchingParen;
        continue;
      }

      break;
    }
    return nullptr;
  };

  // Find parentheses of parameter list.
  const FormatToken *Next = Current.Next;
  if (Current.is(tok::kw_operator)) {
    // An 'operator' preceded by '::' is rejected here; the qualified form is
    // handled from the preceding name in the else branch below.
    if (Current.Previous && Current.Previous->is(tok::coloncolon))
      return false;
    Next = skipOperatorName(Next);
  } else {
    // Only a top-level TT_StartOfName token can be a declaration name.
    if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
      return false;
    // Walk over template arguments, '::' qualifiers and attributes until the
    // opening paren is found; any other token means this is not a function.
    for (; Next; Next = Next->Next) {
      if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
        Next = Next->MatchingParen;
      } else if (Next->is(tok::coloncolon)) {
        Next = Next->Next;
        if (!Next)
          return false;
        if (Next->is(tok::kw_operator)) {
          Next = skipOperatorName(Next->Next);
          break;
        }
        if (!Next->is(tok::identifier))
          return false;
      } else if (isCppAttribute(IsCpp, *Next)) {
        Next = Next->MatchingParen;
        if (!Next)
          return false;
      } else if (Next->is(tok::l_paren)) {
        break;
      } else {
        return false;
      }
    }
  }

  // Check whether parameter list can belong to a function declaration.
  if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
    return false;
  // If the lines ends with "{", this is likely a function definition.
  if (Line.Last->is(tok::l_brace))
    return true;
  if (Next->Next == Next->MatchingParen)
    return true; // Empty parentheses.
  // If there is an &/&& after the r_paren, this is likely a function.
  if (Next->MatchingParen->Next &&
      Next->MatchingParen->Next->is(TT_PointerOrReference)) {
    return true;
  }

  // Check for K&R C function definitions (and C++ function definitions with
  // unnamed parameters), e.g.:
  //   int f(i)
  //   {
  //     return i + 1;
  //   }
  //   bool g(size_t = 0, bool b = false)
  //   {
  //     return !b;
  //   }
  if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
      !Line.endsWith(tok::semi)) {
    return true;
  }

  // Scan the parameter list: tokens that look like part of a type mean this is
  // a declaration, literals and similar tokens mean it is not.
  for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
       Tok = Tok->Next) {
    if (Tok->is(TT_TypeDeclarationParen))
      return true;
    if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
      // Skip nested parentheses and template argument lists as a whole.
      Tok = Tok->MatchingParen;
      continue;
    }
    if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
        Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
      return true;
    }
    if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
        Tok->Tok.isLiteral()) {
      return false;
    }
  }
  return false;
}
2911
mustBreakForReturnType(const AnnotatedLine & Line) const2912 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2913 assert(Line.MightBeFunctionDecl);
2914
2915 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2916 Style.AlwaysBreakAfterReturnType ==
2917 FormatStyle::RTBS_TopLevelDefinitions) &&
2918 Line.Level > 0) {
2919 return false;
2920 }
2921
2922 switch (Style.AlwaysBreakAfterReturnType) {
2923 case FormatStyle::RTBS_None:
2924 return false;
2925 case FormatStyle::RTBS_All:
2926 case FormatStyle::RTBS_TopLevel:
2927 return true;
2928 case FormatStyle::RTBS_AllDefinitions:
2929 case FormatStyle::RTBS_TopLevelDefinitions:
2930 return Line.mightBeFunctionDefinition();
2931 }
2932
2933 return false;
2934 }
2935
mustBreakAfterAttributes(const FormatToken & Tok,const FormatStyle & Style)2936 static bool mustBreakAfterAttributes(const FormatToken &Tok,
2937 const FormatStyle &Style) {
2938 switch (Style.BreakAfterAttributes) {
2939 case FormatStyle::ABS_Always:
2940 return true;
2941 case FormatStyle::ABS_Leave:
2942 return Tok.NewlinesBefore > 0;
2943 default:
2944 return false;
2945 }
2946 }
2947
/// Computes the per-token formatting information for \p Line after all of its
/// children have been processed.
///
/// For every token after the first this sets \c SpacesRequiredBefore,
/// \c MustBreakBefore, \c CanBreakBefore, \c TotalLength and \c SplitPenalty.
/// It also tags the function declaration name (if any), and finally fills in
/// the unbreakable tail lengths and the \c IndentLevel of all tokens.
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
  for (AnnotatedLine *ChildLine : Line.Children)
    calculateFormattingInformation(*ChildLine);

  // A multiline first token is charged the full column limit.
  Line.First->TotalLength =
      Line.First->IsMultiline ? Style.ColumnLimit
                              : Line.FirstStartColumn + Line.First->ColumnWidth;
  FormatToken *Current = Line.First->Next;
  bool InFunctionDecl = Line.MightBeFunctionDecl;
  bool AlignArrayOfStructures =
      (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
       Line.Type == LT_ArrayOfStructInitializer);
  if (AlignArrayOfStructures)
    calculateArrayInitializerColumnList(Line);

  // Find the function declaration name, remembering the token that follows
  // the last C++ attribute group seen before it so that a break can be forced
  // there if the style asks for it.
  for (FormatToken *Tok = Current, *AfterLastAttribute = nullptr; Tok;
       Tok = Tok->Next) {
    if (isFunctionDeclarationName(Style.isCpp(), *Tok, Line)) {
      Tok->setType(TT_FunctionDeclarationName);
      if (AfterLastAttribute &&
          mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
        AfterLastAttribute->MustBreakBefore = true;
        Line.ReturnTypeWrapped = true;
      }
      break;
    }
    if (Tok->Previous->EndsCppAttributeGroup)
      AfterLastAttribute = Tok;
  }

  while (Current) {
    const FormatToken *Prev = Current->Previous;
    if (Current->is(TT_LineComment)) {
      if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
        Current->SpacesRequiredBefore =
            (Style.Cpp11BracedListStyle && !Style.SpacesInParentheses) ? 0 : 1;
      } else {
        Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
      }

      // If we find a trailing comment, iterate backwards to determine whether
      // it seems to relate to a specific parameter. If so, break before that
      // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
      // to the previous line in:
      //   SomeFunction(a,
      //                b, // comment
      //                c);
      if (!Current->HasUnescapedNewline) {
        for (FormatToken *Parameter = Current->Previous; Parameter;
             Parameter = Parameter->Previous) {
          if (Parameter->isOneOf(tok::comment, tok::r_brace))
            break;
          if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
            if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
                Parameter->HasUnescapedNewline) {
              Parameter->MustBreakBefore = true;
            }
            break;
          }
        }
      }
    } else if (Current->SpacesRequiredBefore == 0 &&
               spaceRequiredBefore(Line, *Current)) {
      Current->SpacesRequiredBefore = 1;
    }

    // A line comment at the end of the previous token's last child line forces
    // a break, otherwise the comment would swallow this token.
    const auto &Children = Prev->Children;
    if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
      Current->MustBreakBefore = true;
    } else {
      Current->MustBreakBefore =
          Current->MustBreakBefore || mustBreakBefore(Line, *Current);
      if (!Current->MustBreakBefore && InFunctionDecl &&
          Current->is(TT_FunctionDeclarationName)) {
        Current->MustBreakBefore = mustBreakForReturnType(Line);
      }
    }

    Current->CanBreakBefore =
        Current->MustBreakBefore || canBreakBefore(Line, *Current);
    // Length contributed by a single child line of the previous token; a
    // child ending in a trailing comment is charged the full column limit.
    unsigned ChildSize = 0;
    if (Prev->Children.size() == 1) {
      FormatToken &LastOfChild = *Prev->Children[0]->Last;
      ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
                                                  : LastOfChild.TotalLength + 1;
    }
    // Whenever a break is unavoidable here, add the column limit to the
    // running total instead of the token's own width.
    if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
        (Prev->Children.size() == 1 &&
         Prev->Children[0]->First->MustBreakBefore) ||
        Current->IsMultiline) {
      Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
    } else {
      Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
                             ChildSize + Current->SpacesRequiredBefore;
    }

    // Tokens after the constructor initializer colon are no longer treated as
    // part of the function declaration.
    if (Current->is(TT_CtorInitializerColon))
      InFunctionDecl = false;

    // FIXME: Only calculate this if CanBreakBefore is true once static
    // initializers etc. are sorted out.
    // FIXME: Move magic numbers to a better place.

    // Reduce penalty for aligning ObjC method arguments using the colon
    // alignment as this is the canonical way (still prefer fitting everything
    // into one line if possible). Trying to fit a whole expression into one
    // line should not force other line breaks (e.g. when ObjC method
    // expression is a part of other expression).
    Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
    if (Style.Language == FormatStyle::LK_ObjC &&
        Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
      // Only the selector name with ParameterIndex 1 gets the (reduced)
      // binding-strength surcharge; later ones get none.
      if (Current->ParameterIndex == 1)
        Current->SplitPenalty += 5 * Current->BindingStrength;
    } else {
      Current->SplitPenalty += 20 * Current->BindingStrength;
    }

    Current = Current->Next;
  }

  calculateUnbreakableTailLengths(Line);
  // Assign indent levels: a token whose matching paren opens a block (or
  // block-type list) steps the level back out before it is assigned, and an
  // opening token increases the level for the tokens that follow it.
  unsigned IndentLevel = Line.Level;
  for (Current = Line.First; Current != nullptr; Current = Current->Next) {
    if (Current->Role)
      Current->Role->precomputeFormattingInfos(Current);
    if (Current->MatchingParen &&
        Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
        IndentLevel > 0) {
      --IndentLevel;
    }
    Current->IndentLevel = IndentLevel;
    if (Current->opensBlockOrBlockTypeList(Style))
      ++IndentLevel;
  }

  LLVM_DEBUG({ printDebugInfo(Line); });
}
3085
calculateUnbreakableTailLengths(AnnotatedLine & Line) const3086 void TokenAnnotator::calculateUnbreakableTailLengths(
3087 AnnotatedLine &Line) const {
3088 unsigned UnbreakableTailLength = 0;
3089 FormatToken *Current = Line.Last;
3090 while (Current) {
3091 Current->UnbreakableTailLength = UnbreakableTailLength;
3092 if (Current->CanBreakBefore ||
3093 Current->isOneOf(tok::comment, tok::string_literal)) {
3094 UnbreakableTailLength = 0;
3095 } else {
3096 UnbreakableTailLength +=
3097 Current->ColumnWidth + Current->SpacesRequiredBefore;
3098 }
3099 Current = Current->Previous;
3100 }
3101 }
3102
calculateArrayInitializerColumnList(AnnotatedLine & Line) const3103 void TokenAnnotator::calculateArrayInitializerColumnList(
3104 AnnotatedLine &Line) const {
3105 if (Line.First == Line.Last)
3106 return;
3107 auto *CurrentToken = Line.First;
3108 CurrentToken->ArrayInitializerLineStart = true;
3109 unsigned Depth = 0;
3110 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
3111 if (CurrentToken->is(tok::l_brace)) {
3112 CurrentToken->IsArrayInitializer = true;
3113 if (CurrentToken->Next != nullptr)
3114 CurrentToken->Next->MustBreakBefore = true;
3115 CurrentToken =
3116 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
3117 } else {
3118 CurrentToken = CurrentToken->Next;
3119 }
3120 }
3121 }
3122
calculateInitializerColumnList(AnnotatedLine & Line,FormatToken * CurrentToken,unsigned Depth) const3123 FormatToken *TokenAnnotator::calculateInitializerColumnList(
3124 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
3125 while (CurrentToken != nullptr && CurrentToken != Line.Last) {
3126 if (CurrentToken->is(tok::l_brace))
3127 ++Depth;
3128 else if (CurrentToken->is(tok::r_brace))
3129 --Depth;
3130 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
3131 CurrentToken = CurrentToken->Next;
3132 if (CurrentToken == nullptr)
3133 break;
3134 CurrentToken->StartsColumn = true;
3135 CurrentToken = CurrentToken->Previous;
3136 }
3137 CurrentToken = CurrentToken->Next;
3138 }
3139 return CurrentToken;
3140 }
3141
/// Returns the penalty for breaking the line between \c Tok.Previous (called
/// \c Left below) and \p Tok (called \c Right).
///
/// The rules are evaluated in order and the first matching one determines the
/// result, so their order is significant. \c Tok.Previous is dereferenced
/// unconditionally and must not be null.
///
/// \param Line the line containing \p Tok.
/// \param Tok the token before which the break would be placed.
/// \param InFunctionDecl whether \p Tok is still part of a function
///        declaration.
unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
                                      const FormatToken &Tok,
                                      bool InFunctionDecl) const {
  const FormatToken &Left = *Tok.Previous;
  const FormatToken &Right = Tok;

  if (Left.is(tok::semi))
    return 0;

  // Language specific handling.
  if (Style.Language == FormatStyle::LK_Java) {
    if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
      return 1;
    if (Right.is(Keywords.kw_implements))
      return 2;
    if (Left.is(tok::comma) && Left.NestingLevel == 0)
      return 3;
  } else if (Style.isJavaScript()) {
    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
      return 100;
    if (Left.is(TT_JsTypeColon))
      return 35;
    if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
        (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) {
      return 100;
    }
    // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
    if (Left.opensScope() && Right.closesScope())
      return 200;
  } else if (Style.isProto()) {
    if (Right.is(tok::l_square))
      return 1;
    if (Right.is(tok::period))
      return 500;
  }

  if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
    return 1;
  if (Right.is(tok::l_square)) {
    if (Left.is(tok::r_square))
      return 200;
    // Slightly prefer formatting local lambda definitions like functions.
    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
      return 35;
    // Any '[' that is not one of the listed kinds is expensive to break
    // before; the listed kinds fall through to the rules below.
    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
                       TT_ArrayInitializerLSquare,
                       TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
      return 500;
    }
  }

  if (Left.is(tok::coloncolon))
    return 500;
  if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
      Right.is(tok::kw_operator)) {
    if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
      return 3;
    if (Left.is(TT_StartOfName))
      return 110;
    if (InFunctionDecl && Right.NestingLevel == 0)
      return Style.PenaltyReturnTypeOnItsOwnLine;
    return 200;
  }
  if (Right.is(TT_PointerOrReference))
    return 190;
  if (Right.is(TT_LambdaArrow))
    return 110;
  if (Left.is(tok::equal) && Right.is(tok::l_brace))
    return 160;
  if (Left.is(TT_CastRParen))
    return 100;
  if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
    return 5000;
  if (Left.is(tok::comment))
    return 1000;

  if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
                   TT_CtorInitializerColon)) {
    return 2;
  }

  if (Right.isMemberAccess()) {
    // Breaking before the "./->" of a chained call/member access is reasonably
    // cheap, as formatting those with one call per line is generally
    // desirable. In particular, it should be cheaper to break before the call
    // than it is to break inside a call's parameters, which could lead to weird
    // "hanging" indents. The exception is the very last "./->" to support this
    // frequent pattern:
    //
    //   aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
    //       dddddddd);
    //
    // which might otherwise be blown up onto many lines. Here, clang-format
    // won't produce "hanging" indents anyway as there is no other trailing
    // call.
    //
    // Also apply a higher penalty if it is not a call, as that might lead to a
    // wrapping like:
    //
    //   aaaaaaa
    //       .aaaaaaaaa.bbbbbbbb(cccccccc);
    return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
               ? 150
               : 35;
  }

  if (Right.is(TT_TrailingAnnotation) &&
      (!Right.Next || Right.Next->isNot(tok::l_paren))) {
    // Moving trailing annotations to the next line is fine for ObjC method
    // declarations.
    if (Line.startsWith(TT_ObjCMethodSpecifier))
      return 10;
    // Generally, breaking before a trailing annotation is bad unless it is
    // function-like. It seems to be especially preferable to keep standard
    // annotations (i.e. "const", "final" and "override") on the same line.
    // Use a slightly higher penalty after ")" so that annotations like
    // "const override" are kept together.
    bool is_short_annotation = Right.TokenText.size() < 10;
    return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
  }

  // In for-loops, prefer breaking at ',' and ';'.
  if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
    return 4;

  // In Objective-C method expressions, prefer breaking before "param:" over
  // breaking after it.
  if (Right.is(TT_SelectorName))
    return 0;
  if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
    return Line.MightBeFunctionDecl ? 50 : 500;

  // In Objective-C type declarations, avoid breaking after the category's
  // open paren (we'll prefer breaking after the protocol list's opening
  // angle bracket, if present).
  if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
      Left.Previous->isOneOf(tok::identifier, tok::greater)) {
    return 500;
  }

  // A non-zero PenaltyBreakOpenParenthesis overrides the remaining '(' rules.
  if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
    return Style.PenaltyBreakOpenParenthesis;
  if (Left.is(tok::l_paren) && InFunctionDecl &&
      Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
    return 100;
  }
  if (Left.is(tok::l_paren) && Left.Previous &&
      (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
       Left.Previous->isIf())) {
    return 1000;
  }
  if (Left.is(tok::equal) && InFunctionDecl)
    return 110;
  if (Right.is(tok::r_brace))
    return 1;
  if (Left.is(TT_TemplateOpener))
    return 100;
  if (Left.opensScope()) {
    // If we aren't aligning after opening parens/braces we can always break
    // here unless the style does not want us to place all arguments on the
    // next line.
    if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
        (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
      return 0;
    }
    if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
      return 19;
    return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
                                   : 19;
  }
  if (Left.is(TT_JavaAnnotation))
    return 50;

  if (Left.is(TT_UnaryOperator))
    return 60;
  // Rules for '+', ',' and '<<' next to label strings. They only apply when
  // the operator has a following operator at its level or is not at the
  // excluded operator index (0 for '+' and ',', 1 for '<<').
  if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
      Left.Previous->isLabelString() &&
      (Left.NextOperator || Left.OperatorIndex != 0)) {
    return 50;
  }
  if (Right.is(tok::plus) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 0)) {
    return 25;
  }
  if (Left.is(tok::comma))
    return 1;
  if (Right.is(tok::lessless) && Left.isLabelString() &&
      (Right.NextOperator || Right.OperatorIndex != 1)) {
    return 25;
  }
  if (Right.is(tok::lessless)) {
    // Breaking at a << is really cheap.
    if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) {
      // Slightly prefer to break before the first one in log-like statements.
      return 2;
    }
    return 1;
  }
  if (Left.ClosesTemplateDeclaration)
    return Style.PenaltyBreakTemplateDeclaration;
  if (Left.ClosesRequiresClause)
    return 0;
  if (Left.is(TT_ConditionalExpr))
    return prec::Conditional;
  // Fall back to the operator precedence of Left, or of Right if Left has
  // none; the numeric precedence level itself is used as the penalty.
  prec::Level Level = Left.getPrecedence();
  if (Level == prec::Unknown)
    Level = Right.getPrecedence();
  if (Level == prec::Assignment)
    return Style.PenaltyBreakAssignment;
  if (Level != prec::Unknown)
    return Level;

  return 3;
}
3356
spaceRequiredBeforeParens(const FormatToken & Right) const3357 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
3358 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
3359 return true;
3360 if (Right.is(TT_OverloadedOperatorLParen) &&
3361 Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
3362 return true;
3363 }
3364 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
3365 Right.ParameterCount > 0) {
3366 return true;
3367 }
3368 return false;
3369 }
3370
/// Decides whether a space is required between the tokens \p Left and
/// \p Right on \p Line.
///
/// The function is an ordered chain of rules, each of which returns as soon as
/// it matches, so the position of a rule in the chain is part of its meaning:
/// earlier rules take precedence over later ones. If no rule matches, a space
/// is required.
///
/// NOTE(review): \p Left is presumably the token immediately preceding
/// \p Right; this is not established inside this function - confirm against
/// the callers.
///
/// \param Line  The annotated line containing both tokens; its type and flags
///              (e.g. \c MightBeFunctionDecl, \c IsMultiVariableDeclStmt) are
///              consulted by several rules.
/// \param Left  The token on the left-hand side of the potential space.
/// \param Right The token on the right-hand side of the potential space.
/// \returns \c true if a space is required, \c false otherwise.
spaceRequiredBetween(const AnnotatedLine & Line,const FormatToken & Left,const FormatToken & Right) const3371 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
3372 const FormatToken &Left,
3373 const FormatToken &Right) const {
// A space follows 'return' unless the next token is ';', ')' or '##'.
3374 if (Left.is(tok::kw_return) &&
3375 !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
3376 return true;
3377 }
// 'throw' followed by a '(' whose matching paren is annotated as the closing
// paren of a cast gets a space.
3378 if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
3379 Right.MatchingParen->is(TT_CastRParen)) {
3380 return true;
3381 }
// JSON: no space between a string literal and the ':' that follows it.
3382 if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon))
3383 return false;
// Java: 'assert' is always followed by a space.
3384 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
3385 return true;
// Objective-C: space after the 'property' keyword on a property line when
// ObjCSpaceAfterProperty is enabled.
3386 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
3387 Left.Tok.getObjCKeywordID() == tok::objc_property) {
3388 return true;
3389 }
// '#' and '##': a space before '##' only after '#', and a space after '#' or
// '##' only before another '#'.
3390 if (Right.is(tok::hashhash))
3391 return Left.is(tok::hash);
3392 if (Left.isOneOf(tok::hashhash, tok::hash))
3393 return Right.is(tok::hash);
// Empty "()" and empty non-block "{}" follow SpaceInEmptyParentheses.
3394 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
3395 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
3396 Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
3397 return Style.SpaceInEmptyParentheses;
3398 }
// With SpacesInConditionalStatement, pad just inside the parentheses whose
// opening paren is preceded by a token accepted by isKeywordWithCondition().
3399 if (Style.SpacesInConditionalStatement) {
3400 const FormatToken *LeftParen = nullptr;
3401 if (Left.is(tok::l_paren))
3402 LeftParen = &Left;
3403 else if (Right.is(tok::r_paren) && Right.MatchingParen)
3404 LeftParen = Right.MatchingParen;
3405 if (LeftParen && LeftParen->Previous &&
3406 isKeywordWithCondition(*LeftParen->Previous)) {
3407 return true;
3408 }
3409 }
3410
3411 // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
3412 if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
3413 // function return type 'auto'
3414 TT_FunctionTypeLParen)) {
3415 return true;
3416 }
3417
3418 // auto{x} auto(x)
3419 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
3420 return false;
3421
3422 // operator co_await(x)
3423 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
3424 Left.Previous->is(tok::kw_operator)) {
3425 return false;
3426 }
3427 // co_await (x), co_yield (x), co_return (x)
3428 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
3429 !Right.isOneOf(tok::semi, tok::r_paren)) {
3430 return true;
3431 }
3432
// Remaining spaces just inside parentheses: parentheses annotated as a cast
// use SpacesInCStyleCastParentheses, all others use SpacesInParentheses.
3433 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
3434 return (Right.is(TT_CastRParen) ||
3435 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
3436 ? Style.SpacesInCStyleCastParentheses
3437 : Style.SpacesInParentheses;
3438 }
// Never a space before ';' or ','.
3439 if (Right.isOneOf(tok::semi, tok::comma))
3440 return false;
// '<' in an Objective-C declaration: a '<...>' directly followed by ':' is
// treated as a lightweight generic and gets no space; otherwise
// ObjCSpaceBeforeProtocolList decides.
3441 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
3442 bool IsLightweightGeneric = Right.MatchingParen &&
3443 Right.MatchingParen->Next &&
3444 Right.MatchingParen->Next->is(tok::colon);
3445 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
3446 }
// 'template <' follows SpaceAfterTemplateKeyword.
3447 if (Right.is(tok::less) && Left.is(tok::kw_template))
3448 return Style.SpaceAfterTemplateKeyword;
// No space after '!' or '~'.
3449 if (Left.isOneOf(tok::exclaim, tok::tilde))
3450 return false;
// No space between '@' and a following identifier, literal, '(' or '{'.
3451 if (Left.is(tok::at) &&
3452 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
3453 tok::numeric_constant, tok::l_paren, tok::l_brace,
3454 tok::kw_true, tok::kw_false)) {
3455 return false;
3456 }
// A space follows ':' unless the colon belongs to an ObjC method expression.
3457 if (Left.is(tok::colon))
3458 return !Left.is(TT_ObjCMethodExpr);
// No space after '::'.
3459 if (Left.is(tok::coloncolon))
3460 return false;
// Angle brackets. In text protos (and proto dict literals) the spacing follows
// Cpp11BracedListStyle, except that an empty "<>" never gets a space.
// Everywhere else there is no space, unless Right is the l_paren of an
// overloaded operator, in which case later rules decide.
3461 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
3462 if (Style.Language == FormatStyle::LK_TextProto ||
3463 (Style.Language == FormatStyle::LK_Proto &&
3464 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
3465 // Format empty list as `<>`.
3466 if (Left.is(tok::less) && Right.is(tok::greater))
3467 return false;
3468 return !Style.Cpp11BracedListStyle;
3469 }
3470 // Don't attempt to format operator<(), as it is handled later.
3471 if (Right.isNot(TT_OverloadedOperatorLParen))
3472 return false;
3473 }
// Before '...': a space only after a literal, or after an identifier that is
// directly preceded by 'case'.
3474 if (Right.is(tok::ellipsis)) {
3475 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
3476 Left.Previous->is(tok::kw_case));
3477 }
// '[' followed by '&' follows SpacesInSquareBrackets.
3478 if (Left.is(tok::l_square) && Right.is(tok::amp))
3479 return Style.SpacesInSquareBrackets;
// Space *before* a pointer/reference token.
3480 if (Right.is(TT_PointerOrReference)) {
// After ')' on a line that might be a function declaration a space is
// required, unless the ')' is annotated TT_TypeDeclarationParen and its
// matching '(' has a preceding non-comment token; then the rules below decide.
3481 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
3482 if (!Left.MatchingParen)
3483 return true;
3484 FormatToken *TokenBeforeMatchingParen =
3485 Left.MatchingParen->getPreviousNonComment();
3486 if (!TokenBeforeMatchingParen || !Left.is(TT_TypeDeclarationParen))
3487 return true;
3488 }
3489 // Add a space if the previous token is a pointer qualifier or the closing
3490 // parenthesis of __attribute__(()) expression and the style requires spaces
3491 // after pointer qualifiers.
3492 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
3493 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3494 (Left.is(TT_AttributeParen) ||
3495 Left.canBePointerOrReferenceQualifier())) {
3496 return true;
3497 }
// Always separate a literal from a following pointer/reference token.
3498 if (Left.Tok.isLiteral())
3499 return true;
3500 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
3501 if (Left.isTypeOrIdentifier() && Right.Next && Right.Next->Next &&
3502 Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
3503 return getTokenPointerOrReferenceAlignment(Right) !=
3504 FormatStyle::PAS_Left;
3505 }
// General case: never a space after another pointer/reference token or '('.
// Otherwise a space is required unless the alignment is PAS_Left, but
// multi-variable declarations at nesting level 0 (or level 1 inside a
// for/if/switch init statement) get the space regardless of alignment.
3506 return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
3507 (getTokenPointerOrReferenceAlignment(Right) !=
3508 FormatStyle::PAS_Left ||
3509 (Line.IsMultiVariableDeclStmt &&
3510 (Left.NestingLevel == 0 ||
3511 (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
3512 }
// Space before the '(' of a function type, unless Left is '(' or Left is a
// pointer/reference token that is right-aligned or part of a multi-variable
// declaration statement.
3513 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
3514 (!Left.is(TT_PointerOrReference) ||
3515 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
3516 !Line.IsMultiVariableDeclStmt))) {
3517 return true;
3518 }
// Space *after* a pointer/reference token.
3519 if (Left.is(TT_PointerOrReference)) {
3520 // Add a space if the next token is a pointer qualifier and the style
3521 // requires spaces before pointer qualifiers.
3522 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
3523 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
3524 Right.canBePointerOrReferenceQualifier()) {
3525 return true;
3526 }
3527 // & 1
3528 if (Right.Tok.isLiteral())
3529 return true;
3530 // & /* comment
3531 if (Right.is(TT_BlockComment))
3532 return true;
3533 // foo() -> const Bar * override/final
3534 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final,
3535 tok::kw_noexcept) &&
3536 !Right.is(TT_StartOfName)) {
3537 return true;
3538 }
3539 // & {
3540 if (Right.is(tok::l_brace) && Right.is(BK_Block))
3541 return true;
3542 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
3543 if (Left.Previous && Left.Previous->isTypeOrIdentifier() && Right.Next &&
3544 Right.Next->is(TT_RangeBasedForLoopColon)) {
3545 return getTokenPointerOrReferenceAlignment(Left) !=
3546 FormatStyle::PAS_Right;
3547 }
// No space between a pointer/reference token and another pointer/reference
// token, an array subscript '[' or '('.
3548 if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
3549 tok::l_paren)) {
3550 return false;
3551 }
// Right alignment attaches the pointer/reference token to what follows.
3552 if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
3553 return false;
3554 // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
3555 // because it does not take into account nested scopes like lambdas.
3556 // In multi-variable declaration statements, attach */& to the variable
3557 // independently of the style. However, avoid doing it if we are in a nested
3558 // scope, e.g. lambda. We still need to special-case statements with
3559 // initializers.
3560 if (Line.IsMultiVariableDeclStmt &&
3561 (Left.NestingLevel == Line.First->NestingLevel ||
3562 ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
3563 startsWithInitStatement(Line)))) {
3564 return false;
3565 }
// Otherwise a space follows, unless the pointer/reference token has no
// previous token or is directly preceded by '(', '::' or '['.
3566 return Left.Previous && !Left.Previous->isOneOf(
3567 tok::l_paren, tok::coloncolon, tok::l_square);
3568 }
3569 // Ensure right pointer alignment with ellipsis e.g. int *...P
3570 if (Left.is(tok::ellipsis) && Left.Previous &&
3571 Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) {
3572 return Style.PointerAlignment != FormatStyle::PAS_Right;
3573 }
3574
// No space between '(' and '*', and none between '*' and a following '*', '&'
// or '&&'.
3575 if (Right.is(tok::star) && Left.is(tok::l_paren))
3576 return false;
3577 if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp))
3578 return false;
// For a '*', '&' or '&&' not handled above: walk backwards from Left over
// identifiers, simple type specifiers, template argument lists and '::'
// until reaching 'operator' or a token that is none of those.
3579 if (Right.isOneOf(tok::star, tok::amp, tok::ampamp)) {
3580 const FormatToken *Previous = &Left;
3581 while (Previous && !Previous->is(tok::kw_operator)) {
3582 if (Previous->is(tok::identifier) || Previous->isSimpleTypeSpecifier()) {
3583 Previous = Previous->getPreviousNonComment();
3584 continue;
3585 }
3586 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
3587 Previous = Previous->MatchingParen->getPreviousNonComment();
3588 continue;
3589 }
3590 if (Previous->is(tok::coloncolon)) {
3591 Previous = Previous->getPreviousNonComment();
3592 continue;
3593 }
3594 break;
3595 }
3596 // Space between the type and the * in:
3597 // operator void*()
3598 // operator char*()
3599 // operator void const*()
3600 // operator void volatile*()
3601 // operator /*comment*/ const char*()
3602 // operator volatile /*comment*/ char*()
3603 // operator Foo*()
3604 // operator C<T>*()
3605 // operator std::Foo*()
3606 // operator C<T>::D<U>*()
3607 // dependent on PointerAlignment style.
3608 if (Previous) {
3609 if (Previous->endsSequence(tok::kw_operator))
3610 return Style.PointerAlignment != FormatStyle::PAS_Left;
3611 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
3612 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
3613 (Style.SpaceAroundPointerQualifiers ==
3614 FormatStyle::SAPQ_After) ||
3615 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
3616 }
3617 }
3618 }
// Shared by the '[' and ']' rules below: whether the brackets of an array
// initializer are padded. True with SpacesInContainerLiterals, or in
// proto/text-proto without Cpp11BracedListStyle when the '[' ends the
// sequence checked below.
3619 const auto SpaceRequiredForArrayInitializerLSquare =
3620 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
3621 return Style.SpacesInContainerLiterals ||
3622 ((Style.Language == FormatStyle::LK_Proto ||
3623 Style.Language == FormatStyle::LK_TextProto) &&
3624 !Style.Cpp11BracedListStyle &&
3625 LSquareTok.endsSequence(tok::l_square, tok::colon,
3626 TT_SelectorName));
3627 };
// Space after '[': never before ']'; otherwise by bracket kind (array
// initializer vs. subscript / structured binding / lambda).
3628 if (Left.is(tok::l_square)) {
3629 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
3630 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
3631 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
3632 TT_LambdaLSquare) &&
3633 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
3634 }
// Space before ']': decided by the kind of the matching opening token; no
// space when the matching token is unknown.
3635 if (Right.is(tok::r_square)) {
3636 return Right.MatchingParen &&
3637 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
3638 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
3639 Style)) ||
3640 (Style.SpacesInSquareBrackets &&
3641 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
3642 TT_StructuredBindingLSquare,
3643 TT_LambdaLSquare)) ||
3644 Right.MatchingParen->is(TT_AttributeParen));
3645 }
// No space before '[', except for the bracket kinds excluded here, after a
// numeric constant or dict-literal token, or when SpaceBeforeSquareBrackets
// applies to an array subscript that does not follow ']'.
3646 if (Right.is(tok::l_square) &&
3647 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
3648 TT_DesignatedInitializerLSquare,
3649 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
3650 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
3651 !(!Left.is(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
3652 Right.is(TT_ArraySubscriptLSquare))) {
3653 return false;
3654 }
3655 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
3656 return !Left.Children.empty(); // No spaces in "{}".
// Just inside non-block braces: with Cpp11BracedListStyle they are padded
// like parentheses (SpacesInParentheses), otherwise always padded.
3657 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
3658 (Right.is(tok::r_brace) && Right.MatchingParen &&
3659 Right.MatchingParen->isNot(BK_Block))) {
3660 return Style.Cpp11BracedListStyle ? Style.SpacesInParentheses : true;
3661 }
3662 if (Left.is(TT_BlockComment)) {
3663 // No whitespace in x(/*foo=*/1), except for JavaScript.
3664 return Style.isJavaScript() || !Left.TokenText.endswith("=*/");
3665 }
3666
3667 // Space between template and attribute.
3668 // e.g. template <typename T> [[nodiscard]] ...
3669 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
3670 return true;
3671 // Space before parentheses common for all languages
3672 if (Right.is(tok::l_paren)) {
// Most rules below combine a specific SpaceBeforeParensOptions flag with the
// generic spaceRequiredBeforeParens(Right) check.
3673 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
3674 return spaceRequiredBeforeParens(Right);
3675 if (Left.isOneOf(TT_RequiresClause,
3676 TT_RequiresClauseInARequiresExpression)) {
3677 return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
3678 spaceRequiredBeforeParens(Right);
3679 }
3680 if (Left.is(TT_RequiresExpression)) {
3681 return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
3682 spaceRequiredBeforeParens(Right);
3683 }
// Always a space after the closing ')' or ']' of an attribute.
3684 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
3685 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
3686 return true;
3687 }
3688 if (Left.is(TT_ForEachMacro)) {
3689 return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
3690 spaceRequiredBeforeParens(Right);
3691 }
3692 if (Left.is(TT_IfMacro)) {
3693 return Style.SpaceBeforeParensOptions.AfterIfMacros ||
3694 spaceRequiredBeforeParens(Right);
3695 }
// Objective-C declaration lines always get the space.
3696 if (Line.Type == LT_ObjCDecl)
3697 return true;
// Always a space between ';' and '('.
3698 if (Left.is(tok::semi))
3699 return true;
// Control statements (and any '(' annotated as a condition paren).
3700 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
3701 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
3702 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
3703 Right.is(TT_ConditionLParen)) {
3704 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3705 spaceRequiredBeforeParens(Right);
3706 }
3707
3708 // TODO add Operator overloading specific Options to
3709 // SpaceBeforeParensOptions
3710 if (Right.is(TT_OverloadedOperatorLParen))
3711 return spaceRequiredBeforeParens(Right);
3712 // Function declaration or definition
3713 if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
3714 if (Line.mightBeFunctionDefinition()) {
3715 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
3716 spaceRequiredBeforeParens(Right);
3717 } else {
3718 return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
3719 spaceRequiredBeforeParens(Right);
3720 }
3721 }
3722 // Lambda
3723 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
3724 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
3725 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
3726 spaceRequiredBeforeParens(Right);
3727 }
// The keyword rules in this block are skipped when Left is directly preceded
// by '.'.
3728 if (!Left.Previous || Left.Previous->isNot(tok::period)) {
3729 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
3730 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3731 spaceRequiredBeforeParens(Right);
3732 }
3733 if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
3734 return ((!Line.MightBeFunctionDecl || !Left.Previous) &&
3735 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
3736 spaceRequiredBeforeParens(Right);
3737 }
3738
// ']' whose matching '[' directly follows 'delete'.
3739 if (Left.is(tok::r_square) && Left.MatchingParen &&
3740 Left.MatchingParen->Previous &&
3741 Left.MatchingParen->Previous->is(tok::kw_delete)) {
3742 return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
3743 spaceRequiredBeforeParens(Right);
3744 }
3745 }
3746 // Handle builtins like identifiers.
3747 if (Line.Type != LT_PreprocessorDirective &&
3748 (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
3749 return spaceRequiredBeforeParens(Right);
3750 }
// No other case puts a space before '('.
3751 return false;
3752 }
// No space between '@' and an Objective-C keyword.
3753 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
3754 return false;
// Before a unary operator: a space unless it follows '(', '[', '@' or the
// colon of an ObjC method expression.
3755 if (Right.is(TT_UnaryOperator)) {
3756 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
3757 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
3758 }
// No space before a non-block '{' that follows an identifier, '>', ']', ')'
// or a simple type specifier, provided the brace has a following non-comment
// token.
3759 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
3760 tok::r_paren) ||
3761 Left.isSimpleTypeSpecifier()) &&
3762 Right.is(tok::l_brace) && Right.getNextNonComment() &&
3763 Right.isNot(BK_Block)) {
3764 return false;
3765 }
// No space on either side of '.'.
3766 if (Left.is(tok::period) || Right.is(tok::period))
3767 return false;
3768 // u#str, U#str, L#str, u8#str
3769 // uR#str, UR#str, LR#str, u8R#str
3770 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
3771 (Left.TokenText == "L" || Left.TokenText == "u" ||
3772 Left.TokenText == "U" || Left.TokenText == "u8" ||
3773 Left.TokenText == "LR" || Left.TokenText == "uR" ||
3774 Left.TokenText == "UR" || Left.TokenText == "u8R")) {
3775 return false;
3776 }
3777 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
3778 Left.MatchingParen->Previous &&
3779 (Left.MatchingParen->Previous->is(tok::period) ||
3780 Left.MatchingParen->Previous->is(tok::coloncolon))) {
3781 // Java call to generic function with explicit type:
3782 // A.<B<C<...>>>DoSomething();
3783 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
3784 return false;
3785 }
// No space between a template closer and '['.
3786 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
3787 return false;
3788 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
3789 // Objective-C dictionary literal -> no space after opening brace.
3790 return false;
3791 }
3792 if (Right.is(tok::r_brace) && Right.MatchingParen &&
3793 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
3794 // Objective-C dictionary literal -> no space before closing brace.
3795 return false;
3796 }
3797 if (Right.getType() == TT_TrailingAnnotation &&
3798 Right.isOneOf(tok::amp, tok::ampamp) &&
3799 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
3800 (!Right.Next || Right.Next->is(tok::semi))) {
3801 // Match const and volatile ref-qualifiers without any additional
3802 // qualifiers such as
3803 // void Fn() const &;
3804 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
3805 }
3806
// No rule above matched: default to requiring a space.
3807 return true;
3808 }
3809
spaceRequiredBefore(const AnnotatedLine & Line,const FormatToken & Right) const3810 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
3811 const FormatToken &Right) const {
3812 const FormatToken &Left = *Right.Previous;
3813
3814 // If the token is finalized don't touch it (as it could be in a
3815 // clang-format-off section).
3816 if (Left.Finalized)
3817 return Right.hasWhitespaceBefore();
3818
3819 // Never ever merge two words.
3820 if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
3821 return true;
3822
3823 // Leave a space between * and /* to avoid C4138 `comment end` found outside
3824 // of comment.
3825 if (Left.is(tok::star) && Right.is(tok::comment))
3826 return true;
3827
3828 if (Style.isCpp()) {
3829 // Space between UDL and dot: auto b = 4s .count();
3830 if (Right.is(tok::period) && Left.is(tok::numeric_constant))
3831 return true;
3832 // Space between import <iostream>.
3833 // or import .....;
3834 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
3835 return true;
3836 // Space between `module :` and `import :`.
3837 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
3838 Right.is(TT_ModulePartitionColon)) {
3839 return true;
3840 }
3841 // No space between import foo:bar but keep a space between import :bar;
3842 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
3843 return false;
3844 // No space between :bar;
3845 if (Left.is(TT_ModulePartitionColon) &&
3846 Right.isOneOf(tok::identifier, tok::kw_private)) {
3847 return false;
3848 }
3849 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
3850 Line.First->is(Keywords.kw_import)) {
3851 return false;
3852 }
3853 // Space in __attribute__((attr)) ::type.
3854 if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon))
3855 return true;
3856
3857 if (Left.is(tok::kw_operator))
3858 return Right.is(tok::coloncolon);
3859 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
3860 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
3861 return true;
3862 }
3863 if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
3864 Right.is(TT_TemplateOpener)) {
3865 return true;
3866 }
3867 } else if (Style.Language == FormatStyle::LK_Proto ||
3868 Style.Language == FormatStyle::LK_TextProto) {
3869 if (Right.is(tok::period) &&
3870 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
3871 Keywords.kw_repeated, Keywords.kw_extend)) {
3872 return true;
3873 }
3874 if (Right.is(tok::l_paren) &&
3875 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
3876 return true;
3877 }
3878 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
3879 return true;
3880 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
3881 if (Left.is(tok::slash) || Right.is(tok::slash))
3882 return false;
3883 if (Left.MatchingParen &&
3884 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
3885 Right.isOneOf(tok::l_brace, tok::less)) {
3886 return !Style.Cpp11BracedListStyle;
3887 }
3888 // A percent is probably part of a formatting specification, such as %lld.
3889 if (Left.is(tok::percent))
3890 return false;
3891 // Preserve the existence of a space before a percent for cases like 0x%04x
3892 // and "%d %d"
3893 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
3894 return Right.hasWhitespaceBefore();
3895 } else if (Style.isJson()) {
3896 if (Right.is(tok::colon))
3897 return false;
3898 } else if (Style.isCSharp()) {
3899 // Require spaces around '{' and before '}' unless they appear in
3900 // interpolated strings. Interpolated strings are merged into a single token
3901 // so cannot have spaces inserted by this function.
3902
3903 // No space between 'this' and '['
3904 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
3905 return false;
3906
3907 // No space between 'new' and '('
3908 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
3909 return false;
3910
3911 // Space before { (including space within '{ {').
3912 if (Right.is(tok::l_brace))
3913 return true;
3914
3915 // Spaces inside braces.
3916 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
3917 return true;
3918
3919 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
3920 return true;
3921
3922 // Spaces around '=>'.
3923 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
3924 return true;
3925
3926 // No spaces around attribute target colons
3927 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
3928 return false;
3929
3930 // space between type and variable e.g. Dictionary<string,string> foo;
3931 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
3932 return true;
3933
3934 // spaces inside square brackets.
3935 if (Left.is(tok::l_square) || Right.is(tok::r_square))
3936 return Style.SpacesInSquareBrackets;
3937
3938 // No space before ? in nullable types.
3939 if (Right.is(TT_CSharpNullable))
3940 return false;
3941
3942 // No space before null forgiving '!'.
3943 if (Right.is(TT_NonNullAssertion))
3944 return false;
3945
3946 // No space between consecutive commas '[,,]'.
3947 if (Left.is(tok::comma) && Right.is(tok::comma))
3948 return false;
3949
3950 // space after var in `var (key, value)`
3951 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
3952 return true;
3953
3954 // space between keywords and paren e.g. "using ("
3955 if (Right.is(tok::l_paren)) {
3956 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
3957 Keywords.kw_lock)) {
3958 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
3959 spaceRequiredBeforeParens(Right);
3960 }
3961 }
3962
3963 // space between method modifier and opening parenthesis of a tuple return
3964 // type
3965 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
3966 tok::kw_virtual, tok::kw_extern, tok::kw_static,
3967 Keywords.kw_internal, Keywords.kw_abstract,
3968 Keywords.kw_sealed, Keywords.kw_override,
3969 Keywords.kw_async, Keywords.kw_unsafe) &&
3970 Right.is(tok::l_paren)) {
3971 return true;
3972 }
3973 } else if (Style.isJavaScript()) {
3974 if (Left.is(TT_FatArrow))
3975 return true;
3976 // for await ( ...
3977 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
3978 Left.Previous->is(tok::kw_for)) {
3979 return true;
3980 }
3981 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
3982 Right.MatchingParen) {
3983 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
3984 // An async arrow function, for example: `x = async () => foo();`,
3985 // as opposed to calling a function called async: `x = async();`
3986 if (Next && Next->is(TT_FatArrow))
3987 return true;
3988 }
3989 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
3990 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) {
3991 return false;
3992 }
3993 // In tagged template literals ("html`bar baz`"), there is no space between
3994 // the tag identifier and the template string.
3995 if (Keywords.IsJavaScriptIdentifier(Left,
3996 /* AcceptIdentifierName= */ false) &&
3997 Right.is(TT_TemplateString)) {
3998 return false;
3999 }
4000 if (Right.is(tok::star) &&
4001 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
4002 return false;
4003 }
4004 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
4005 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
4006 Keywords.kw_extends, Keywords.kw_implements)) {
4007 return true;
4008 }
4009 if (Right.is(tok::l_paren)) {
4010 // JS methods can use some keywords as names (e.g. `delete()`).
4011 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
4012 return false;
4013 // Valid JS method names can include keywords, e.g. `foo.delete()` or
4014 // `bar.instanceof()`. Recognize call positions by preceding period.
4015 if (Left.Previous && Left.Previous->is(tok::period) &&
4016 Left.Tok.getIdentifierInfo()) {
4017 return false;
4018 }
4019 // Additional unary JavaScript operators that need a space after.
4020 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
4021 tok::kw_void)) {
4022 return true;
4023 }
4024 }
4025 // `foo as const;` casts into a const type.
4026 if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
4027 return false;
4028 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
4029 tok::kw_const) ||
4030 // "of" is only a keyword if it appears after another identifier
4031 // (e.g. as "const x of y" in a for loop), or after a destructuring
4032 // operation (const [x, y] of z, const {a, b} of c).
4033 (Left.is(Keywords.kw_of) && Left.Previous &&
4034 (Left.Previous->is(tok::identifier) ||
4035 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
4036 (!Left.Previous || !Left.Previous->is(tok::period))) {
4037 return true;
4038 }
4039 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
4040 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
4041 return false;
4042 }
4043 if (Left.is(Keywords.kw_as) &&
4044 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
4045 return true;
4046 }
4047 if (Left.is(tok::kw_default) && Left.Previous &&
4048 Left.Previous->is(tok::kw_export)) {
4049 return true;
4050 }
4051 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
4052 return true;
4053 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
4054 return false;
4055 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
4056 return false;
4057 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
4058 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
4059 return false;
4060 }
4061 if (Left.is(tok::ellipsis))
4062 return false;
4063 if (Left.is(TT_TemplateCloser) &&
4064 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
4065 Keywords.kw_implements, Keywords.kw_extends)) {
4066 // Type assertions ('<type>expr') are not followed by whitespace. Other
4067 // locations that should have whitespace following are identified by the
4068 // above set of follower tokens.
4069 return false;
4070 }
4071 if (Right.is(TT_NonNullAssertion))
4072 return false;
4073 if (Left.is(TT_NonNullAssertion) &&
4074 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
4075 return true; // "x! as string", "x! in y"
4076 }
4077 } else if (Style.Language == FormatStyle::LK_Java) {
4078 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
4079 return true;
4080 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
4081 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4082 spaceRequiredBeforeParens(Right);
4083 }
4084 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
4085 tok::kw_protected) ||
4086 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
4087 Keywords.kw_native)) &&
4088 Right.is(TT_TemplateOpener)) {
4089 return true;
4090 }
4091 } else if (Style.isVerilog()) {
4092 // Add space between things in a primitive's state table unless in a
4093 // transition like `(0?)`.
4094 if ((Left.is(TT_VerilogTableItem) &&
4095 !Right.isOneOf(tok::r_paren, tok::semi)) ||
4096 (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
4097 const FormatToken *Next = Right.getNextNonComment();
4098 return !(Next && Next->is(tok::r_paren));
4099 }
4100 // Don't add space within a delay like `#0`.
4101 if (Left.isNot(TT_BinaryOperator) &&
4102 Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
4103 return false;
4104 }
4105 // Add space after a delay.
4106 if (!Right.is(tok::semi) &&
4107 (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
4108 Left.endsSequence(tok::numeric_constant,
4109 Keywords.kw_verilogHashHash) ||
4110 (Left.is(tok::r_paren) && Left.MatchingParen &&
4111 Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
4112 return true;
4113 }
4114 // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
4115 // literal like `'{}`.
4116 if (Left.is(Keywords.kw_apostrophe) ||
4117 (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
4118 return false;
4119 }
4120 // Add space between the type name and dimension like `logic [1:0]`.
4121 if (Right.is(tok::l_square) &&
4122 Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
4123 return true;
4124 }
4125 // Don't add spaces between a casting type and the quote or repetition count
4126 // and the brace.
4127 if ((Right.is(Keywords.kw_apostrophe) ||
4128 (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
4129 !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
4130 Keywords.isVerilogWordOperator(Left)) &&
4131 (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
4132 tok::numeric_constant) ||
4133 Keywords.isWordLike(Left))) {
4134 return false;
4135 }
4136 // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
4137 if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
4138 return true;
4139 }
4140 if (Left.is(TT_ImplicitStringLiteral))
4141 return Right.hasWhitespaceBefore();
4142 if (Line.Type == LT_ObjCMethodDecl) {
4143 if (Left.is(TT_ObjCMethodSpecifier))
4144 return true;
4145 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) {
4146 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
4147 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
4148 // method declaration.
4149 return false;
4150 }
4151 }
4152 if (Line.Type == LT_ObjCProperty &&
4153 (Right.is(tok::equal) || Left.is(tok::equal))) {
4154 return false;
4155 }
4156
4157 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
4158 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
4159 return true;
4160 }
4161 if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
4162 return true;
4163 if (Right.is(tok::comma))
4164 return false;
4165 if (Right.is(TT_ObjCBlockLParen))
4166 return true;
4167 if (Right.is(TT_CtorInitializerColon))
4168 return Style.SpaceBeforeCtorInitializerColon;
4169 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
4170 return false;
4171 if (Right.is(TT_RangeBasedForLoopColon) &&
4172 !Style.SpaceBeforeRangeBasedForLoopColon) {
4173 return false;
4174 }
4175 if (Left.is(TT_BitFieldColon)) {
4176 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4177 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
4178 }
4179 if (Right.is(tok::colon)) {
4180 if (Right.is(TT_GotoLabelColon) ||
4181 (!Style.isVerilog() &&
4182 Line.First->isOneOf(tok::kw_default, tok::kw_case))) {
4183 return Style.SpaceBeforeCaseColon;
4184 }
4185 if (Line.First->isOneOf(tok::kw_default, tok::kw_case))
4186 return Style.SpaceBeforeCaseColon;
4187 const FormatToken *Next = Right.getNextNonComment();
4188 if (!Next || Next->is(tok::semi))
4189 return false;
4190 if (Right.is(TT_ObjCMethodExpr))
4191 return false;
4192 if (Left.is(tok::question))
4193 return false;
4194 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
4195 return false;
4196 if (Right.is(TT_DictLiteral))
4197 return Style.SpacesInContainerLiterals;
4198 if (Right.is(TT_AttributeColon))
4199 return false;
4200 if (Right.is(TT_CSharpNamedArgumentColon))
4201 return false;
4202 if (Right.is(TT_GenericSelectionColon))
4203 return false;
4204 if (Right.is(TT_BitFieldColon)) {
4205 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
4206 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
4207 }
4208 return true;
4209 }
4210 // Do not merge "- -" into "--".
4211 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
4212 Right.isOneOf(tok::minus, tok::minusminus)) ||
4213 (Left.isOneOf(tok::plus, tok::plusplus) &&
4214 Right.isOneOf(tok::plus, tok::plusplus))) {
4215 return true;
4216 }
4217 if (Left.is(TT_UnaryOperator)) {
4218 if (!Right.is(tok::l_paren)) {
4219 // The alternative operators for ~ and ! are "compl" and "not".
4220 // If they are used instead, we do not want to combine them with
4221 // the token to the right, unless that is a left paren.
4222 if (Left.is(tok::exclaim) && Left.TokenText == "not")
4223 return true;
4224 if (Left.is(tok::tilde) && Left.TokenText == "compl")
4225 return true;
4226 // Lambda captures allow for a lone &, so "&]" needs to be properly
4227 // handled.
4228 if (Left.is(tok::amp) && Right.is(tok::r_square))
4229 return Style.SpacesInSquareBrackets;
4230 }
4231 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
4232 Right.is(TT_BinaryOperator);
4233 }
4234
4235 // If the next token is a binary operator or a selector name, we have
4236 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
4237 if (Left.is(TT_CastRParen)) {
4238 return Style.SpaceAfterCStyleCast ||
4239 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
4240 }
4241
4242 auto ShouldAddSpacesInAngles = [this, &Right]() {
4243 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
4244 return true;
4245 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
4246 return Right.hasWhitespaceBefore();
4247 return false;
4248 };
4249
4250 if (Left.is(tok::greater) && Right.is(tok::greater)) {
4251 if (Style.Language == FormatStyle::LK_TextProto ||
4252 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
4253 return !Style.Cpp11BracedListStyle;
4254 }
4255 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
4256 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4257 ShouldAddSpacesInAngles());
4258 }
4259 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
4260 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
4261 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
4262 return false;
4263 }
4264 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
4265 Right.getPrecedence() == prec::Assignment) {
4266 return false;
4267 }
4268 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
4269 (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
4270 return false;
4271 }
4272 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
4273 // Generally don't remove existing spaces between an identifier and "::".
4274 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
4275 // this turns out to be too lenient, add analysis of the identifier itself.
4276 return Right.hasWhitespaceBefore();
4277 }
4278 if (Right.is(tok::coloncolon) &&
4279 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
4280 // Put a space between < and :: in vector< ::std::string >
4281 return (Left.is(TT_TemplateOpener) &&
4282 ((Style.Standard < FormatStyle::LS_Cpp11) ||
4283 ShouldAddSpacesInAngles())) ||
4284 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
4285 tok::kw___super, TT_TemplateOpener,
4286 TT_TemplateCloser)) ||
4287 (Left.is(tok::l_paren) && Style.SpacesInParentheses);
4288 }
4289 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
4290 return ShouldAddSpacesInAngles();
4291 // Space before TT_StructuredBindingLSquare.
4292 if (Right.is(TT_StructuredBindingLSquare)) {
4293 return !Left.isOneOf(tok::amp, tok::ampamp) ||
4294 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
4295 }
4296 // Space before & or && following a TT_StructuredBindingLSquare.
4297 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
4298 Right.isOneOf(tok::amp, tok::ampamp)) {
4299 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4300 }
4301 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
4302 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
4303 !Right.is(tok::r_paren))) {
4304 return true;
4305 }
4306 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
4307 Left.MatchingParen &&
4308 Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
4309 return false;
4310 }
4311 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
4312 Line.Type == LT_ImportStatement) {
4313 return true;
4314 }
4315 if (Right.is(TT_TrailingUnaryOperator))
4316 return false;
4317 if (Left.is(TT_RegexLiteral))
4318 return false;
4319 return spaceRequiredBetween(Line, Left, Right);
4320 }
4321
4322 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
isAllmanBrace(const FormatToken & Tok)4323 static bool isAllmanBrace(const FormatToken &Tok) {
4324 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4325 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
4326 }
4327
4328 // Returns 'true' if 'Tok' is a function argument.
IsFunctionArgument(const FormatToken & Tok)4329 static bool IsFunctionArgument(const FormatToken &Tok) {
4330 return Tok.MatchingParen && Tok.MatchingParen->Next &&
4331 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
4332 }
4333
4334 static bool
isItAnEmptyLambdaAllowed(const FormatToken & Tok,FormatStyle::ShortLambdaStyle ShortLambdaOption)4335 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
4336 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
4337 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
4338 }
4339
isAllmanLambdaBrace(const FormatToken & Tok)4340 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
4341 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
4342 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
4343 }
4344
4345 // Returns the first token on the line that is not a comment.
getFirstNonComment(const AnnotatedLine & Line)4346 static const FormatToken *getFirstNonComment(const AnnotatedLine &Line) {
4347 const FormatToken *Next = Line.First;
4348 if (!Next)
4349 return Next;
4350 if (Next->is(tok::comment))
4351 Next = Next->getNextNonComment();
4352 return Next;
4353 }
4354
/// Returns \c true if a line break is mandatory between \p Right and the
/// token preceding it (\c Right.Previous, called \c Left below).
///
/// The rules are evaluated top to bottom and the first rule that returns
/// decides the result, so their order matters:
///  - newlines already present in the input,
///  - language-specific rules (C#, JavaScript, Java, Verilog, then the
///    C++/ObjC/Proto/TableGen/TextProto string-literal rule),
///  - JSON rules,
///  - language-independent rules driven by \c Style and token annotations.
/// If no rule fires, no break is required and \c false is returned.
///
/// \param Line  The annotated line containing \p Right.
/// \param Right The token in front of which a break is considered. It must
///              have a previous token: \c Right.Previous is dereferenced
///              unconditionally.
bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
                                     const FormatToken &Right) const {
  const FormatToken &Left = *Right.Previous;
  // More than one newline precedes Right in the input and the style keeps
  // empty lines: a break is required here.
  if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
    return true;

  if (Style.isCSharp()) {
    // Brace following a fat arrow is wrapped like a function brace.
    if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
        Style.BraceWrapping.AfterFunction) {
      return true;
    }
    // Never force a break on either side of a named-argument colon.
    if (Right.is(TT_CSharpNamedArgumentColon) ||
        Left.is(TT_CSharpNamedArgumentColon)) {
      return false;
    }
    if (Right.is(TT_CSharpGenericTypeConstraint))
      return true;
    // Break before a numeric constant or a lone `_` that is directly followed
    // by a fat arrow.
    if (Right.Next && Right.Next->is(TT_FatArrow) &&
        (Right.is(tok::numeric_constant) ||
         (Right.is(tok::identifier) && Right.TokenText == "_"))) {
      return true;
    }

    // Break after C# [...] and before public/protected/private/internal.
    if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
        (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
         Right.is(Keywords.kw_internal))) {
      return true;
    }
    // Break between ] and [ but only when there are really 2 attributes.
    if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
        Left.is(tok::r_square) && Right.is(tok::l_square)) {
      return true;
    }

  } else if (Style.isJavaScript()) {
    // Break before the second string literal in `"a" + "b"`.
    // FIXME: This might apply to other languages and token kinds.
    if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
        Left.Previous->is(tok::string_literal)) {
      return true;
    }
    if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
        Left.Previous && Left.Previous->is(tok::equal) &&
        Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
                            tok::kw_const) &&
        // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
        // above.
        !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
      // Object literals on the top level of a file are treated as "enum-style".
      // Each key/value pair is put on a separate line, instead of bin-packing.
      return true;
    }
    if (Left.is(tok::l_brace) && Line.Level == 0 &&
        (Line.startsWith(tok::kw_enum) ||
         Line.startsWith(tok::kw_const, tok::kw_enum) ||
         Line.startsWith(tok::kw_export, tok::kw_enum) ||
         Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
      // JavaScript top-level enum key/value pairs are put on separate lines
      // instead of bin-packing.
      return true;
    }
    if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
        Left.Previous->is(TT_FatArrow)) {
      // JS arrow function (=> {...}).
      switch (Style.AllowShortLambdasOnASingleLine) {
      case FormatStyle::SLS_All:
        return false;
      case FormatStyle::SLS_None:
        return true;
      case FormatStyle::SLS_Empty:
        return !Left.Children.empty();
      case FormatStyle::SLS_Inline:
        // allow one-lining inline (e.g. in function call args) and empty arrow
        // functions.
        return (Left.NestingLevel == 0 && Line.Level == 0) &&
               !Left.Children.empty();
      }
      llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
    }

    if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
        !Left.Children.empty()) {
      // Support AllowShortFunctionsOnASingleLine for JavaScript.
      return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
             Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
             (Left.NestingLevel == 0 && Line.Level == 0 &&
              Style.AllowShortFunctionsOnASingleLine &
                  FormatStyle::SFS_InlineOnly);
    }
  } else if (Style.Language == FormatStyle::LK_Java) {
    // Break before the `+` in `"a" + "b"`.
    if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
        Right.Next->is(tok::string_literal)) {
      return true;
    }
  } else if (Style.isVerilog()) {
    // Break after labels. In Verilog labels don't have the 'case' keyword, so
    // it is hard to identify them in UnwrappedLineParser.
    if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
      return true;
  } else if (Style.Language == FormatStyle::LK_Cpp ||
             Style.Language == FormatStyle::LK_ObjC ||
             Style.Language == FormatStyle::LK_Proto ||
             Style.Language == FormatStyle::LK_TableGen ||
             Style.Language == FormatStyle::LK_TextProto) {
    // Two adjacent string literals always go on separate lines.
    if (Left.isStringLiteral() && Right.isStringLiteral())
      return true;
  }

  // Basic JSON newline processing.
  if (Style.isJson()) {
    // Always break after a JSON record opener.
    // {
    // }
    if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
      return true;
    // Always break after a JSON array opener based on BreakArrays.
    if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
         Right.isNot(tok::r_square)) ||
        Left.is(tok::comma)) {
      if (Right.is(tok::l_brace))
        return true;
      // Scan to the right; if we see an object or an array opener before the
      // first closing brace or bracket, then break.
      for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
        if (Tok->isOneOf(tok::l_brace, tok::l_square))
          return true;
        if (Tok->isOneOf(tok::r_brace, tok::r_square))
          break;
      }
      return Style.BreakArrays;
    }
  }

  // Inline asm colons get their own line when the style always breaks before
  // them.
  if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
      Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
    return true;
  }

  // If the last token before a '}', ']', or ')' is a comma or a trailing
  // comment, the intention is to insert a line break after it in order to make
  // shuffling around entries easier. Import statements, especially in
  // JavaScript, can be an exception to this rule.
  if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
    const FormatToken *BeforeClosingBrace = nullptr;
    if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
         (Style.isJavaScript() && Left.is(tok::l_paren))) &&
        Left.isNot(BK_Block) && Left.MatchingParen) {
      // Left opens the list: inspect the token before its matching closer.
      BeforeClosingBrace = Left.MatchingParen->Previous;
    } else if (Right.MatchingParen &&
               (Right.MatchingParen->isOneOf(tok::l_brace,
                                             TT_ArrayInitializerLSquare) ||
                (Style.isJavaScript() &&
                 Right.MatchingParen->is(tok::l_paren)))) {
      // Right closes the list: Left is the token before the closer.
      BeforeClosingBrace = &Left;
    }
    if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
                               BeforeClosingBrace->isTrailingComment())) {
      return true;
    }
  }

  // A comment forces a break only if it already started on a new line in the
  // input (via an unescaped newline) and does not directly follow a braced-init
  // opener or a constructor initializer colon.
  if (Right.is(tok::comment)) {
    return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
           (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
  }
  if (Left.isTrailingComment())
    return true;
  if (Left.IsUnterminatedLiteral)
    return true;
  // Break before `<<` when it sits between two string literals.
  if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
      Right.Next->is(tok::string_literal)) {
    return true;
  }
  // Requires clause positions that put a break in front of the clause; other
  // positions fall through to the remaining rules.
  if (Right.is(TT_RequiresClause)) {
    switch (Style.RequiresClausePosition) {
    case FormatStyle::RCPS_OwnLine:
    case FormatStyle::RCPS_WithFollowing:
      return true;
    default:
      break;
    }
  }
  // Can break after template<> declaration
  if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
      Left.MatchingParen->NestingLevel == 0) {
    // Put concepts on the next line e.g.
    // template<typename T>
    // concept ...
    if (Right.is(tok::kw_concept))
      return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
    return Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes;
  }
  // Requires clause positions that put a break after the clause (unless a
  // semicolon follows); other positions fall through.
  if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
    switch (Style.RequiresClausePosition) {
    case FormatStyle::RCPS_OwnLine:
    case FormatStyle::RCPS_WithPreceding:
      return true;
    default:
      break;
    }
  }
  // Constructor initializers that are never packed: break according to the
  // configured position of the colon and commas.
  if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
    if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
        (Left.is(TT_CtorInitializerComma) ||
         Right.is(TT_CtorInitializerColon))) {
      return true;
    }

    if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
        Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
      return true;
    }
  }
  if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
      Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
      Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
    return true;
  }
  // Break only if we have multiple inheritance.
  if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
      Right.is(TT_InheritanceComma)) {
    return true;
  }
  if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
      Left.is(TT_InheritanceComma)) {
    return true;
  }
  if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) {
    // Multiline raw string literals are special wrt. line breaks. The author
    // has made a deliberate choice and might have aligned the contents of the
    // string literal accordingly. Thus, we try keep existing line breaks.
    return Right.IsMultiline && Right.NewlinesBefore > 0;
  }
  if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
                                 Left.Previous->is(tok::equal))) &&
      Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
    // Don't put enums or option definitions onto single lines in protocol
    // buffers.
    return true;
  }
  // Keep the input's line break in front of an inline asm brace.
  if (Right.is(TT_InlineASMBrace))
    return Right.HasUnescapedNewline;

  // Brace wrapping for enums, interfaces, classes and structs. Note that this
  // branch always returns, so later rules are not consulted for such braces.
  if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
    auto FirstNonComment = getFirstNonComment(Line);
    // True if the line's first non-comment token is `internal`, `public`,
    // `private` or `protected`.
    bool AccessSpecifier =
        FirstNonComment &&
        FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
                                 tok::kw_private, tok::kw_protected);

    if (Style.BraceWrapping.AfterEnum) {
      if (Line.startsWith(tok::kw_enum) ||
          Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
        return true;
      }
      // Ensure BraceWrapping for `public enum A {`.
      if (AccessSpecifier && FirstNonComment->Next &&
          FirstNonComment->Next->is(tok::kw_enum)) {
        return true;
      }
    }

    // Ensure BraceWrapping for `public interface A {`.
    if (Style.BraceWrapping.AfterClass &&
        ((AccessSpecifier && FirstNonComment->Next &&
          FirstNonComment->Next->is(Keywords.kw_interface)) ||
         Line.startsWith(Keywords.kw_interface))) {
      return true;
    }

    return (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
           (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
  }

  // ObjC blocks are never kept on a single line when short blocks are
  // disallowed.
  if (Left.is(TT_ObjCBlockLBrace) &&
      Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
    return true;
  }

  // Ensure wrapping after __attribute__((XX)) and @interface etc.
  if (Left.is(TT_AttributeParen) && Right.is(TT_ObjCDecl))
    return true;

  if (Left.is(TT_LambdaLBrace)) {
    // With SLS_Inline, a lambda passed as a function argument may stay on one
    // line.
    if (IsFunctionArgument(Left) &&
        Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
      return false;
    }

    // Otherwise break after the lambda's opening brace for SLS_None and
    // SLS_Inline, and for SLS_Empty when the body is not empty.
    if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
        Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
        (!Left.Children.empty() &&
         Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
      return true;
    }
  }

  // With BeforeLambdaBody, break before a lambda brace that follows `*`, `&`,
  // `&&` or a template closer.
  if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
      Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) {
    return true;
  }

  // Put multiple Java annotation on a new line.
  if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
      Left.is(TT_LeadingJavaAnnotation) &&
      Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
      (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
    return true;
  }

  if (Right.is(TT_ProtoExtensionLSquare))
    return true;

  // In text proto instances if a submessage contains at least 2 entries and at
  // least one of them is a submessage, like A { ... B { ... } ... },
  // put all of the entries of A on separate lines by forcing the selector of
  // the submessage B to be put on a newline.
  //
  // Example: these can stay on one line:
  // a { scalar_1: 1 scalar_2: 2 }
  // a { b { key: value } }
  //
  // and these entries need to be on a new line even if putting them all in one
  // line is under the column limit:
  // a {
  //   scalar: 1
  //   b { key: value }
  // }
  //
  // We enforce this by breaking before a submessage field that has previous
  // siblings, *and* breaking before a field that follows a submessage field.
  //
  // Be careful to exclude the case  [proto.ext] { ... } since the `]` is
  // the TT_SelectorName there, but we don't want to break inside the brackets.
  //
  // Another edge case is @submessage { key: value }, which is a common
  // substitution placeholder. In this case we want to keep `@` and `submessage`
  // together.
  //
  // We ensure elsewhere that extensions are always on their own line.
  if ((Style.Language == FormatStyle::LK_Proto ||
       Style.Language == FormatStyle::LK_TextProto) &&
      Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
    // Keep `@submessage` together in:
    // @submessage { key: value }
    if (Left.is(tok::at))
      return false;
    // Look for the scope opener after selector in cases like:
    // selector { ...
    // selector: { ...
    // selector: @base { ...
    FormatToken *LBrace = Right.Next;
    if (LBrace && LBrace->is(tok::colon)) {
      LBrace = LBrace->Next;
      if (LBrace && LBrace->is(tok::at)) {
        // Skip the `@` and the token after it.
        LBrace = LBrace->Next;
        if (LBrace)
          LBrace = LBrace->Next;
      }
    }
    if (LBrace &&
        // The scope opener is one of {, [, <:
        // selector { ... }
        // selector [ ... ]
        // selector < ... >
        //
        // In case of selector { ... }, the l_brace is TT_DictLiteral.
        // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
        // so we check for immediately following r_brace.
        ((LBrace->is(tok::l_brace) &&
          (LBrace->is(TT_DictLiteral) ||
           (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
         LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
      // If Left.ParameterCount is 0, then this submessage entry is not the
      // first in its parent submessage, and we want to break before this entry.
      // If Left.ParameterCount is greater than 0, then its parent submessage
      // might contain 1 or more entries and we want to break before this entry
      // if it contains at least 2 entries. We deal with this case later by
      // detecting and breaking before the next entry in the parent submessage.
      if (Left.ParameterCount == 0)
        return true;
      // However, if this submessage is the first entry in its parent
      // submessage, Left.ParameterCount might be 1 in some cases.
      // We deal with this case later by detecting an entry
      // following a closing paren of this submessage.
    }

    // If this is an entry immediately following a submessage, it will be
    // preceded by a closing paren of that submessage, like in:
    //     left---.  .---right
    //            v  v
    // sub: { ... } key: value
    // If there was a comment between `}` and `key` above, then `key` would be
    // put on a new line anyways.
    if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
      return true;
  }

  // Deal with lambda arguments in C++ - we want consistent line breaks whether
  // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
  // as aggressive line breaks are placed when the lambda is not the last arg.
  if ((Style.Language == FormatStyle::LK_Cpp ||
       Style.Language == FormatStyle::LK_ObjC) &&
      Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
      !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
    // Multiple lambdas in the same function call force line breaks.
    if (Left.BlockParameterCount > 1)
      return true;

    // A lambda followed by another arg forces a line break. This is detected
    // by looking at the token after the last comma recorded in Left.Role: if
    // it does not start a lambda, braced block or `^` block, break.
    if (!Left.Role)
      return false;
    auto Comma = Left.Role->lastComma();
    if (!Comma)
      return false;
    auto Next = Comma->getNextNonComment();
    if (!Next)
      return false;
    if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
      return true;
  }

  return false;
}
4779
canBreakBefore(const AnnotatedLine & Line,const FormatToken & Right) const4780 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
4781 const FormatToken &Right) const {
4782 const FormatToken &Left = *Right.Previous;
4783 // Language-specific stuff.
4784 if (Style.isCSharp()) {
4785 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
4786 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
4787 return false;
4788 }
4789 // Only break after commas for generic type constraints.
4790 if (Line.First->is(TT_CSharpGenericTypeConstraint))
4791 return Left.is(TT_CSharpGenericTypeConstraintComma);
4792 // Keep nullable operators attached to their identifiers.
4793 if (Right.is(TT_CSharpNullable))
4794 return false;
4795 } else if (Style.Language == FormatStyle::LK_Java) {
4796 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
4797 Keywords.kw_implements)) {
4798 return false;
4799 }
4800 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
4801 Keywords.kw_implements)) {
4802 return true;
4803 }
4804 } else if (Style.isJavaScript()) {
4805 const FormatToken *NonComment = Right.getPreviousNonComment();
4806 if (NonComment &&
4807 NonComment->isOneOf(
4808 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
4809 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
4810 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
4811 Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
4812 Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
4813 Keywords.kw_await)) {
4814 return false; // Otherwise automatic semicolon insertion would trigger.
4815 }
4816 if (Right.NestingLevel == 0 &&
4817 (Left.Tok.getIdentifierInfo() ||
4818 Left.isOneOf(tok::r_square, tok::r_paren)) &&
4819 Right.isOneOf(tok::l_square, tok::l_paren)) {
4820 return false; // Otherwise automatic semicolon insertion would trigger.
4821 }
4822 if (NonComment && NonComment->is(tok::identifier) &&
4823 NonComment->TokenText == "asserts") {
4824 return false;
4825 }
4826 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
4827 return false;
4828 if (Left.is(TT_JsTypeColon))
4829 return true;
4830 // Don't wrap between ":" and "!" of a strict prop init ("field!: type;").
4831 if (Left.is(tok::exclaim) && Right.is(tok::colon))
4832 return false;
4833 // Look for is type annotations like:
4834 // function f(): a is B { ... }
4835 // Do not break before is in these cases.
4836 if (Right.is(Keywords.kw_is)) {
4837 const FormatToken *Next = Right.getNextNonComment();
4838 // If `is` is followed by a colon, it's likely that it's a dict key, so
4839 // ignore it for this check.
4840 // For example this is common in Polymer:
4841 // Polymer({
4842 // is: 'name',
4843 // ...
4844 // });
4845 if (!Next || !Next->is(tok::colon))
4846 return false;
4847 }
4848 if (Left.is(Keywords.kw_in))
4849 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
4850 if (Right.is(Keywords.kw_in))
4851 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
4852 if (Right.is(Keywords.kw_as))
4853 return false; // must not break before as in 'x as type' casts
4854 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
4855 // extends and infer can appear as keywords in conditional types:
4856 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
4857 // do not break before them, as the expressions are subject to ASI.
4858 return false;
4859 }
4860 if (Left.is(Keywords.kw_as))
4861 return true;
4862 if (Left.is(TT_NonNullAssertion))
4863 return true;
4864 if (Left.is(Keywords.kw_declare) &&
4865 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
4866 Keywords.kw_function, tok::kw_class, tok::kw_enum,
4867 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
4868 Keywords.kw_let, tok::kw_const)) {
4869 // See grammar for 'declare' statements at:
4870 // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
4871 return false;
4872 }
4873 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
4874 Right.isOneOf(tok::identifier, tok::string_literal)) {
4875 return false; // must not break in "module foo { ...}"
4876 }
4877 if (Right.is(TT_TemplateString) && Right.closesScope())
4878 return false;
4879 // Don't split tagged template literal so there is a break between the tag
4880 // identifier and template string.
4881 if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
4882 return false;
4883 if (Left.is(TT_TemplateString) && Left.opensScope())
4884 return true;
4885 }
4886
4887 if (Left.is(tok::at))
4888 return false;
4889 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
4890 return false;
4891 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
4892 return !Right.is(tok::l_paren);
4893 if (Right.is(TT_PointerOrReference)) {
4894 return Line.IsMultiVariableDeclStmt ||
4895 (getTokenPointerOrReferenceAlignment(Right) ==
4896 FormatStyle::PAS_Right &&
4897 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
4898 }
4899 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
4900 Right.is(tok::kw_operator)) {
4901 return true;
4902 }
4903 if (Left.is(TT_PointerOrReference))
4904 return false;
4905 if (Right.isTrailingComment()) {
4906 // We rely on MustBreakBefore being set correctly here as we should not
4907 // change the "binding" behavior of a comment.
4908 // The first comment in a braced lists is always interpreted as belonging to
4909 // the first list element. Otherwise, it should be placed outside of the
4910 // list.
4911 return Left.is(BK_BracedInit) ||
4912 (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
4913 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
4914 }
4915 if (Left.is(tok::question) && Right.is(tok::colon))
4916 return false;
4917 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
4918 return Style.BreakBeforeTernaryOperators;
4919 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
4920 return !Style.BreakBeforeTernaryOperators;
4921 if (Left.is(TT_InheritanceColon))
4922 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
4923 if (Right.is(TT_InheritanceColon))
4924 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
4925 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
4926 Left.isNot(TT_SelectorName)) {
4927 return true;
4928 }
4929
4930 if (Right.is(tok::colon) &&
4931 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
4932 return false;
4933 }
4934 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
4935 if (Style.Language == FormatStyle::LK_Proto ||
4936 Style.Language == FormatStyle::LK_TextProto) {
4937 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
4938 return false;
4939 // Prevent cases like:
4940 //
4941 // submessage:
4942 // { key: valueeeeeeeeeeee }
4943 //
4944 // when the snippet does not fit into one line.
4945 // Prefer:
4946 //
4947 // submessage: {
4948 // key: valueeeeeeeeeeee
4949 // }
4950 //
4951 // instead, even if it is longer by one line.
4952 //
4953 // Note that this allows the "{" to go over the column limit
4954 // when the column limit is just between ":" and "{", but that does
4955 // not happen too often and alternative formattings in this case are
4956 // not much better.
4957 //
4958 // The code covers the cases:
4959 //
4960 // submessage: { ... }
4961 // submessage: < ... >
4962 // repeated: [ ... ]
4963 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
4964 Right.is(TT_DictLiteral)) ||
4965 Right.is(TT_ArrayInitializerLSquare)) {
4966 return false;
4967 }
4968 }
4969 return true;
4970 }
4971 if (Right.is(tok::r_square) && Right.MatchingParen &&
4972 Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
4973 return false;
4974 }
4975 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
4976 Right.Next->is(TT_ObjCMethodExpr))) {
4977 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
4978 }
4979 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
4980 return true;
4981 if (Right.is(tok::kw_concept))
4982 return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
4983 if (Right.is(TT_RequiresClause))
4984 return true;
4985 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
4986 return true;
4987 if (Left.ClosesRequiresClause)
4988 return true;
4989 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
4990 TT_OverloadedOperator)) {
4991 return false;
4992 }
4993 if (Left.is(TT_RangeBasedForLoopColon))
4994 return true;
4995 if (Right.is(TT_RangeBasedForLoopColon))
4996 return false;
4997 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
4998 return true;
4999 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
5000 (Left.is(tok::less) && Right.is(tok::less))) {
5001 return false;
5002 }
5003 if (Right.is(TT_BinaryOperator) &&
5004 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
5005 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
5006 Right.getPrecedence() != prec::Assignment)) {
5007 return true;
5008 }
5009 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
5010 Left.is(tok::kw_operator)) {
5011 return false;
5012 }
5013 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
5014 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
5015 return false;
5016 }
5017 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
5018 !Style.Cpp11BracedListStyle) {
5019 return false;
5020 }
5021 if (Left.is(tok::l_paren) &&
5022 Left.isOneOf(TT_AttributeParen, TT_TypeDeclarationParen)) {
5023 return false;
5024 }
5025 if (Left.is(tok::l_paren) && Left.Previous &&
5026 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
5027 return false;
5028 }
5029 if (Right.is(TT_ImplicitStringLiteral))
5030 return false;
5031
5032 if (Right.is(TT_TemplateCloser))
5033 return false;
5034 if (Right.is(tok::r_square) && Right.MatchingParen &&
5035 Right.MatchingParen->is(TT_LambdaLSquare)) {
5036 return false;
5037 }
5038
5039 // We only break before r_brace if there was a corresponding break before
5040 // the l_brace, which is tracked by BreakBeforeClosingBrace.
5041 if (Right.is(tok::r_brace))
5042 return Right.MatchingParen && Right.MatchingParen->is(BK_Block);
5043
5044 // We only break before r_paren if we're in a block indented context.
5045 if (Right.is(tok::r_paren)) {
5046 if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
5047 !Right.MatchingParen) {
5048 return false;
5049 }
5050 auto Next = Right.Next;
5051 if (Next && Next->is(tok::r_paren))
5052 Next = Next->Next;
5053 if (Next && Next->is(tok::l_paren))
5054 return false;
5055 const FormatToken *Previous = Right.MatchingParen->Previous;
5056 return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
5057 }
5058
5059 // Allow breaking after a trailing annotation, e.g. after a method
5060 // declaration.
5061 if (Left.is(TT_TrailingAnnotation)) {
5062 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
5063 tok::less, tok::coloncolon);
5064 }
5065
5066 if (Right.is(tok::kw___attribute) ||
5067 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) {
5068 return !Left.is(TT_AttributeSquare);
5069 }
5070
5071 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
5072 return true;
5073
5074 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
5075 return true;
5076
5077 if (Left.is(TT_CtorInitializerColon)) {
5078 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5079 (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
5080 }
5081 if (Right.is(TT_CtorInitializerColon))
5082 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
5083 if (Left.is(TT_CtorInitializerComma) &&
5084 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
5085 return false;
5086 }
5087 if (Right.is(TT_CtorInitializerComma) &&
5088 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
5089 return true;
5090 }
5091 if (Left.is(TT_InheritanceComma) &&
5092 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
5093 return false;
5094 }
5095 if (Right.is(TT_InheritanceComma) &&
5096 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
5097 return true;
5098 }
5099 if (Left.is(TT_ArrayInitializerLSquare))
5100 return true;
5101 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
5102 return true;
5103 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
5104 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
5105 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
5106 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
5107 Left.getPrecedence() == prec::Assignment)) {
5108 return true;
5109 }
5110 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
5111 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
5112 return false;
5113 }
5114
5115 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
5116 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
5117 if (isAllmanLambdaBrace(Left))
5118 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
5119 if (isAllmanLambdaBrace(Right))
5120 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
5121 }
5122
5123 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
5124 tok::kw_class, tok::kw_struct, tok::comment) ||
5125 Right.isMemberAccess() ||
5126 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
5127 tok::colon, tok::l_square, tok::at) ||
5128 (Left.is(tok::r_paren) &&
5129 Right.isOneOf(tok::identifier, tok::kw_const)) ||
5130 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
5131 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
5132 }
5133
printDebugInfo(const AnnotatedLine & Line) const5134 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
5135 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
5136 << ", T=" << Line.Type << ", C=" << Line.IsContinuation
5137 << "):\n";
5138 const FormatToken *Tok = Line.First;
5139 while (Tok) {
5140 llvm::errs() << " M=" << Tok->MustBreakBefore
5141 << " C=" << Tok->CanBreakBefore
5142 << " T=" << getTokenTypeName(Tok->getType())
5143 << " S=" << Tok->SpacesRequiredBefore
5144 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
5145 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
5146 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
5147 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
5148 for (prec::Level LParen : Tok->FakeLParens)
5149 llvm::errs() << LParen << "/";
5150 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
5151 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
5152 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
5153 if (!Tok->Next)
5154 assert(Tok == Line.Last);
5155 Tok = Tok->Next;
5156 }
5157 llvm::errs() << "----\n";
5158 }
5159
5160 FormatStyle::PointerAlignmentStyle
getTokenReferenceAlignment(const FormatToken & Reference) const5161 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
5162 assert(Reference.isOneOf(tok::amp, tok::ampamp));
5163 switch (Style.ReferenceAlignment) {
5164 case FormatStyle::RAS_Pointer:
5165 return Style.PointerAlignment;
5166 case FormatStyle::RAS_Left:
5167 return FormatStyle::PAS_Left;
5168 case FormatStyle::RAS_Right:
5169 return FormatStyle::PAS_Right;
5170 case FormatStyle::RAS_Middle:
5171 return FormatStyle::PAS_Middle;
5172 }
5173 assert(0); //"Unhandled value of ReferenceAlignment"
5174 return Style.PointerAlignment;
5175 }
5176
5177 FormatStyle::PointerAlignmentStyle
getTokenPointerOrReferenceAlignment(const FormatToken & PointerOrReference) const5178 TokenAnnotator::getTokenPointerOrReferenceAlignment(
5179 const FormatToken &PointerOrReference) const {
5180 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
5181 switch (Style.ReferenceAlignment) {
5182 case FormatStyle::RAS_Pointer:
5183 return Style.PointerAlignment;
5184 case FormatStyle::RAS_Left:
5185 return FormatStyle::PAS_Left;
5186 case FormatStyle::RAS_Right:
5187 return FormatStyle::PAS_Right;
5188 case FormatStyle::RAS_Middle:
5189 return FormatStyle::PAS_Middle;
5190 }
5191 }
5192 assert(PointerOrReference.is(tok::star));
5193 return Style.PointerAlignment;
5194 }
5195
5196 } // namespace format
5197 } // namespace clang
5198