1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "UnwrappedLineFormatter.h"
10 #include "NamespaceEndCommentsFixer.h"
11 #include "WhitespaceManager.h"
12 #include "llvm/Support/Debug.h"
13 #include <queue>
14 
15 #define DEBUG_TYPE "format-formatter"
16 
17 namespace clang {
18 namespace format {
19 
20 namespace {
21 
22 bool startsExternCBlock(const AnnotatedLine &Line) {
23   const FormatToken *Next = Line.First->getNextNonComment();
24   const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
25   return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
26          NextNext && NextNext->is(tok::l_brace);
27 }
28 
29 bool isRecordLBrace(const FormatToken &Tok) {
30   return Tok.isOneOf(TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace,
31                      TT_StructLBrace, TT_UnionLBrace);
32 }
33 
34 /// Tracks the indent level of \c AnnotatedLines across levels.
35 ///
36 /// \c nextLine must be called for each \c AnnotatedLine, after which \c
37 /// getIndent() will return the indent for the last line \c nextLine was called
38 /// with.
39 /// If the line is not formatted (and thus the indent does not change), calling
40 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
41 /// subsequent lines on the same level to be indented at the same level as the
42 /// given line.
43 class LevelIndentTracker {
44 public:
45   LevelIndentTracker(const FormatStyle &Style,
46                      const AdditionalKeywords &Keywords, unsigned StartLevel,
47                      int AdditionalIndent)
48       : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
49     for (unsigned i = 0; i != StartLevel; ++i)
50       IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
51   }
52 
53   /// Returns the indent for the current line.
54   unsigned getIndent() const { return Indent; }
55 
56   /// Update the indent state given that \p Line is going to be formatted
57   /// next.
58   void nextLine(const AnnotatedLine &Line) {
59     Offset = getIndentOffset(*Line.First);
60     // Update the indent level cache size so that we can rely on it
61     // having the right size in adjustToUnmodifiedline.
62     skipLine(Line, /*UnknownIndent=*/true);
63     if (Line.InPPDirective) {
64       unsigned IndentWidth =
65           (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth;
66       Indent = Line.Level * IndentWidth + AdditionalIndent;
67     } else {
68       Indent = getIndent(Line.Level);
69     }
70     if (static_cast<int>(Indent) + Offset >= 0)
71       Indent += Offset;
72     if (Line.First->is(TT_CSharpGenericTypeConstraint))
73       Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth;
74   }
75 
76   /// Update the indent state given that \p Line indent should be
77   /// skipped.
78   void skipLine(const AnnotatedLine &Line, bool UnknownIndent = false) {
79     if (Line.Level >= IndentForLevel.size())
80       IndentForLevel.resize(Line.Level + 1, UnknownIndent ? -1 : Indent);
81   }
82 
83   /// Update the level indent to adapt to the given \p Line.
84   ///
85   /// When a line is not formatted, we move the subsequent lines on the same
86   /// level to the same indent.
87   /// Note that \c nextLine must have been called before this method.
88   void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
89     unsigned LevelIndent = Line.First->OriginalColumn;
90     if (static_cast<int>(LevelIndent) - Offset >= 0)
91       LevelIndent -= Offset;
92     assert(Line.Level < IndentForLevel.size());
93     if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
94         !Line.InPPDirective) {
95       IndentForLevel[Line.Level] = LevelIndent;
96     }
97   }
98 
99 private:
100   /// Get the offset of the line relatively to the level.
101   ///
102   /// For example, 'public:' labels in classes are offset by 1 or 2
103   /// characters to the left from their level.
104   int getIndentOffset(const FormatToken &RootToken) {
105     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
106         Style.isCSharp()) {
107       return 0;
108     }
109 
110     auto IsAccessModifier = [this, &RootToken]() {
111       if (RootToken.isAccessSpecifier(Style.isCpp())) {
112         return true;
113       } else if (RootToken.isObjCAccessSpecifier()) {
114         return true;
115       }
116       // Handle Qt signals.
117       else if ((RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
118                 RootToken.Next && RootToken.Next->is(tok::colon))) {
119         return true;
120       } else if (RootToken.Next &&
121                  RootToken.Next->isOneOf(Keywords.kw_slots,
122                                          Keywords.kw_qslots) &&
123                  RootToken.Next->Next && RootToken.Next->Next->is(tok::colon)) {
124         return true;
125       }
126       // Handle malformed access specifier e.g. 'private' without trailing ':'.
127       else if (!RootToken.Next && RootToken.isAccessSpecifier(false)) {
128         return true;
129       }
130       return false;
131     };
132 
133     if (IsAccessModifier()) {
134       // The AccessModifierOffset may be overridden by IndentAccessModifiers,
135       // in which case we take a negative value of the IndentWidth to simulate
136       // the upper indent level.
137       return Style.IndentAccessModifiers ? -Style.IndentWidth
138                                          : Style.AccessModifierOffset;
139     }
140     return 0;
141   }
142 
143   /// Get the indent of \p Level from \p IndentForLevel.
144   ///
145   /// \p IndentForLevel must contain the indent for the level \c l
146   /// at \p IndentForLevel[l], or a value < 0 if the indent for
147   /// that level is unknown.
148   unsigned getIndent(unsigned Level) const {
149     if (IndentForLevel[Level] != -1)
150       return IndentForLevel[Level];
151     if (Level == 0)
152       return 0;
153     return getIndent(Level - 1) + Style.IndentWidth;
154   }
155 
156   const FormatStyle &Style;
157   const AdditionalKeywords &Keywords;
158   const unsigned AdditionalIndent;
159 
160   /// The indent in characters for each level.
161   SmallVector<int> IndentForLevel;
162 
163   /// Offset of the current line relative to the indent level.
164   ///
165   /// For example, the 'public' keywords is often indented with a negative
166   /// offset.
167   int Offset = 0;
168 
169   /// The current line's indent.
170   unsigned Indent = 0;
171 };
172 
173 const FormatToken *getMatchingNamespaceToken(
174     const AnnotatedLine *Line,
175     const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
176   if (!Line->startsWith(tok::r_brace))
177     return nullptr;
178   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
179   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
180     return nullptr;
181   assert(StartLineIndex < AnnotatedLines.size());
182   return AnnotatedLines[StartLineIndex]->First->getNamespaceToken();
183 }
184 
185 StringRef getNamespaceTokenText(const AnnotatedLine *Line) {
186   const FormatToken *NamespaceToken = Line->First->getNamespaceToken();
187   return NamespaceToken ? NamespaceToken->TokenText : StringRef();
188 }
189 
190 StringRef getMatchingNamespaceTokenText(
191     const AnnotatedLine *Line,
192     const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
193   const FormatToken *NamespaceToken =
194       getMatchingNamespaceToken(Line, AnnotatedLines);
195   return NamespaceToken ? NamespaceToken->TokenText : StringRef();
196 }
197 
198 class LineJoiner {
199 public:
200   LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
201              const SmallVectorImpl<AnnotatedLine *> &Lines)
202       : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()),
203         AnnotatedLines(Lines) {}
204 
205   /// Returns the next line, merging multiple lines into one if possible.
206   const AnnotatedLine *getNextMergedLine(bool DryRun,
207                                          LevelIndentTracker &IndentTracker) {
208     if (Next == End)
209       return nullptr;
210     const AnnotatedLine *Current = *Next;
211     IndentTracker.nextLine(*Current);
212     unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End);
213     if (MergedLines > 0 && Style.ColumnLimit == 0) {
214       // Disallow line merging if there is a break at the start of one of the
215       // input lines.
216       for (unsigned i = 0; i < MergedLines; ++i)
217         if (Next[i + 1]->First->NewlinesBefore > 0)
218           MergedLines = 0;
219     }
220     if (!DryRun)
221       for (unsigned i = 0; i < MergedLines; ++i)
222         join(*Next[0], *Next[i + 1]);
223     Next = Next + MergedLines + 1;
224     return Current;
225   }
226 
227 private:
228   /// Calculates how many lines can be merged into 1 starting at \p I.
229   unsigned
230   tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker,
231                            SmallVectorImpl<AnnotatedLine *>::const_iterator I,
232                            SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
233     const unsigned Indent = IndentTracker.getIndent();
234 
235     // Can't join the last line with anything.
236     if (I + 1 == E)
237       return 0;
238     // We can never merge stuff if there are trailing line comments.
239     const AnnotatedLine *TheLine = *I;
240     if (TheLine->Last->is(TT_LineComment))
241       return 0;
242     const auto &NextLine = *I[1];
243     if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore)
244       return 0;
245     if (TheLine->InPPDirective &&
246         (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) {
247       return 0;
248     }
249 
250     if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
251       return 0;
252 
253     unsigned Limit =
254         Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
255     // If we already exceed the column limit, we set 'Limit' to 0. The different
256     // tryMerge..() functions can then decide whether to still do merging.
257     Limit = TheLine->Last->TotalLength > Limit
258                 ? 0
259                 : Limit - TheLine->Last->TotalLength;
260 
261     if (TheLine->Last->is(TT_FunctionLBrace) &&
262         TheLine->First == TheLine->Last &&
263         !Style.BraceWrapping.SplitEmptyFunction &&
264         NextLine.First->is(tok::r_brace)) {
265       return tryMergeSimpleBlock(I, E, Limit);
266     }
267 
268     const auto *PreviousLine = I != AnnotatedLines.begin() ? I[-1] : nullptr;
269     // Handle empty record blocks where the brace has already been wrapped.
270     if (PreviousLine && TheLine->Last->is(tok::l_brace) &&
271         TheLine->First == TheLine->Last) {
272       bool EmptyBlock = NextLine.First->is(tok::r_brace);
273 
274       const FormatToken *Tok = PreviousLine->First;
275       if (Tok && Tok->is(tok::comment))
276         Tok = Tok->getNextNonComment();
277 
278       if (Tok && Tok->getNamespaceToken()) {
279         return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock
280                    ? tryMergeSimpleBlock(I, E, Limit)
281                    : 0;
282       }
283 
284       if (Tok && Tok->is(tok::kw_typedef))
285         Tok = Tok->getNextNonComment();
286       if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,
287                               tok::kw_extern, Keywords.kw_interface)) {
288         return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock
289                    ? tryMergeSimpleBlock(I, E, Limit)
290                    : 0;
291       }
292 
293       if (Tok && Tok->is(tok::kw_template) &&
294           Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) {
295         return 0;
296       }
297     }
298 
299     auto ShouldMergeShortFunctions = [this, &I, &NextLine, PreviousLine,
300                                       TheLine]() {
301       if (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All)
302         return true;
303       if (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
304           NextLine.First->is(tok::r_brace)) {
305         return true;
306       }
307 
308       if (Style.AllowShortFunctionsOnASingleLine &
309           FormatStyle::SFS_InlineOnly) {
310         // Just checking TheLine->Level != 0 is not enough, because it
311         // provokes treating functions inside indented namespaces as short.
312         if (Style.isJavaScript() && TheLine->Last->is(TT_FunctionLBrace))
313           return true;
314 
315         if (TheLine->Level != 0) {
316           if (!PreviousLine)
317             return false;
318 
319           // TODO: Use IndentTracker to avoid loop?
320           // Find the last line with lower level.
321           const AnnotatedLine *Line = nullptr;
322           for (auto J = I - 1; J >= AnnotatedLines.begin(); --J) {
323             assert(*J);
324             if (!(*J)->InPPDirective && !(*J)->isComment() &&
325                 (*J)->Level < TheLine->Level) {
326               Line = *J;
327               break;
328             }
329           }
330 
331           if (!Line)
332             return false;
333 
334           // Check if the found line starts a record.
335           const FormatToken *LastNonComment = Line->Last;
336           assert(LastNonComment);
337           if (LastNonComment->is(tok::comment)) {
338             LastNonComment = LastNonComment->getPreviousNonComment();
339             // There must be another token (usually `{`), because we chose a
340             // non-PPDirective and non-comment line that has a smaller level.
341             assert(LastNonComment);
342           }
343           return isRecordLBrace(*LastNonComment);
344         }
345       }
346 
347       return false;
348     };
349 
350     bool MergeShortFunctions = ShouldMergeShortFunctions();
351 
352     const FormatToken *FirstNonComment = TheLine->First;
353     if (FirstNonComment->is(tok::comment)) {
354       FirstNonComment = FirstNonComment->getNextNonComment();
355       if (!FirstNonComment)
356         return 0;
357     }
358     // FIXME: There are probably cases where we should use FirstNonComment
359     // instead of TheLine->First.
360 
361     if (Style.CompactNamespaces) {
362       if (auto nsToken = TheLine->First->getNamespaceToken()) {
363         int i = 0;
364         unsigned closingLine = TheLine->MatchingClosingBlockLineIndex - 1;
365         for (; I + 1 + i != E &&
366                nsToken->TokenText == getNamespaceTokenText(I[i + 1]) &&
367                closingLine == I[i + 1]->MatchingClosingBlockLineIndex &&
368                I[i + 1]->Last->TotalLength < Limit;
369              i++, --closingLine) {
370           // No extra indent for compacted namespaces.
371           IndentTracker.skipLine(*I[i + 1]);
372 
373           Limit -= I[i + 1]->Last->TotalLength;
374         }
375         return i;
376       }
377 
378       if (auto nsToken = getMatchingNamespaceToken(TheLine, AnnotatedLines)) {
379         int i = 0;
380         unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
381         for (; I + 1 + i != E &&
382                nsToken->TokenText ==
383                    getMatchingNamespaceTokenText(I[i + 1], AnnotatedLines) &&
384                openingLine == I[i + 1]->MatchingOpeningBlockLineIndex;
385              i++, --openingLine) {
386           // No space between consecutive braces.
387           I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace);
388 
389           // Indent like the outer-most namespace.
390           IndentTracker.nextLine(*I[i + 1]);
391         }
392         return i;
393       }
394     }
395 
396     // Try to merge a function block with left brace unwrapped.
397     if (TheLine->Last->is(TT_FunctionLBrace) && TheLine->First != TheLine->Last)
398       return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
399     // Try to merge a control statement block with left brace unwrapped.
400     if (TheLine->Last->is(tok::l_brace) && FirstNonComment != TheLine->Last &&
401         FirstNonComment->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for,
402                                  TT_ForEachMacro)) {
403       return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never
404                  ? tryMergeSimpleBlock(I, E, Limit)
405                  : 0;
406     }
407     // Try to merge a control statement block with left brace wrapped.
408     if (NextLine.First->is(tok::l_brace)) {
409       if ((TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
410                                    tok::kw_for, tok::kw_switch, tok::kw_try,
411                                    tok::kw_do, TT_ForEachMacro) ||
412            (TheLine->First->is(tok::r_brace) && TheLine->First->Next &&
413             TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) &&
414           Style.BraceWrapping.AfterControlStatement ==
415               FormatStyle::BWACS_MultiLine) {
416         // If possible, merge the next line's wrapped left brace with the
417         // current line. Otherwise, leave it on the next line, as this is a
418         // multi-line control statement.
419         return (Style.ColumnLimit == 0 || TheLine->Level * Style.IndentWidth +
420                                                   TheLine->Last->TotalLength <=
421                                               Style.ColumnLimit)
422                    ? 1
423                    : 0;
424       }
425       if (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
426                                   tok::kw_for, TT_ForEachMacro)) {
427         return (Style.BraceWrapping.AfterControlStatement ==
428                 FormatStyle::BWACS_Always)
429                    ? tryMergeSimpleBlock(I, E, Limit)
430                    : 0;
431       }
432       if (TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) &&
433           Style.BraceWrapping.AfterControlStatement ==
434               FormatStyle::BWACS_MultiLine) {
435         // This case if different from the upper BWACS_MultiLine processing
436         // in that a preceding r_brace is not on the same line as else/catch
437         // most likely because of BeforeElse/BeforeCatch set to true.
438         // If the line length doesn't fit ColumnLimit, leave l_brace on the
439         // next line to respect the BWACS_MultiLine.
440         return (Style.ColumnLimit == 0 ||
441                 TheLine->Last->TotalLength <= Style.ColumnLimit)
442                    ? 1
443                    : 0;
444       }
445     }
446     if (PreviousLine && TheLine->First->is(tok::l_brace)) {
447       switch (PreviousLine->First->Tok.getKind()) {
448       case tok::at:
449         // Don't merge block with left brace wrapped after ObjC special blocks.
450         if (PreviousLine->First->Next) {
451           tok::ObjCKeywordKind kwId =
452               PreviousLine->First->Next->Tok.getObjCKeywordID();
453           if (kwId == tok::objc_autoreleasepool ||
454               kwId == tok::objc_synchronized) {
455             return 0;
456           }
457         }
458         break;
459 
460       case tok::kw_case:
461       case tok::kw_default:
462         // Don't merge block with left brace wrapped after case labels.
463         return 0;
464 
465       default:
466         break;
467       }
468     }
469 
470     // Don't merge an empty template class or struct if SplitEmptyRecords
471     // is defined.
472     if (PreviousLine && Style.BraceWrapping.SplitEmptyRecord &&
473         TheLine->Last->is(tok::l_brace) && PreviousLine->Last) {
474       const FormatToken *Previous = PreviousLine->Last;
475       if (Previous) {
476         if (Previous->is(tok::comment))
477           Previous = Previous->getPreviousNonComment();
478         if (Previous) {
479           if (Previous->is(tok::greater) && !PreviousLine->InPPDirective)
480             return 0;
481           if (Previous->is(tok::identifier)) {
482             const FormatToken *PreviousPrevious =
483                 Previous->getPreviousNonComment();
484             if (PreviousPrevious &&
485                 PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct)) {
486               return 0;
487             }
488           }
489         }
490       }
491     }
492 
493     if (TheLine->Last->is(tok::l_brace)) {
494       bool ShouldMerge = false;
495       // Try to merge records.
496       if (TheLine->Last->is(TT_EnumLBrace)) {
497         ShouldMerge = Style.AllowShortEnumsOnASingleLine;
498       } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace)) {
499         // NOTE: We use AfterClass (whereas AfterStruct exists) for both classes
500         // and structs, but it seems that wrapping is still handled correctly
501         // elsewhere.
502         ShouldMerge = !Style.BraceWrapping.AfterClass ||
503                       (NextLine.First->is(tok::r_brace) &&
504                        !Style.BraceWrapping.SplitEmptyRecord);
505       } else {
506         // Try to merge a block with left brace unwrapped that wasn't yet
507         // covered.
508         assert(TheLine->InPPDirective ||
509                !TheLine->First->isOneOf(tok::kw_class, tok::kw_enum,
510                                         tok::kw_struct));
511         ShouldMerge = !Style.BraceWrapping.AfterFunction ||
512                       (NextLine.First->is(tok::r_brace) &&
513                        !Style.BraceWrapping.SplitEmptyFunction);
514       }
515       return ShouldMerge ? tryMergeSimpleBlock(I, E, Limit) : 0;
516     }
517 
518     // Try to merge a function block with left brace wrapped.
519     if (NextLine.First->is(TT_FunctionLBrace) &&
520         Style.BraceWrapping.AfterFunction) {
521       if (NextLine.Last->is(TT_LineComment))
522         return 0;
523 
524       // Check for Limit <= 2 to account for the " {".
525       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
526         return 0;
527       Limit -= 2;
528 
529       unsigned MergedLines = 0;
530       if (MergeShortFunctions ||
531           (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
532            NextLine.First == NextLine.Last && I + 2 != E &&
533            I[2]->First->is(tok::r_brace))) {
534         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
535         // If we managed to merge the block, count the function header, which is
536         // on a separate line.
537         if (MergedLines > 0)
538           ++MergedLines;
539       }
540       return MergedLines;
541     }
542     auto IsElseLine = [&TheLine]() -> bool {
543       const FormatToken *First = TheLine->First;
544       if (First->is(tok::kw_else))
545         return true;
546 
547       return First->is(tok::r_brace) && First->Next &&
548              First->Next->is(tok::kw_else);
549     };
550     if (TheLine->First->is(tok::kw_if) ||
551         (IsElseLine() && (Style.AllowShortIfStatementsOnASingleLine ==
552                           FormatStyle::SIS_AllIfsAndElse))) {
553       return Style.AllowShortIfStatementsOnASingleLine
554                  ? tryMergeSimpleControlStatement(I, E, Limit)
555                  : 0;
556     }
557     if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while, tok::kw_do,
558                                 TT_ForEachMacro)) {
559       return Style.AllowShortLoopsOnASingleLine
560                  ? tryMergeSimpleControlStatement(I, E, Limit)
561                  : 0;
562     }
563     if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
564       return Style.AllowShortCaseLabelsOnASingleLine
565                  ? tryMergeShortCaseLabels(I, E, Limit)
566                  : 0;
567     }
568     if (TheLine->InPPDirective &&
569         (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
570       return tryMergeSimplePPDirective(I, E, Limit);
571     }
572     return 0;
573   }
574 
575   unsigned
576   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
577                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
578                             unsigned Limit) {
579     if (Limit == 0)
580       return 0;
581     if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
582       return 0;
583     if (1 + I[1]->Last->TotalLength > Limit)
584       return 0;
585     return 1;
586   }
587 
588   unsigned tryMergeSimpleControlStatement(
589       SmallVectorImpl<AnnotatedLine *>::const_iterator I,
590       SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
591     if (Limit == 0)
592       return 0;
593     if (Style.BraceWrapping.AfterControlStatement ==
594             FormatStyle::BWACS_Always &&
595         I[1]->First->is(tok::l_brace) &&
596         Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
597       return 0;
598     }
599     if (I[1]->InPPDirective != (*I)->InPPDirective ||
600         (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) {
601       return 0;
602     }
603     Limit = limitConsideringMacros(I + 1, E, Limit);
604     AnnotatedLine &Line = **I;
605     if (!Line.First->is(tok::kw_do) && !Line.First->is(tok::kw_else) &&
606         !Line.Last->is(tok::kw_else) && Line.Last->isNot(tok::r_paren)) {
607       return 0;
608     }
609     // Only merge `do while` if `do` is the only statement on the line.
610     if (Line.First->is(tok::kw_do) && !Line.Last->is(tok::kw_do))
611       return 0;
612     if (1 + I[1]->Last->TotalLength > Limit)
613       return 0;
614     // Don't merge with loops, ifs, a single semicolon or a line comment.
615     if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
616                              TT_ForEachMacro, TT_LineComment)) {
617       return 0;
618     }
619     // Only inline simple if's (no nested if or else), unless specified
620     if (Style.AllowShortIfStatementsOnASingleLine ==
621         FormatStyle::SIS_WithoutElse) {
622       if (I + 2 != E && Line.startsWith(tok::kw_if) &&
623           I[2]->First->is(tok::kw_else)) {
624         return 0;
625       }
626     }
627     return 1;
628   }
629 
630   unsigned
631   tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
632                           SmallVectorImpl<AnnotatedLine *>::const_iterator E,
633                           unsigned Limit) {
634     if (Limit == 0 || I + 1 == E ||
635         I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) {
636       return 0;
637     }
638     if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace))
639       return 0;
640     unsigned NumStmts = 0;
641     unsigned Length = 0;
642     bool EndsWithComment = false;
643     bool InPPDirective = I[0]->InPPDirective;
644     const unsigned Level = I[0]->Level;
645     for (; NumStmts < 3; ++NumStmts) {
646       if (I + 1 + NumStmts == E)
647         break;
648       const AnnotatedLine *Line = I[1 + NumStmts];
649       if (Line->InPPDirective != InPPDirective)
650         break;
651       if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
652         break;
653       if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
654                                tok::kw_while) ||
655           EndsWithComment) {
656         return 0;
657       }
658       if (Line->First->is(tok::comment)) {
659         if (Level != Line->Level)
660           return 0;
661         SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts;
662         for (; J != E; ++J) {
663           Line = *J;
664           if (Line->InPPDirective != InPPDirective)
665             break;
666           if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
667             break;
668           if (Line->First->isNot(tok::comment) || Level != Line->Level)
669             return 0;
670         }
671         break;
672       }
673       if (Line->Last->is(tok::comment))
674         EndsWithComment = true;
675       Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
676     }
677     if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
678       return 0;
679     return NumStmts;
680   }
681 
682   unsigned
683   tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
684                       SmallVectorImpl<AnnotatedLine *>::const_iterator E,
685                       unsigned Limit) {
686     // Don't merge with a preprocessor directive.
687     if (I[1]->Type == LT_PreprocessorDirective)
688       return 0;
689 
690     AnnotatedLine &Line = **I;
691 
692     // Don't merge ObjC @ keywords and methods.
693     // FIXME: If an option to allow short exception handling clauses on a single
694     // line is added, change this to not return for @try and friends.
695     if (Style.Language != FormatStyle::LK_Java &&
696         Line.First->isOneOf(tok::at, tok::minus, tok::plus)) {
697       return 0;
698     }
699 
700     // Check that the current line allows merging. This depends on whether we
701     // are in a control flow statements as well as several style flags.
702     if (Line.First->is(tok::kw_case) ||
703         (Line.First->Next && Line.First->Next->is(tok::kw_else))) {
704       return 0;
705     }
706     // default: in switch statement
707     if (Line.First->is(tok::kw_default)) {
708       const FormatToken *Tok = Line.First->getNextNonComment();
709       if (Tok && Tok->is(tok::colon))
710         return 0;
711     }
712     if (Line.First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, tok::kw_do,
713                             tok::kw_try, tok::kw___try, tok::kw_catch,
714                             tok::kw___finally, tok::kw_for, TT_ForEachMacro,
715                             tok::r_brace, Keywords.kw___except)) {
716       if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
717         return 0;
718       if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Empty &&
719           !I[1]->First->is(tok::r_brace)) {
720         return 0;
721       }
722       // Don't merge when we can't except the case when
723       // the control statement block is empty
724       if (!Style.AllowShortIfStatementsOnASingleLine &&
725           Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
726           !Style.BraceWrapping.AfterControlStatement &&
727           !I[1]->First->is(tok::r_brace)) {
728         return 0;
729       }
730       if (!Style.AllowShortIfStatementsOnASingleLine &&
731           Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
732           Style.BraceWrapping.AfterControlStatement ==
733               FormatStyle::BWACS_Always &&
734           I + 2 != E && !I[2]->First->is(tok::r_brace)) {
735         return 0;
736       }
737       if (!Style.AllowShortLoopsOnASingleLine &&
738           Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for,
739                               TT_ForEachMacro) &&
740           !Style.BraceWrapping.AfterControlStatement &&
741           !I[1]->First->is(tok::r_brace)) {
742         return 0;
743       }
744       if (!Style.AllowShortLoopsOnASingleLine &&
745           Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for,
746                               TT_ForEachMacro) &&
747           Style.BraceWrapping.AfterControlStatement ==
748               FormatStyle::BWACS_Always &&
749           I + 2 != E && !I[2]->First->is(tok::r_brace)) {
750         return 0;
751       }
752       // FIXME: Consider an option to allow short exception handling clauses on
753       // a single line.
754       // FIXME: This isn't covered by tests.
755       // FIXME: For catch, __except, __finally the first token on the line
756       // is '}', so this isn't correct here.
757       if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
758                               Keywords.kw___except, tok::kw___finally)) {
759         return 0;
760       }
761     }
762 
763     if (Line.Last->is(tok::l_brace)) {
764       FormatToken *Tok = I[1]->First;
765       auto ShouldMerge = [Tok]() {
766         if (Tok->isNot(tok::r_brace) || Tok->MustBreakBefore)
767           return false;
768         const FormatToken *Next = Tok->getNextNonComment();
769         return !Next || Next->is(tok::semi);
770       };
771 
772       if (ShouldMerge()) {
773         // We merge empty blocks even if the line exceeds the column limit.
774         Tok->SpacesRequiredBefore = Style.SpaceInEmptyBlock ? 1 : 0;
775         Tok->CanBreakBefore = true;
776         return 1;
777       } else if (Limit != 0 && !Line.startsWithNamespace() &&
778                  !startsExternCBlock(Line)) {
779         // We don't merge short records.
780         if (isRecordLBrace(*Line.Last))
781           return 0;
782 
783         // Check that we still have three lines and they fit into the limit.
784         if (I + 2 == E || I[2]->Type == LT_Invalid)
785           return 0;
786         Limit = limitConsideringMacros(I + 2, E, Limit);
787 
788         if (!nextTwoLinesFitInto(I, Limit))
789           return 0;
790 
791         // Second, check that the next line does not contain any braces - if it
792         // does, readability declines when putting it into a single line.
793         if (I[1]->Last->is(TT_LineComment))
794           return 0;
795         do {
796           if (Tok->is(tok::l_brace) && Tok->isNot(BK_BracedInit))
797             return 0;
798           Tok = Tok->Next;
799         } while (Tok);
800 
801         // Last, check that the third line starts with a closing brace.
802         Tok = I[2]->First;
803         if (Tok->isNot(tok::r_brace))
804           return 0;
805 
806         // Don't merge "if (a) { .. } else {".
807         if (Tok->Next && Tok->Next->is(tok::kw_else))
808           return 0;
809 
810         // Don't merge a trailing multi-line control statement block like:
811         // } else if (foo &&
812         //            bar)
813         // { <-- current Line
814         //   baz();
815         // }
816         if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) &&
817             Style.BraceWrapping.AfterControlStatement ==
818                 FormatStyle::BWACS_MultiLine) {
819           return 0;
820         }
821 
822         return 2;
823       }
824     } else if (I[1]->First->is(tok::l_brace)) {
825       if (I[1]->Last->is(TT_LineComment))
826         return 0;
827 
828       // Check for Limit <= 2 to account for the " {".
829       if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I)))
830         return 0;
831       Limit -= 2;
832       unsigned MergedLines = 0;
833       if (Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never ||
834           (I[1]->First == I[1]->Last && I + 2 != E &&
835            I[2]->First->is(tok::r_brace))) {
836         MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
837         // If we managed to merge the block, count the statement header, which
838         // is on a separate line.
839         if (MergedLines > 0)
840           ++MergedLines;
841       }
842       return MergedLines;
843     }
844     return 0;
845   }
846 
847   /// Returns the modified column limit for \p I if it is inside a macro and
848   /// needs a trailing '\'.
849   unsigned
850   limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
851                          SmallVectorImpl<AnnotatedLine *>::const_iterator E,
852                          unsigned Limit) {
853     if (I[0]->InPPDirective && I + 1 != E &&
854         !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
855       return Limit < 2 ? 0 : Limit - 2;
856     }
857     return Limit;
858   }
859 
860   bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
861                            unsigned Limit) {
862     if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
863       return false;
864     return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
865   }
866 
867   bool containsMustBreak(const AnnotatedLine *Line) {
868     for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next)
869       if (Tok->MustBreakBefore)
870         return true;
871     return false;
872   }
873 
874   void join(AnnotatedLine &A, const AnnotatedLine &B) {
875     assert(!A.Last->Next);
876     assert(!B.First->Previous);
877     if (B.Affected)
878       A.Affected = true;
879     A.Last->Next = B.First;
880     B.First->Previous = A.Last;
881     B.First->CanBreakBefore = true;
882     unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
883     for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
884       Tok->TotalLength += LengthA;
885       A.Last = Tok;
886     }
887   }
888 
889   const FormatStyle &Style;
890   const AdditionalKeywords &Keywords;
891   const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
892 
893   SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
894   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
895 };
896 
897 static void markFinalized(FormatToken *Tok) {
898   for (; Tok; Tok = Tok->Next) {
899     Tok->Finalized = true;
900     for (AnnotatedLine *Child : Tok->Children)
901       markFinalized(Child->First);
902   }
903 }
904 
905 #ifndef NDEBUG
906 static void printLineState(const LineState &State) {
907   llvm::dbgs() << "State: ";
908   for (const ParenState &P : State.Stack) {
909     llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|"
910                  << P.LastSpace << "|" << P.NestedBlockIndent << " ";
911   }
912   llvm::dbgs() << State.NextToken->TokenText << "\n";
913 }
914 #endif
915 
916 /// Base class for classes that format one \c AnnotatedLine.
917 class LineFormatter {
918 public:
919   LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
920                 const FormatStyle &Style,
921                 UnwrappedLineFormatter *BlockFormatter)
922       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
923         BlockFormatter(BlockFormatter) {}
924   virtual ~LineFormatter() {}
925 
926   /// Formats an \c AnnotatedLine and returns the penalty.
927   ///
928   /// If \p DryRun is \c false, directly applies the changes.
929   virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
930                               unsigned FirstStartColumn, bool DryRun) = 0;
931 
932 protected:
933   /// If the \p State's next token is an r_brace closing a nested block,
934   /// format the nested block before it.
935   ///
936   /// Returns \c true if all children could be placed successfully and adapts
937   /// \p Penalty as well as \p State. If \p DryRun is false, also directly
938   /// creates changes using \c Whitespaces.
939   ///
940   /// The crucial idea here is that children always get formatted upon
941   /// encountering the closing brace right after the nested block. Now, if we
942   /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
943   /// \c false), the entire block has to be kept on the same line (which is only
944   /// possible if it fits on the line, only contains a single statement, etc.
945   ///
946   /// If \p NewLine is true, we format the nested block on separate lines, i.e.
947   /// break after the "{", format all lines with correct indentation and the put
948   /// the closing "}" on yet another new line.
949   ///
950   /// This enables us to keep the simple structure of the
951   /// \c UnwrappedLineFormatter, where we only have two options for each token:
952   /// break or don't break.
953   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
954                       unsigned &Penalty) {
955     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
956     FormatToken &Previous = *State.NextToken->Previous;
957     if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
958         Previous.Children.size() == 0) {
959       // The previous token does not open a block. Nothing to do. We don't
960       // assert so that we can simply call this function for all tokens.
961       return true;
962     }
963 
964     if (NewLine) {
965       const ParenState &P = State.Stack.back();
966 
967       int AdditionalIndent =
968           P.Indent - Previous.Children[0]->Level * Style.IndentWidth;
969 
970       if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope &&
971           P.NestedBlockIndent == P.LastSpace) {
972         if (State.NextToken->MatchingParen &&
973             State.NextToken->MatchingParen->is(TT_LambdaLBrace)) {
974           State.Stack.pop_back();
975         }
976         if (LBrace->is(TT_LambdaLBrace))
977           AdditionalIndent = 0;
978       }
979 
980       Penalty +=
981           BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
982                                  /*FixBadIndentation=*/true);
983       return true;
984     }
985 
986     if (Previous.Children[0]->First->MustBreakBefore)
987       return false;
988 
989     // Cannot merge into one line if this line ends on a comment.
990     if (Previous.is(tok::comment))
991       return false;
992 
993     // Cannot merge multiple statements into a single line.
994     if (Previous.Children.size() > 1)
995       return false;
996 
997     const AnnotatedLine *Child = Previous.Children[0];
998     // We can't put the closing "}" on a line with a trailing comment.
999     if (Child->Last->isTrailingComment())
1000       return false;
1001 
1002     // If the child line exceeds the column limit, we wouldn't want to merge it.
1003     // We add +2 for the trailing " }".
1004     if (Style.ColumnLimit > 0 &&
1005         Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) {
1006       return false;
1007     }
1008 
1009     if (!DryRun) {
1010       Whitespaces->replaceWhitespace(
1011           *Child->First, /*Newlines=*/0, /*Spaces=*/1,
1012           /*StartOfTokenColumn=*/State.Column, /*IsAligned=*/false,
1013           State.Line->InPPDirective);
1014     }
1015     Penalty +=
1016         formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);
1017 
1018     State.Column += 1 + Child->Last->TotalLength;
1019     return true;
1020   }
1021 
1022   ContinuationIndenter *Indenter;
1023 
1024 private:
1025   WhitespaceManager *Whitespaces;
1026   const FormatStyle &Style;
1027   UnwrappedLineFormatter *BlockFormatter;
1028 };
1029 
1030 /// Formatter that keeps the existing line breaks.
1031 class NoColumnLimitLineFormatter : public LineFormatter {
1032 public:
1033   NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
1034                              WhitespaceManager *Whitespaces,
1035                              const FormatStyle &Style,
1036                              UnwrappedLineFormatter *BlockFormatter)
1037       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1038 
1039   /// Formats the line, simply keeping all of the input's line breaking
1040   /// decisions.
1041   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1042                       unsigned FirstStartColumn, bool DryRun) override {
1043     assert(!DryRun);
1044     LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,
1045                                                 &Line, /*DryRun=*/false);
1046     while (State.NextToken) {
1047       bool Newline =
1048           Indenter->mustBreak(State) ||
1049           (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
1050       unsigned Penalty = 0;
1051       formatChildren(State, Newline, /*DryRun=*/false, Penalty);
1052       Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
1053     }
1054     return 0;
1055   }
1056 };
1057 
1058 /// Formatter that puts all tokens into a single line without breaks.
1059 class NoLineBreakFormatter : public LineFormatter {
1060 public:
1061   NoLineBreakFormatter(ContinuationIndenter *Indenter,
1062                        WhitespaceManager *Whitespaces, const FormatStyle &Style,
1063                        UnwrappedLineFormatter *BlockFormatter)
1064       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1065 
1066   /// Puts all tokens into a single line.
1067   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1068                       unsigned FirstStartColumn, bool DryRun) override {
1069     unsigned Penalty = 0;
1070     LineState State =
1071         Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
1072     while (State.NextToken) {
1073       formatChildren(State, /*NewLine=*/false, DryRun, Penalty);
1074       Indenter->addTokenToState(
1075           State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
1076     }
1077     return Penalty;
1078   }
1079 };
1080 
1081 /// Finds the best way to break lines.
1082 class OptimizingLineFormatter : public LineFormatter {
1083 public:
1084   OptimizingLineFormatter(ContinuationIndenter *Indenter,
1085                           WhitespaceManager *Whitespaces,
1086                           const FormatStyle &Style,
1087                           UnwrappedLineFormatter *BlockFormatter)
1088       : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
1089 
1090   /// Formats the line by finding the best line breaks with line lengths
1091   /// below the column limit.
1092   unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
1093                       unsigned FirstStartColumn, bool DryRun) override {
1094     LineState State =
1095         Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
1096 
1097     // If the ObjC method declaration does not fit on a line, we should format
1098     // it with one arg per line.
1099     if (State.Line->Type == LT_ObjCMethodDecl)
1100       State.Stack.back().BreakBeforeParameter = true;
1101 
1102     // Find best solution in solution space.
1103     return analyzeSolutionSpace(State, DryRun);
1104   }
1105 
1106 private:
1107   struct CompareLineStatePointers {
1108     bool operator()(LineState *obj1, LineState *obj2) const {
1109       return *obj1 < *obj2;
1110     }
1111   };
1112 
1113   /// A pair of <penalty, count> that is used to prioritize the BFS on.
1114   ///
1115   /// In case of equal penalties, we want to prefer states that were inserted
1116   /// first. During state generation we make sure that we insert states first
1117   /// that break the line as late as possible.
1118   typedef std::pair<unsigned, unsigned> OrderedPenalty;
1119 
1120   /// An edge in the solution space from \c Previous->State to \c State,
1121   /// inserting a newline dependent on the \c NewLine.
1122   struct StateNode {
1123     StateNode(const LineState &State, bool NewLine, StateNode *Previous)
1124         : State(State), NewLine(NewLine), Previous(Previous) {}
1125     LineState State;
1126     bool NewLine;
1127     StateNode *Previous;
1128   };
1129 
1130   /// An item in the prioritized BFS search queue. The \c StateNode's
1131   /// \c State has the given \c OrderedPenalty.
1132   typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
1133 
1134   /// The BFS queue type.
1135   typedef std::priority_queue<QueueItem, SmallVector<QueueItem>,
1136                               std::greater<QueueItem>>
1137       QueueType;
1138 
1139   /// Analyze the entire solution space starting from \p InitialState.
1140   ///
1141   /// This implements a variant of Dijkstra's algorithm on the graph that spans
1142   /// the solution space (\c LineStates are the nodes). The algorithm tries to
1143   /// find the shortest path (the one with lowest penalty) from \p InitialState
1144   /// to a state where all tokens are placed. Returns the penalty.
1145   ///
1146   /// If \p DryRun is \c false, directly applies the changes.
1147   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
1148     std::set<LineState *, CompareLineStatePointers> Seen;
1149 
1150     // Increasing count of \c StateNode items we have created. This is used to
1151     // create a deterministic order independent of the container.
1152     unsigned Count = 0;
1153     QueueType Queue;
1154 
1155     // Insert start element into queue.
1156     StateNode *RootNode =
1157         new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
1158     Queue.push(QueueItem(OrderedPenalty(0, Count), RootNode));
1159     ++Count;
1160 
1161     unsigned Penalty = 0;
1162 
1163     // While not empty, take first element and follow edges.
1164     while (!Queue.empty()) {
1165       // Quit if we still haven't found a solution by now.
1166       if (Count > 25000000)
1167         return 0;
1168 
1169       Penalty = Queue.top().first.first;
1170       StateNode *Node = Queue.top().second;
1171       if (!Node->State.NextToken) {
1172         LLVM_DEBUG(llvm::dbgs()
1173                    << "\n---\nPenalty for line: " << Penalty << "\n");
1174         break;
1175       }
1176       Queue.pop();
1177 
1178       // Cut off the analysis of certain solutions if the analysis gets too
1179       // complex. See description of IgnoreStackForComparison.
1180       if (Count > 50000)
1181         Node->State.IgnoreStackForComparison = true;
1182 
1183       if (!Seen.insert(&Node->State).second) {
1184         // State already examined with lower penalty.
1185         continue;
1186       }
1187 
1188       FormatDecision LastFormat = Node->State.NextToken->getDecision();
1189       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
1190         addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
1191       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
1192         addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
1193     }
1194 
1195     if (Queue.empty()) {
1196       // We were unable to find a solution, do nothing.
1197       // FIXME: Add diagnostic?
1198       LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n");
1199       return 0;
1200     }
1201 
1202     // Reconstruct the solution.
1203     if (!DryRun)
1204       reconstructPath(InitialState, Queue.top().second);
1205 
1206     LLVM_DEBUG(llvm::dbgs()
1207                << "Total number of analyzed states: " << Count << "\n");
1208     LLVM_DEBUG(llvm::dbgs() << "---\n");
1209 
1210     return Penalty;
1211   }
1212 
1213   /// Add the following state to the analysis queue \c Queue.
1214   ///
1215   /// Assume the current state is \p PreviousNode and has been reached with a
1216   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
1217   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1218                            bool NewLine, unsigned *Count, QueueType *Queue) {
1219     if (NewLine && !Indenter->canBreak(PreviousNode->State))
1220       return;
1221     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1222       return;
1223 
1224     StateNode *Node = new (Allocator.Allocate())
1225         StateNode(PreviousNode->State, NewLine, PreviousNode);
1226     if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1227       return;
1228 
1229     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1230 
1231     Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1232     ++(*Count);
1233   }
1234 
1235   /// Applies the best formatting by reconstructing the path in the
1236   /// solution space that leads to \c Best.
1237   void reconstructPath(LineState &State, StateNode *Best) {
1238     llvm::SmallVector<StateNode *> Path;
1239     // We do not need a break before the initial token.
1240     while (Best->Previous) {
1241       Path.push_back(Best);
1242       Best = Best->Previous;
1243     }
1244     for (const auto &Node : llvm::reverse(Path)) {
1245       unsigned Penalty = 0;
1246       formatChildren(State, Node->NewLine, /*DryRun=*/false, Penalty);
1247       Penalty += Indenter->addTokenToState(State, Node->NewLine, false);
1248 
1249       LLVM_DEBUG({
1250         printLineState(Node->Previous->State);
1251         if (Node->NewLine) {
1252           llvm::dbgs() << "Penalty for placing "
1253                        << Node->Previous->State.NextToken->Tok.getName()
1254                        << " on a new line: " << Penalty << "\n";
1255         }
1256       });
1257     }
1258   }
1259 
1260   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1261 };
1262 
1263 } // anonymous namespace
1264 
1265 unsigned UnwrappedLineFormatter::format(
1266     const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
1267     int AdditionalIndent, bool FixBadIndentation, unsigned FirstStartColumn,
1268     unsigned NextStartColumn, unsigned LastStartColumn) {
1269   LineJoiner Joiner(Style, Keywords, Lines);
1270 
1271   // Try to look up already computed penalty in DryRun-mode.
1272   std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
1273       &Lines, AdditionalIndent);
1274   auto CacheIt = PenaltyCache.find(CacheKey);
1275   if (DryRun && CacheIt != PenaltyCache.end())
1276     return CacheIt->second;
1277 
1278   assert(!Lines.empty());
1279   unsigned Penalty = 0;
1280   LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
1281                                    AdditionalIndent);
1282   const AnnotatedLine *PrevPrevLine = nullptr;
1283   const AnnotatedLine *PreviousLine = nullptr;
1284   const AnnotatedLine *NextLine = nullptr;
1285 
1286   // The minimum level of consecutive lines that have been formatted.
1287   unsigned RangeMinLevel = UINT_MAX;
1288 
1289   bool FirstLine = true;
1290   for (const AnnotatedLine *Line =
1291            Joiner.getNextMergedLine(DryRun, IndentTracker);
1292        Line; PrevPrevLine = PreviousLine, PreviousLine = Line, Line = NextLine,
1293                            FirstLine = false) {
1294     assert(Line->First);
1295     const AnnotatedLine &TheLine = *Line;
1296     unsigned Indent = IndentTracker.getIndent();
1297 
1298     // We continue formatting unchanged lines to adjust their indent, e.g. if a
1299     // scope was added. However, we need to carefully stop doing this when we
1300     // exit the scope of affected lines to prevent indenting a the entire
1301     // remaining file if it currently missing a closing brace.
1302     bool PreviousRBrace =
1303         PreviousLine && PreviousLine->startsWith(tok::r_brace);
1304     bool ContinueFormatting =
1305         TheLine.Level > RangeMinLevel ||
1306         (TheLine.Level == RangeMinLevel && !PreviousRBrace &&
1307          !TheLine.startsWith(tok::r_brace));
1308 
1309     bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
1310                           Indent != TheLine.First->OriginalColumn;
1311     bool ShouldFormat = TheLine.Affected || FixIndentation;
1312     // We cannot format this line; if the reason is that the line had a
1313     // parsing error, remember that.
1314     if (ShouldFormat && TheLine.Type == LT_Invalid && Status) {
1315       Status->FormatComplete = false;
1316       Status->Line =
1317           SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation());
1318     }
1319 
1320     if (ShouldFormat && TheLine.Type != LT_Invalid) {
1321       if (!DryRun) {
1322         bool LastLine = TheLine.First->is(tok::eof);
1323         formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent,
1324                          LastLine ? LastStartColumn : NextStartColumn + Indent);
1325       }
1326 
1327       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1328       unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
1329       bool FitsIntoOneLine =
1330           TheLine.Last->TotalLength + Indent <= ColumnLimit ||
1331           (TheLine.Type == LT_ImportStatement &&
1332            (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
1333           (Style.isCSharp() &&
1334            TheLine.InPPDirective); // don't split #regions in C#
1335       if (Style.ColumnLimit == 0) {
1336         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
1337             .formatLine(TheLine, NextStartColumn + Indent,
1338                         FirstLine ? FirstStartColumn : 0, DryRun);
1339       } else if (FitsIntoOneLine) {
1340         Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
1341                        .formatLine(TheLine, NextStartColumn + Indent,
1342                                    FirstLine ? FirstStartColumn : 0, DryRun);
1343       } else {
1344         Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
1345                        .formatLine(TheLine, NextStartColumn + Indent,
1346                                    FirstLine ? FirstStartColumn : 0, DryRun);
1347       }
1348       RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
1349     } else {
1350       // If no token in the current line is affected, we still need to format
1351       // affected children.
1352       if (TheLine.ChildrenAffected) {
1353         for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
1354           if (!Tok->Children.empty())
1355             format(Tok->Children, DryRun);
1356       }
1357 
1358       // Adapt following lines on the current indent level to the same level
1359       // unless the current \c AnnotatedLine is not at the beginning of a line.
1360       bool StartsNewLine =
1361           TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
1362       if (StartsNewLine)
1363         IndentTracker.adjustToUnmodifiedLine(TheLine);
1364       if (!DryRun) {
1365         bool ReformatLeadingWhitespace =
1366             StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
1367                               TheLine.LeadingEmptyLinesAffected);
1368         // Format the first token.
1369         if (ReformatLeadingWhitespace) {
1370           formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines,
1371                            TheLine.First->OriginalColumn,
1372                            TheLine.First->OriginalColumn);
1373         } else {
1374           Whitespaces->addUntouchableToken(*TheLine.First,
1375                                            TheLine.InPPDirective);
1376         }
1377 
1378         // Notify the WhitespaceManager about the unchanged whitespace.
1379         for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
1380           Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
1381       }
1382       NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1383       RangeMinLevel = UINT_MAX;
1384     }
1385     if (!DryRun)
1386       markFinalized(TheLine.First);
1387   }
1388   PenaltyCache[CacheKey] = Penalty;
1389   return Penalty;
1390 }
1391 
1392 void UnwrappedLineFormatter::formatFirstToken(
1393     const AnnotatedLine &Line, const AnnotatedLine *PreviousLine,
1394     const AnnotatedLine *PrevPrevLine,
1395     const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent,
1396     unsigned NewlineIndent) {
1397   FormatToken &RootToken = *Line.First;
1398   if (RootToken.is(tok::eof)) {
1399     unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
1400     unsigned TokenIndent = Newlines ? NewlineIndent : 0;
1401     Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,
1402                                    TokenIndent);
1403     return;
1404   }
1405   unsigned Newlines =
1406       std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1407   // Remove empty lines before "}" where applicable.
1408   if (RootToken.is(tok::r_brace) &&
1409       (!RootToken.Next ||
1410        (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) &&
1411       // Do not remove empty lines before namespace closing "}".
1412       !getNamespaceToken(&Line, Lines)) {
1413     Newlines = std::min(Newlines, 1u);
1414   }
1415   // Remove empty lines at the start of nested blocks (lambdas/arrow functions)
1416   if (PreviousLine == nullptr && Line.Level > 0)
1417     Newlines = std::min(Newlines, 1u);
1418   if (Newlines == 0 && !RootToken.IsFirst)
1419     Newlines = 1;
1420   if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
1421     Newlines = 0;
1422 
1423   // Remove empty lines after "{".
1424   if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
1425       PreviousLine->Last->is(tok::l_brace) &&
1426       !PreviousLine->startsWithNamespace() &&
1427       !(PrevPrevLine && PrevPrevLine->startsWithNamespace() &&
1428         PreviousLine->startsWith(tok::l_brace)) &&
1429       !startsExternCBlock(*PreviousLine)) {
1430     Newlines = 1;
1431   }
1432 
1433   // Insert or remove empty line before access specifiers.
1434   if (PreviousLine && RootToken.isAccessSpecifier()) {
1435     switch (Style.EmptyLineBeforeAccessModifier) {
1436     case FormatStyle::ELBAMS_Never:
1437       if (Newlines > 1)
1438         Newlines = 1;
1439       break;
1440     case FormatStyle::ELBAMS_Leave:
1441       Newlines = std::max(RootToken.NewlinesBefore, 1u);
1442       break;
1443     case FormatStyle::ELBAMS_LogicalBlock:
1444       if (PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && Newlines <= 1)
1445         Newlines = 2;
1446       if (PreviousLine->First->isAccessSpecifier())
1447         Newlines = 1; // Previous is an access modifier remove all new lines.
1448       break;
1449     case FormatStyle::ELBAMS_Always: {
1450       const FormatToken *previousToken;
1451       if (PreviousLine->Last->is(tok::comment))
1452         previousToken = PreviousLine->Last->getPreviousNonComment();
1453       else
1454         previousToken = PreviousLine->Last;
1455       if ((!previousToken || !previousToken->is(tok::l_brace)) && Newlines <= 1)
1456         Newlines = 2;
1457     } break;
1458     }
1459   }
1460 
1461   // Insert or remove empty line after access specifiers.
1462   if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
1463       (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) {
1464     // EmptyLineBeforeAccessModifier is handling the case when two access
1465     // modifiers follow each other.
1466     if (!RootToken.isAccessSpecifier()) {
1467       switch (Style.EmptyLineAfterAccessModifier) {
1468       case FormatStyle::ELAAMS_Never:
1469         Newlines = 1;
1470         break;
1471       case FormatStyle::ELAAMS_Leave:
1472         Newlines = std::max(Newlines, 1u);
1473         break;
1474       case FormatStyle::ELAAMS_Always:
1475         if (RootToken.is(tok::r_brace)) // Do not add at end of class.
1476           Newlines = 1u;
1477         else
1478           Newlines = std::max(Newlines, 2u);
1479         break;
1480       }
1481     }
1482   }
1483 
1484   if (Newlines)
1485     Indent = NewlineIndent;
1486 
1487   // Preprocessor directives get indented before the hash only if specified. In
1488   // Javascript import statements are indented like normal statements.
1489   if (!Style.isJavaScript() &&
1490       Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
1491       (Line.Type == LT_PreprocessorDirective ||
1492        Line.Type == LT_ImportStatement)) {
1493     Indent = 0;
1494   }
1495 
1496   Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
1497                                  /*IsAligned=*/false,
1498                                  Line.InPPDirective &&
1499                                      !RootToken.HasUnescapedNewline);
1500 }
1501 
1502 unsigned
1503 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
1504                                        const AnnotatedLine *NextLine) const {
1505   // In preprocessor directives reserve two chars for trailing " \" if the
1506   // next line continues the preprocessor directive.
1507   bool ContinuesPPDirective =
1508       InPPDirective &&
1509       // If there is no next line, this is likely a child line and the parent
1510       // continues the preprocessor directive.
1511       (!NextLine ||
1512        (NextLine->InPPDirective &&
1513         // If there is an unescaped newline between this line and the next, the
1514         // next line starts a new preprocessor directive.
1515         !NextLine->First->HasUnescapedNewline));
1516   return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
1517 }
1518 
1519 } // namespace format
1520 } // namespace clang
1521