1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "UnwrappedLineFormatter.h"
10 #include "NamespaceEndCommentsFixer.h"
11 #include "WhitespaceManager.h"
12 #include "llvm/Support/Debug.h"
13 #include <queue>
14
15 #define DEBUG_TYPE "format-formatter"
16
17 namespace clang {
18 namespace format {
19
20 namespace {
21
startsExternCBlock(const AnnotatedLine & Line)22 bool startsExternCBlock(const AnnotatedLine &Line) {
23 const FormatToken *Next = Line.First->getNextNonComment();
24 const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
25 return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
26 NextNext && NextNext->is(tok::l_brace);
27 }
28
29 /// Tracks the indent level of \c AnnotatedLines across levels.
30 ///
31 /// \c nextLine must be called for each \c AnnotatedLine, after which \c
32 /// getIndent() will return the indent for the last line \c nextLine was called
33 /// with.
34 /// If the line is not formatted (and thus the indent does not change), calling
35 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
36 /// subsequent lines on the same level to be indented at the same level as the
37 /// given line.
38 class LevelIndentTracker {
39 public:
LevelIndentTracker(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned StartLevel,int AdditionalIndent)40 LevelIndentTracker(const FormatStyle &Style,
41 const AdditionalKeywords &Keywords, unsigned StartLevel,
42 int AdditionalIndent)
43 : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
44 for (unsigned i = 0; i != StartLevel; ++i)
45 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
46 }
47
48 /// Returns the indent for the current line.
getIndent() const49 unsigned getIndent() const { return Indent; }
50
51 /// Update the indent state given that \p Line is going to be formatted
52 /// next.
nextLine(const AnnotatedLine & Line)53 void nextLine(const AnnotatedLine &Line) {
54 Offset = getIndentOffset(*Line.First);
55 // Update the indent level cache size so that we can rely on it
56 // having the right size in adjustToUnmodifiedline.
57 while (IndentForLevel.size() <= Line.Level)
58 IndentForLevel.push_back(-1);
59 if (Line.InPPDirective) {
60 unsigned IndentWidth =
61 (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth;
62 Indent = Line.Level * IndentWidth + AdditionalIndent;
63 } else {
64 IndentForLevel.resize(Line.Level + 1);
65 Indent = getIndent(IndentForLevel, Line.Level);
66 }
67 if (static_cast<int>(Indent) + Offset >= 0)
68 Indent += Offset;
69 if (Line.First->is(TT_CSharpGenericTypeConstraint))
70 Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth;
71 }
72
73 /// Update the indent state given that \p Line indent should be
74 /// skipped.
skipLine(const AnnotatedLine & Line)75 void skipLine(const AnnotatedLine &Line) {
76 while (IndentForLevel.size() <= Line.Level)
77 IndentForLevel.push_back(Indent);
78 }
79
80 /// Update the level indent to adapt to the given \p Line.
81 ///
82 /// When a line is not formatted, we move the subsequent lines on the same
83 /// level to the same indent.
84 /// Note that \c nextLine must have been called before this method.
adjustToUnmodifiedLine(const AnnotatedLine & Line)85 void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
86 unsigned LevelIndent = Line.First->OriginalColumn;
87 if (static_cast<int>(LevelIndent) - Offset >= 0)
88 LevelIndent -= Offset;
89 if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
90 !Line.InPPDirective)
91 IndentForLevel[Line.Level] = LevelIndent;
92 }
93
94 private:
95 /// Get the offset of the line relatively to the level.
96 ///
97 /// For example, 'public:' labels in classes are offset by 1 or 2
98 /// characters to the left from their level.
getIndentOffset(const FormatToken & RootToken)99 int getIndentOffset(const FormatToken &RootToken) {
100 if (Style.Language == FormatStyle::LK_Java ||
101 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
102 return 0;
103 if (RootToken.isAccessSpecifier(false) ||
104 RootToken.isObjCAccessSpecifier() ||
105 (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) &&
106 RootToken.Next && RootToken.Next->is(tok::colon))) {
107 // The AccessModifierOffset may be overriden by IndentAccessModifiers,
108 // in which case we take a negative value of the IndentWidth to simulate
109 // the upper indent level.
110 return Style.IndentAccessModifiers ? -Style.IndentWidth
111 : Style.AccessModifierOffset;
112 }
113 return 0;
114 }
115
116 /// Get the indent of \p Level from \p IndentForLevel.
117 ///
118 /// \p IndentForLevel must contain the indent for the level \c l
119 /// at \p IndentForLevel[l], or a value < 0 if the indent for
120 /// that level is unknown.
getIndent(ArrayRef<int> IndentForLevel,unsigned Level)121 unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
122 if (IndentForLevel[Level] != -1)
123 return IndentForLevel[Level];
124 if (Level == 0)
125 return 0;
126 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
127 }
128
129 const FormatStyle &Style;
130 const AdditionalKeywords &Keywords;
131 const unsigned AdditionalIndent;
132
133 /// The indent in characters for each level.
134 std::vector<int> IndentForLevel;
135
136 /// Offset of the current line relative to the indent level.
137 ///
138 /// For example, the 'public' keywords is often indented with a negative
139 /// offset.
140 int Offset = 0;
141
142 /// The current line's indent.
143 unsigned Indent = 0;
144 };
145
getMatchingNamespaceToken(const AnnotatedLine * Line,const SmallVectorImpl<AnnotatedLine * > & AnnotatedLines)146 const FormatToken *getMatchingNamespaceToken(
147 const AnnotatedLine *Line,
148 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
149 if (!Line->startsWith(tok::r_brace))
150 return nullptr;
151 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
152 if (StartLineIndex == UnwrappedLine::kInvalidIndex)
153 return nullptr;
154 assert(StartLineIndex < AnnotatedLines.size());
155 return AnnotatedLines[StartLineIndex]->First->getNamespaceToken();
156 }
157
getNamespaceTokenText(const AnnotatedLine * Line)158 StringRef getNamespaceTokenText(const AnnotatedLine *Line) {
159 const FormatToken *NamespaceToken = Line->First->getNamespaceToken();
160 return NamespaceToken ? NamespaceToken->TokenText : StringRef();
161 }
162
getMatchingNamespaceTokenText(const AnnotatedLine * Line,const SmallVectorImpl<AnnotatedLine * > & AnnotatedLines)163 StringRef getMatchingNamespaceTokenText(
164 const AnnotatedLine *Line,
165 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
166 const FormatToken *NamespaceToken =
167 getMatchingNamespaceToken(Line, AnnotatedLines);
168 return NamespaceToken ? NamespaceToken->TokenText : StringRef();
169 }
170
171 class LineJoiner {
172 public:
LineJoiner(const FormatStyle & Style,const AdditionalKeywords & Keywords,const SmallVectorImpl<AnnotatedLine * > & Lines)173 LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
174 const SmallVectorImpl<AnnotatedLine *> &Lines)
175 : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()),
176 AnnotatedLines(Lines) {}
177
178 /// Returns the next line, merging multiple lines into one if possible.
getNextMergedLine(bool DryRun,LevelIndentTracker & IndentTracker)179 const AnnotatedLine *getNextMergedLine(bool DryRun,
180 LevelIndentTracker &IndentTracker) {
181 if (Next == End)
182 return nullptr;
183 const AnnotatedLine *Current = *Next;
184 IndentTracker.nextLine(*Current);
185 unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End);
186 if (MergedLines > 0 && Style.ColumnLimit == 0)
187 // Disallow line merging if there is a break at the start of one of the
188 // input lines.
189 for (unsigned i = 0; i < MergedLines; ++i)
190 if (Next[i + 1]->First->NewlinesBefore > 0)
191 MergedLines = 0;
192 if (!DryRun)
193 for (unsigned i = 0; i < MergedLines; ++i)
194 join(*Next[0], *Next[i + 1]);
195 Next = Next + MergedLines + 1;
196 return Current;
197 }
198
199 private:
200 /// Calculates how many lines can be merged into 1 starting at \p I.
201 unsigned
tryFitMultipleLinesInOne(LevelIndentTracker & IndentTracker,SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E)202 tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker,
203 SmallVectorImpl<AnnotatedLine *>::const_iterator I,
204 SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
205 const unsigned Indent = IndentTracker.getIndent();
206
207 // Can't join the last line with anything.
208 if (I + 1 == E)
209 return 0;
210 // We can never merge stuff if there are trailing line comments.
211 const AnnotatedLine *TheLine = *I;
212 if (TheLine->Last->is(TT_LineComment))
213 return 0;
214 if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
215 return 0;
216 if (TheLine->InPPDirective &&
217 (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
218 return 0;
219
220 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
221 return 0;
222
223 unsigned Limit =
224 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
225 // If we already exceed the column limit, we set 'Limit' to 0. The different
226 // tryMerge..() functions can then decide whether to still do merging.
227 Limit = TheLine->Last->TotalLength > Limit
228 ? 0
229 : Limit - TheLine->Last->TotalLength;
230
231 if (TheLine->Last->is(TT_FunctionLBrace) &&
232 TheLine->First == TheLine->Last &&
233 !Style.BraceWrapping.SplitEmptyFunction &&
234 I[1]->First->is(tok::r_brace))
235 return tryMergeSimpleBlock(I, E, Limit);
236
237 // Handle empty record blocks where the brace has already been wrapped
238 if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last &&
239 I != AnnotatedLines.begin()) {
240 bool EmptyBlock = I[1]->First->is(tok::r_brace);
241
242 const FormatToken *Tok = I[-1]->First;
243 if (Tok && Tok->is(tok::comment))
244 Tok = Tok->getNextNonComment();
245
246 if (Tok && Tok->getNamespaceToken())
247 return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock
248 ? tryMergeSimpleBlock(I, E, Limit)
249 : 0;
250
251 if (Tok && Tok->is(tok::kw_typedef))
252 Tok = Tok->getNextNonComment();
253 if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union,
254 tok::kw_extern, Keywords.kw_interface))
255 return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock
256 ? tryMergeSimpleBlock(I, E, Limit)
257 : 0;
258
259 if (Tok && Tok->is(tok::kw_template) &&
260 Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) {
261 return 0;
262 }
263 }
264
265 // FIXME: TheLine->Level != 0 might or might not be the right check to do.
266 // If necessary, change to something smarter.
267 bool MergeShortFunctions =
268 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
269 (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
270 I[1]->First->is(tok::r_brace)) ||
271 (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly &&
272 TheLine->Level != 0);
273
274 if (Style.CompactNamespaces) {
275 if (auto nsToken = TheLine->First->getNamespaceToken()) {
276 int i = 0;
277 unsigned closingLine = TheLine->MatchingClosingBlockLineIndex - 1;
278 for (; I + 1 + i != E &&
279 nsToken->TokenText == getNamespaceTokenText(I[i + 1]) &&
280 closingLine == I[i + 1]->MatchingClosingBlockLineIndex &&
281 I[i + 1]->Last->TotalLength < Limit;
282 i++, closingLine--) {
283 // No extra indent for compacted namespaces
284 IndentTracker.skipLine(*I[i + 1]);
285
286 Limit -= I[i + 1]->Last->TotalLength;
287 }
288 return i;
289 }
290
291 if (auto nsToken = getMatchingNamespaceToken(TheLine, AnnotatedLines)) {
292 int i = 0;
293 unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1;
294 for (; I + 1 + i != E &&
295 nsToken->TokenText ==
296 getMatchingNamespaceTokenText(I[i + 1], AnnotatedLines) &&
297 openingLine == I[i + 1]->MatchingOpeningBlockLineIndex;
298 i++, openingLine--) {
299 // No space between consecutive braces
300 I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace);
301
302 // Indent like the outer-most namespace
303 IndentTracker.nextLine(*I[i + 1]);
304 }
305 return i;
306 }
307 }
308
309 // Try to merge a function block with left brace unwrapped
310 if (TheLine->Last->is(TT_FunctionLBrace) &&
311 TheLine->First != TheLine->Last) {
312 return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
313 }
314 // Try to merge a control statement block with left brace unwrapped
315 if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last &&
316 TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) {
317 return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never
318 ? tryMergeSimpleBlock(I, E, Limit)
319 : 0;
320 }
321 // Try to merge a control statement block with left brace wrapped
322 if (I[1]->First->is(tok::l_brace) &&
323 (TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for,
324 tok::kw_switch, tok::kw_try, tok::kw_do,
325 TT_ForEachMacro) ||
326 (TheLine->First->is(tok::r_brace) && TheLine->First->Next &&
327 TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) &&
328 Style.BraceWrapping.AfterControlStatement ==
329 FormatStyle::BWACS_MultiLine) {
330 // If possible, merge the next line's wrapped left brace with the current
331 // line. Otherwise, leave it on the next line, as this is a multi-line
332 // control statement.
333 return (Style.ColumnLimit == 0 ||
334 TheLine->Last->TotalLength <= Style.ColumnLimit)
335 ? 1
336 : 0;
337 } else if (I[1]->First->is(tok::l_brace) &&
338 TheLine->First->isOneOf(tok::kw_if, tok::kw_while,
339 tok::kw_for)) {
340 return (Style.BraceWrapping.AfterControlStatement ==
341 FormatStyle::BWACS_Always)
342 ? tryMergeSimpleBlock(I, E, Limit)
343 : 0;
344 } else if (I[1]->First->is(tok::l_brace) &&
345 TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) &&
346 Style.BraceWrapping.AfterControlStatement ==
347 FormatStyle::BWACS_MultiLine) {
348 // This case if different from the upper BWACS_MultiLine processing
349 // in that a preceding r_brace is not on the same line as else/catch
350 // most likely because of BeforeElse/BeforeCatch set to true.
351 // If the line length doesn't fit ColumnLimit, leave l_brace on the
352 // next line to respect the BWACS_MultiLine.
353 return (Style.ColumnLimit == 0 ||
354 TheLine->Last->TotalLength <= Style.ColumnLimit)
355 ? 1
356 : 0;
357 }
358 // Don't merge block with left brace wrapped after ObjC special blocks
359 if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() &&
360 I[-1]->First->is(tok::at) && I[-1]->First->Next) {
361 tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID();
362 if (kwId == clang::tok::objc_autoreleasepool ||
363 kwId == clang::tok::objc_synchronized)
364 return 0;
365 }
366 // Don't merge block with left brace wrapped after case labels
367 if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() &&
368 I[-1]->First->isOneOf(tok::kw_case, tok::kw_default))
369 return 0;
370
371 // Don't merge an empty template class or struct if SplitEmptyRecords
372 // is defined.
373 if (Style.BraceWrapping.SplitEmptyRecord &&
374 TheLine->Last->is(tok::l_brace) && I != AnnotatedLines.begin() &&
375 I[-1]->Last) {
376 const FormatToken *Previous = I[-1]->Last;
377 if (Previous) {
378 if (Previous->is(tok::comment))
379 Previous = Previous->getPreviousNonComment();
380 if (Previous) {
381 if (Previous->is(tok::greater) && !I[-1]->InPPDirective)
382 return 0;
383 if (Previous->is(tok::identifier)) {
384 const FormatToken *PreviousPrevious =
385 Previous->getPreviousNonComment();
386 if (PreviousPrevious &&
387 PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct))
388 return 0;
389 }
390 }
391 }
392 }
393
394 // Try to merge a block with left brace wrapped that wasn't yet covered
395 if (TheLine->Last->is(tok::l_brace)) {
396 return !Style.BraceWrapping.AfterFunction ||
397 (I[1]->First->is(tok::r_brace) &&
398 !Style.BraceWrapping.SplitEmptyRecord)
399 ? tryMergeSimpleBlock(I, E, Limit)
400 : 0;
401 }
402 // Try to merge a function block with left brace wrapped
403 if (I[1]->First->is(TT_FunctionLBrace) &&
404 Style.BraceWrapping.AfterFunction) {
405 if (I[1]->Last->is(TT_LineComment))
406 return 0;
407
408 // Check for Limit <= 2 to account for the " {".
409 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
410 return 0;
411 Limit -= 2;
412
413 unsigned MergedLines = 0;
414 if (MergeShortFunctions ||
415 (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
416 I[1]->First == I[1]->Last && I + 2 != E &&
417 I[2]->First->is(tok::r_brace))) {
418 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
419 // If we managed to merge the block, count the function header, which is
420 // on a separate line.
421 if (MergedLines > 0)
422 ++MergedLines;
423 }
424 return MergedLines;
425 }
426 auto IsElseLine = [&TheLine]() -> bool {
427 const FormatToken *First = TheLine->First;
428 if (First->is(tok::kw_else))
429 return true;
430
431 return First->is(tok::r_brace) && First->Next &&
432 First->Next->is(tok::kw_else);
433 };
434 if (TheLine->First->is(tok::kw_if) ||
435 (IsElseLine() && (Style.AllowShortIfStatementsOnASingleLine ==
436 FormatStyle::SIS_AllIfsAndElse))) {
437 return Style.AllowShortIfStatementsOnASingleLine
438 ? tryMergeSimpleControlStatement(I, E, Limit)
439 : 0;
440 }
441 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while, tok::kw_do)) {
442 return Style.AllowShortLoopsOnASingleLine
443 ? tryMergeSimpleControlStatement(I, E, Limit)
444 : 0;
445 }
446 if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
447 return Style.AllowShortCaseLabelsOnASingleLine
448 ? tryMergeShortCaseLabels(I, E, Limit)
449 : 0;
450 }
451 if (TheLine->InPPDirective &&
452 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
453 return tryMergeSimplePPDirective(I, E, Limit);
454 }
455 return 0;
456 }
457
458 unsigned
tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E,unsigned Limit)459 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
460 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
461 unsigned Limit) {
462 if (Limit == 0)
463 return 0;
464 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
465 return 0;
466 if (1 + I[1]->Last->TotalLength > Limit)
467 return 0;
468 return 1;
469 }
470
tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E,unsigned Limit)471 unsigned tryMergeSimpleControlStatement(
472 SmallVectorImpl<AnnotatedLine *>::const_iterator I,
473 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
474 if (Limit == 0)
475 return 0;
476 if (Style.BraceWrapping.AfterControlStatement ==
477 FormatStyle::BWACS_Always &&
478 I[1]->First->is(tok::l_brace) &&
479 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
480 return 0;
481 if (I[1]->InPPDirective != (*I)->InPPDirective ||
482 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
483 return 0;
484 Limit = limitConsideringMacros(I + 1, E, Limit);
485 AnnotatedLine &Line = **I;
486 if (!Line.First->is(tok::kw_do) && !Line.First->is(tok::kw_else) &&
487 !Line.Last->is(tok::kw_else) && Line.Last->isNot(tok::r_paren))
488 return 0;
489 // Only merge do while if do is the only statement on the line.
490 if (Line.First->is(tok::kw_do) && !Line.Last->is(tok::kw_do))
491 return 0;
492 if (1 + I[1]->Last->TotalLength > Limit)
493 return 0;
494 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
495 TT_LineComment))
496 return 0;
497 // Only inline simple if's (no nested if or else), unless specified
498 if (Style.AllowShortIfStatementsOnASingleLine ==
499 FormatStyle::SIS_WithoutElse) {
500 if (I + 2 != E && Line.startsWith(tok::kw_if) &&
501 I[2]->First->is(tok::kw_else))
502 return 0;
503 }
504 return 1;
505 }
506
507 unsigned
tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E,unsigned Limit)508 tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
509 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
510 unsigned Limit) {
511 if (Limit == 0 || I + 1 == E ||
512 I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
513 return 0;
514 if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace))
515 return 0;
516 unsigned NumStmts = 0;
517 unsigned Length = 0;
518 bool EndsWithComment = false;
519 bool InPPDirective = I[0]->InPPDirective;
520 const unsigned Level = I[0]->Level;
521 for (; NumStmts < 3; ++NumStmts) {
522 if (I + 1 + NumStmts == E)
523 break;
524 const AnnotatedLine *Line = I[1 + NumStmts];
525 if (Line->InPPDirective != InPPDirective)
526 break;
527 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
528 break;
529 if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
530 tok::kw_while) ||
531 EndsWithComment)
532 return 0;
533 if (Line->First->is(tok::comment)) {
534 if (Level != Line->Level)
535 return 0;
536 SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts;
537 for (; J != E; ++J) {
538 Line = *J;
539 if (Line->InPPDirective != InPPDirective)
540 break;
541 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
542 break;
543 if (Line->First->isNot(tok::comment) || Level != Line->Level)
544 return 0;
545 }
546 break;
547 }
548 if (Line->Last->is(tok::comment))
549 EndsWithComment = true;
550 Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
551 }
552 if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
553 return 0;
554 return NumStmts;
555 }
556
557 unsigned
tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E,unsigned Limit)558 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
559 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
560 unsigned Limit) {
561 AnnotatedLine &Line = **I;
562
563 // Don't merge ObjC @ keywords and methods.
564 // FIXME: If an option to allow short exception handling clauses on a single
565 // line is added, change this to not return for @try and friends.
566 if (Style.Language != FormatStyle::LK_Java &&
567 Line.First->isOneOf(tok::at, tok::minus, tok::plus))
568 return 0;
569
570 // Check that the current line allows merging. This depends on whether we
571 // are in a control flow statements as well as several style flags.
572 if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
573 (Line.First->Next && Line.First->Next->is(tok::kw_else)))
574 return 0;
575 // default: in switch statement
576 if (Line.First->is(tok::kw_default)) {
577 const FormatToken *Tok = Line.First->getNextNonComment();
578 if (Tok && Tok->is(tok::colon))
579 return 0;
580 }
581 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
582 tok::kw___try, tok::kw_catch, tok::kw___finally,
583 tok::kw_for, tok::r_brace, Keywords.kw___except)) {
584 if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
585 return 0;
586 // Don't merge when we can't except the case when
587 // the control statement block is empty
588 if (!Style.AllowShortIfStatementsOnASingleLine &&
589 Line.startsWith(tok::kw_if) &&
590 !Style.BraceWrapping.AfterControlStatement &&
591 !I[1]->First->is(tok::r_brace))
592 return 0;
593 if (!Style.AllowShortIfStatementsOnASingleLine &&
594 Line.startsWith(tok::kw_if) &&
595 Style.BraceWrapping.AfterControlStatement ==
596 FormatStyle::BWACS_Always &&
597 I + 2 != E && !I[2]->First->is(tok::r_brace))
598 return 0;
599 if (!Style.AllowShortLoopsOnASingleLine &&
600 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&
601 !Style.BraceWrapping.AfterControlStatement &&
602 !I[1]->First->is(tok::r_brace))
603 return 0;
604 if (!Style.AllowShortLoopsOnASingleLine &&
605 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) &&
606 Style.BraceWrapping.AfterControlStatement ==
607 FormatStyle::BWACS_Always &&
608 I + 2 != E && !I[2]->First->is(tok::r_brace))
609 return 0;
610 // FIXME: Consider an option to allow short exception handling clauses on
611 // a single line.
612 // FIXME: This isn't covered by tests.
613 // FIXME: For catch, __except, __finally the first token on the line
614 // is '}', so this isn't correct here.
615 if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
616 Keywords.kw___except, tok::kw___finally))
617 return 0;
618 }
619
620 if (Line.Last->is(tok::l_brace)) {
621 FormatToken *Tok = I[1]->First;
622 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
623 (Tok->getNextNonComment() == nullptr ||
624 Tok->getNextNonComment()->is(tok::semi))) {
625 // We merge empty blocks even if the line exceeds the column limit.
626 Tok->SpacesRequiredBefore = Style.SpaceInEmptyBlock ? 1 : 0;
627 Tok->CanBreakBefore = true;
628 return 1;
629 } else if (Limit != 0 && !Line.startsWithNamespace() &&
630 !startsExternCBlock(Line)) {
631 // We don't merge short records.
632 FormatToken *RecordTok = Line.First;
633 // Skip record modifiers.
634 while (RecordTok->Next &&
635 RecordTok->isOneOf(
636 tok::kw_typedef, tok::kw_export, Keywords.kw_declare,
637 Keywords.kw_abstract, tok::kw_default, tok::kw_public,
638 tok::kw_private, tok::kw_protected, Keywords.kw_internal))
639 RecordTok = RecordTok->Next;
640 if (RecordTok &&
641 RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
642 Keywords.kw_interface))
643 return 0;
644
645 // Check that we still have three lines and they fit into the limit.
646 if (I + 2 == E || I[2]->Type == LT_Invalid)
647 return 0;
648 Limit = limitConsideringMacros(I + 2, E, Limit);
649
650 if (!nextTwoLinesFitInto(I, Limit))
651 return 0;
652
653 // Second, check that the next line does not contain any braces - if it
654 // does, readability declines when putting it into a single line.
655 if (I[1]->Last->is(TT_LineComment))
656 return 0;
657 do {
658 if (Tok->is(tok::l_brace) && Tok->isNot(BK_BracedInit))
659 return 0;
660 Tok = Tok->Next;
661 } while (Tok);
662
663 // Last, check that the third line starts with a closing brace.
664 Tok = I[2]->First;
665 if (Tok->isNot(tok::r_brace))
666 return 0;
667
668 // Don't merge "if (a) { .. } else {".
669 if (Tok->Next && Tok->Next->is(tok::kw_else))
670 return 0;
671
672 // Don't merge a trailing multi-line control statement block like:
673 // } else if (foo &&
674 // bar)
675 // { <-- current Line
676 // baz();
677 // }
678 if (Line.First == Line.Last &&
679 Style.BraceWrapping.AfterControlStatement ==
680 FormatStyle::BWACS_MultiLine)
681 return 0;
682
683 return 2;
684 }
685 } else if (I[1]->First->is(tok::l_brace)) {
686 if (I[1]->Last->is(TT_LineComment))
687 return 0;
688
689 // Check for Limit <= 2 to account for the " {".
690 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I)))
691 return 0;
692 Limit -= 2;
693 unsigned MergedLines = 0;
694 if (Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never ||
695 (I[1]->First == I[1]->Last && I + 2 != E &&
696 I[2]->First->is(tok::r_brace))) {
697 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
698 // If we managed to merge the block, count the statement header, which
699 // is on a separate line.
700 if (MergedLines > 0)
701 ++MergedLines;
702 }
703 return MergedLines;
704 }
705 return 0;
706 }
707
708 /// Returns the modified column limit for \p I if it is inside a macro and
709 /// needs a trailing '\'.
710 unsigned
limitConsideringMacros(SmallVectorImpl<AnnotatedLine * >::const_iterator I,SmallVectorImpl<AnnotatedLine * >::const_iterator E,unsigned Limit)711 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
712 SmallVectorImpl<AnnotatedLine *>::const_iterator E,
713 unsigned Limit) {
714 if (I[0]->InPPDirective && I + 1 != E &&
715 !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
716 return Limit < 2 ? 0 : Limit - 2;
717 }
718 return Limit;
719 }
720
nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine * >::const_iterator I,unsigned Limit)721 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
722 unsigned Limit) {
723 if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
724 return false;
725 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
726 }
727
containsMustBreak(const AnnotatedLine * Line)728 bool containsMustBreak(const AnnotatedLine *Line) {
729 for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
730 if (Tok->MustBreakBefore)
731 return true;
732 }
733 return false;
734 }
735
join(AnnotatedLine & A,const AnnotatedLine & B)736 void join(AnnotatedLine &A, const AnnotatedLine &B) {
737 assert(!A.Last->Next);
738 assert(!B.First->Previous);
739 if (B.Affected)
740 A.Affected = true;
741 A.Last->Next = B.First;
742 B.First->Previous = A.Last;
743 B.First->CanBreakBefore = true;
744 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
745 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
746 Tok->TotalLength += LengthA;
747 A.Last = Tok;
748 }
749 }
750
751 const FormatStyle &Style;
752 const AdditionalKeywords &Keywords;
753 const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
754
755 SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
756 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
757 };
758
markFinalized(FormatToken * Tok)759 static void markFinalized(FormatToken *Tok) {
760 for (; Tok; Tok = Tok->Next) {
761 Tok->Finalized = true;
762 for (AnnotatedLine *Child : Tok->Children)
763 markFinalized(Child->First);
764 }
765 }
766
767 #ifndef NDEBUG
printLineState(const LineState & State)768 static void printLineState(const LineState &State) {
769 llvm::dbgs() << "State: ";
770 for (const ParenState &P : State.Stack) {
771 llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|"
772 << P.LastSpace << "|" << P.NestedBlockIndent << " ";
773 }
774 llvm::dbgs() << State.NextToken->TokenText << "\n";
775 }
776 #endif
777
778 /// Base class for classes that format one \c AnnotatedLine.
779 class LineFormatter {
780 public:
LineFormatter(ContinuationIndenter * Indenter,WhitespaceManager * Whitespaces,const FormatStyle & Style,UnwrappedLineFormatter * BlockFormatter)781 LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
782 const FormatStyle &Style,
783 UnwrappedLineFormatter *BlockFormatter)
784 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
785 BlockFormatter(BlockFormatter) {}
~LineFormatter()786 virtual ~LineFormatter() {}
787
788 /// Formats an \c AnnotatedLine and returns the penalty.
789 ///
790 /// If \p DryRun is \c false, directly applies the changes.
791 virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
792 unsigned FirstStartColumn, bool DryRun) = 0;
793
794 protected:
795 /// If the \p State's next token is an r_brace closing a nested block,
796 /// format the nested block before it.
797 ///
798 /// Returns \c true if all children could be placed successfully and adapts
799 /// \p Penalty as well as \p State. If \p DryRun is false, also directly
800 /// creates changes using \c Whitespaces.
801 ///
802 /// The crucial idea here is that children always get formatted upon
803 /// encountering the closing brace right after the nested block. Now, if we
804 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
805 /// \c false), the entire block has to be kept on the same line (which is only
806 /// possible if it fits on the line, only contains a single statement, etc.
807 ///
808 /// If \p NewLine is true, we format the nested block on separate lines, i.e.
809 /// break after the "{", format all lines with correct indentation and the put
810 /// the closing "}" on yet another new line.
811 ///
812 /// This enables us to keep the simple structure of the
813 /// \c UnwrappedLineFormatter, where we only have two options for each token:
814 /// break or don't break.
formatChildren(LineState & State,bool NewLine,bool DryRun,unsigned & Penalty)815 bool formatChildren(LineState &State, bool NewLine, bool DryRun,
816 unsigned &Penalty) {
817 const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
818 FormatToken &Previous = *State.NextToken->Previous;
819 if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
820 Previous.Children.size() == 0)
821 // The previous token does not open a block. Nothing to do. We don't
822 // assert so that we can simply call this function for all tokens.
823 return true;
824
825 if (NewLine) {
826 const ParenState &P = State.Stack.back();
827
828 int AdditionalIndent =
829 P.Indent - Previous.Children[0]->Level * Style.IndentWidth;
830
831 if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope &&
832 P.NestedBlockIndent == P.LastSpace) {
833 if (State.NextToken->MatchingParen &&
834 State.NextToken->MatchingParen->is(TT_LambdaLBrace)) {
835 State.Stack.pop_back();
836 }
837 if (LBrace->is(TT_LambdaLBrace))
838 AdditionalIndent = 0;
839 }
840
841 Penalty +=
842 BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
843 /*FixBadIndentation=*/true);
844 return true;
845 }
846
847 if (Previous.Children[0]->First->MustBreakBefore)
848 return false;
849
850 // Cannot merge into one line if this line ends on a comment.
851 if (Previous.is(tok::comment))
852 return false;
853
854 // Cannot merge multiple statements into a single line.
855 if (Previous.Children.size() > 1)
856 return false;
857
858 const AnnotatedLine *Child = Previous.Children[0];
859 // We can't put the closing "}" on a line with a trailing comment.
860 if (Child->Last->isTrailingComment())
861 return false;
862
863 // If the child line exceeds the column limit, we wouldn't want to merge it.
864 // We add +2 for the trailing " }".
865 if (Style.ColumnLimit > 0 &&
866 Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit)
867 return false;
868
869 if (!DryRun) {
870 Whitespaces->replaceWhitespace(
871 *Child->First, /*Newlines=*/0, /*Spaces=*/1,
872 /*StartOfTokenColumn=*/State.Column, /*IsAligned=*/false,
873 State.Line->InPPDirective);
874 }
875 Penalty +=
876 formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun);
877
878 State.Column += 1 + Child->Last->TotalLength;
879 return true;
880 }
881
882 ContinuationIndenter *Indenter;
883
884 private:
885 WhitespaceManager *Whitespaces;
886 const FormatStyle &Style;
887 UnwrappedLineFormatter *BlockFormatter;
888 };
889
890 /// Formatter that keeps the existing line breaks.
891 class NoColumnLimitLineFormatter : public LineFormatter {
892 public:
NoColumnLimitLineFormatter(ContinuationIndenter * Indenter,WhitespaceManager * Whitespaces,const FormatStyle & Style,UnwrappedLineFormatter * BlockFormatter)893 NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
894 WhitespaceManager *Whitespaces,
895 const FormatStyle &Style,
896 UnwrappedLineFormatter *BlockFormatter)
897 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
898
899 /// Formats the line, simply keeping all of the input's line breaking
900 /// decisions.
formatLine(const AnnotatedLine & Line,unsigned FirstIndent,unsigned FirstStartColumn,bool DryRun)901 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
902 unsigned FirstStartColumn, bool DryRun) override {
903 assert(!DryRun);
904 LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn,
905 &Line, /*DryRun=*/false);
906 while (State.NextToken) {
907 bool Newline =
908 Indenter->mustBreak(State) ||
909 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
910 unsigned Penalty = 0;
911 formatChildren(State, Newline, /*DryRun=*/false, Penalty);
912 Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
913 }
914 return 0;
915 }
916 };
917
918 /// Formatter that puts all tokens into a single line without breaks.
919 class NoLineBreakFormatter : public LineFormatter {
920 public:
NoLineBreakFormatter(ContinuationIndenter * Indenter,WhitespaceManager * Whitespaces,const FormatStyle & Style,UnwrappedLineFormatter * BlockFormatter)921 NoLineBreakFormatter(ContinuationIndenter *Indenter,
922 WhitespaceManager *Whitespaces, const FormatStyle &Style,
923 UnwrappedLineFormatter *BlockFormatter)
924 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
925
926 /// Puts all tokens into a single line.
formatLine(const AnnotatedLine & Line,unsigned FirstIndent,unsigned FirstStartColumn,bool DryRun)927 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
928 unsigned FirstStartColumn, bool DryRun) override {
929 unsigned Penalty = 0;
930 LineState State =
931 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
932 while (State.NextToken) {
933 formatChildren(State, /*NewLine=*/false, DryRun, Penalty);
934 Indenter->addTokenToState(
935 State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
936 }
937 return Penalty;
938 }
939 };
940
941 /// Finds the best way to break lines.
942 class OptimizingLineFormatter : public LineFormatter {
943 public:
OptimizingLineFormatter(ContinuationIndenter * Indenter,WhitespaceManager * Whitespaces,const FormatStyle & Style,UnwrappedLineFormatter * BlockFormatter)944 OptimizingLineFormatter(ContinuationIndenter *Indenter,
945 WhitespaceManager *Whitespaces,
946 const FormatStyle &Style,
947 UnwrappedLineFormatter *BlockFormatter)
948 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
949
950 /// Formats the line by finding the best line breaks with line lengths
951 /// below the column limit.
formatLine(const AnnotatedLine & Line,unsigned FirstIndent,unsigned FirstStartColumn,bool DryRun)952 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
953 unsigned FirstStartColumn, bool DryRun) override {
954 LineState State =
955 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
956
957 // If the ObjC method declaration does not fit on a line, we should format
958 // it with one arg per line.
959 if (State.Line->Type == LT_ObjCMethodDecl)
960 State.Stack.back().BreakBeforeParameter = true;
961
962 // Find best solution in solution space.
963 return analyzeSolutionSpace(State, DryRun);
964 }
965
966 private:
967 struct CompareLineStatePointers {
operator ()clang::format::__anon3e51f00a0111::OptimizingLineFormatter::CompareLineStatePointers968 bool operator()(LineState *obj1, LineState *obj2) const {
969 return *obj1 < *obj2;
970 }
971 };
972
973 /// A pair of <penalty, count> that is used to prioritize the BFS on.
974 ///
975 /// In case of equal penalties, we want to prefer states that were inserted
976 /// first. During state generation we make sure that we insert states first
977 /// that break the line as late as possible.
978 typedef std::pair<unsigned, unsigned> OrderedPenalty;
979
980 /// An edge in the solution space from \c Previous->State to \c State,
981 /// inserting a newline dependent on the \c NewLine.
982 struct StateNode {
StateNodeclang::format::__anon3e51f00a0111::OptimizingLineFormatter::StateNode983 StateNode(const LineState &State, bool NewLine, StateNode *Previous)
984 : State(State), NewLine(NewLine), Previous(Previous) {}
985 LineState State;
986 bool NewLine;
987 StateNode *Previous;
988 };
989
990 /// An item in the prioritized BFS search queue. The \c StateNode's
991 /// \c State has the given \c OrderedPenalty.
992 typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
993
994 /// The BFS queue type.
995 typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
996 std::greater<QueueItem>>
997 QueueType;
998
999 /// Analyze the entire solution space starting from \p InitialState.
1000 ///
1001 /// This implements a variant of Dijkstra's algorithm on the graph that spans
1002 /// the solution space (\c LineStates are the nodes). The algorithm tries to
1003 /// find the shortest path (the one with lowest penalty) from \p InitialState
1004 /// to a state where all tokens are placed. Returns the penalty.
1005 ///
1006 /// If \p DryRun is \c false, directly applies the changes.
analyzeSolutionSpace(LineState & InitialState,bool DryRun)1007 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
1008 std::set<LineState *, CompareLineStatePointers> Seen;
1009
1010 // Increasing count of \c StateNode items we have created. This is used to
1011 // create a deterministic order independent of the container.
1012 unsigned Count = 0;
1013 QueueType Queue;
1014
1015 // Insert start element into queue.
1016 StateNode *Node =
1017 new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
1018 Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
1019 ++Count;
1020
1021 unsigned Penalty = 0;
1022
1023 // While not empty, take first element and follow edges.
1024 while (!Queue.empty()) {
1025 Penalty = Queue.top().first.first;
1026 StateNode *Node = Queue.top().second;
1027 if (!Node->State.NextToken) {
1028 LLVM_DEBUG(llvm::dbgs()
1029 << "\n---\nPenalty for line: " << Penalty << "\n");
1030 break;
1031 }
1032 Queue.pop();
1033
1034 // Cut off the analysis of certain solutions if the analysis gets too
1035 // complex. See description of IgnoreStackForComparison.
1036 if (Count > 50000)
1037 Node->State.IgnoreStackForComparison = true;
1038
1039 if (!Seen.insert(&Node->State).second)
1040 // State already examined with lower penalty.
1041 continue;
1042
1043 FormatDecision LastFormat = Node->State.NextToken->getDecision();
1044 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
1045 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
1046 if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
1047 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
1048 }
1049
1050 if (Queue.empty()) {
1051 // We were unable to find a solution, do nothing.
1052 // FIXME: Add diagnostic?
1053 LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n");
1054 return 0;
1055 }
1056
1057 // Reconstruct the solution.
1058 if (!DryRun)
1059 reconstructPath(InitialState, Queue.top().second);
1060
1061 LLVM_DEBUG(llvm::dbgs()
1062 << "Total number of analyzed states: " << Count << "\n");
1063 LLVM_DEBUG(llvm::dbgs() << "---\n");
1064
1065 return Penalty;
1066 }
1067
1068 /// Add the following state to the analysis queue \c Queue.
1069 ///
1070 /// Assume the current state is \p PreviousNode and has been reached with a
1071 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
addNextStateToQueue(unsigned Penalty,StateNode * PreviousNode,bool NewLine,unsigned * Count,QueueType * Queue)1072 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
1073 bool NewLine, unsigned *Count, QueueType *Queue) {
1074 if (NewLine && !Indenter->canBreak(PreviousNode->State))
1075 return;
1076 if (!NewLine && Indenter->mustBreak(PreviousNode->State))
1077 return;
1078
1079 StateNode *Node = new (Allocator.Allocate())
1080 StateNode(PreviousNode->State, NewLine, PreviousNode);
1081 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
1082 return;
1083
1084 Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
1085
1086 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
1087 ++(*Count);
1088 }
1089
1090 /// Applies the best formatting by reconstructing the path in the
1091 /// solution space that leads to \c Best.
reconstructPath(LineState & State,StateNode * Best)1092 void reconstructPath(LineState &State, StateNode *Best) {
1093 std::deque<StateNode *> Path;
1094 // We do not need a break before the initial token.
1095 while (Best->Previous) {
1096 Path.push_front(Best);
1097 Best = Best->Previous;
1098 }
1099 for (auto I = Path.begin(), E = Path.end(); I != E; ++I) {
1100 unsigned Penalty = 0;
1101 formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
1102 Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
1103
1104 LLVM_DEBUG({
1105 printLineState((*I)->Previous->State);
1106 if ((*I)->NewLine) {
1107 llvm::dbgs() << "Penalty for placing "
1108 << (*I)->Previous->State.NextToken->Tok.getName()
1109 << " on a new line: " << Penalty << "\n";
1110 }
1111 });
1112 }
1113 }
1114
1115 llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
1116 };
1117
1118 } // anonymous namespace
1119
format(const SmallVectorImpl<AnnotatedLine * > & Lines,bool DryRun,int AdditionalIndent,bool FixBadIndentation,unsigned FirstStartColumn,unsigned NextStartColumn,unsigned LastStartColumn)1120 unsigned UnwrappedLineFormatter::format(
1121 const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
1122 int AdditionalIndent, bool FixBadIndentation, unsigned FirstStartColumn,
1123 unsigned NextStartColumn, unsigned LastStartColumn) {
1124 LineJoiner Joiner(Style, Keywords, Lines);
1125
1126 // Try to look up already computed penalty in DryRun-mode.
1127 std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
1128 &Lines, AdditionalIndent);
1129 auto CacheIt = PenaltyCache.find(CacheKey);
1130 if (DryRun && CacheIt != PenaltyCache.end())
1131 return CacheIt->second;
1132
1133 assert(!Lines.empty());
1134 unsigned Penalty = 0;
1135 LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
1136 AdditionalIndent);
1137 const AnnotatedLine *PrevPrevLine = nullptr;
1138 const AnnotatedLine *PreviousLine = nullptr;
1139 const AnnotatedLine *NextLine = nullptr;
1140
1141 // The minimum level of consecutive lines that have been formatted.
1142 unsigned RangeMinLevel = UINT_MAX;
1143
1144 bool FirstLine = true;
1145 for (const AnnotatedLine *Line =
1146 Joiner.getNextMergedLine(DryRun, IndentTracker);
1147 Line; Line = NextLine, FirstLine = false) {
1148 const AnnotatedLine &TheLine = *Line;
1149 unsigned Indent = IndentTracker.getIndent();
1150
1151 // We continue formatting unchanged lines to adjust their indent, e.g. if a
1152 // scope was added. However, we need to carefully stop doing this when we
1153 // exit the scope of affected lines to prevent indenting a the entire
1154 // remaining file if it currently missing a closing brace.
1155 bool PreviousRBrace =
1156 PreviousLine && PreviousLine->startsWith(tok::r_brace);
1157 bool ContinueFormatting =
1158 TheLine.Level > RangeMinLevel ||
1159 (TheLine.Level == RangeMinLevel && !PreviousRBrace &&
1160 !TheLine.startsWith(tok::r_brace));
1161
1162 bool FixIndentation = (FixBadIndentation || ContinueFormatting) &&
1163 Indent != TheLine.First->OriginalColumn;
1164 bool ShouldFormat = TheLine.Affected || FixIndentation;
1165 // We cannot format this line; if the reason is that the line had a
1166 // parsing error, remember that.
1167 if (ShouldFormat && TheLine.Type == LT_Invalid && Status) {
1168 Status->FormatComplete = false;
1169 Status->Line =
1170 SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation());
1171 }
1172
1173 if (ShouldFormat && TheLine.Type != LT_Invalid) {
1174 if (!DryRun) {
1175 bool LastLine = Line->First->is(tok::eof);
1176 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent,
1177 LastLine ? LastStartColumn : NextStartColumn + Indent);
1178 }
1179
1180 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1181 unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
1182 bool FitsIntoOneLine =
1183 TheLine.Last->TotalLength + Indent <= ColumnLimit ||
1184 (TheLine.Type == LT_ImportStatement &&
1185 (Style.Language != FormatStyle::LK_JavaScript ||
1186 !Style.JavaScriptWrapImports)) ||
1187 (Style.isCSharp() &&
1188 TheLine.InPPDirective); // don't split #regions in C#
1189 if (Style.ColumnLimit == 0)
1190 NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
1191 .formatLine(TheLine, NextStartColumn + Indent,
1192 FirstLine ? FirstStartColumn : 0, DryRun);
1193 else if (FitsIntoOneLine)
1194 Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
1195 .formatLine(TheLine, NextStartColumn + Indent,
1196 FirstLine ? FirstStartColumn : 0, DryRun);
1197 else
1198 Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
1199 .formatLine(TheLine, NextStartColumn + Indent,
1200 FirstLine ? FirstStartColumn : 0, DryRun);
1201 RangeMinLevel = std::min(RangeMinLevel, TheLine.Level);
1202 } else {
1203 // If no token in the current line is affected, we still need to format
1204 // affected children.
1205 if (TheLine.ChildrenAffected)
1206 for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next)
1207 if (!Tok->Children.empty())
1208 format(Tok->Children, DryRun);
1209
1210 // Adapt following lines on the current indent level to the same level
1211 // unless the current \c AnnotatedLine is not at the beginning of a line.
1212 bool StartsNewLine =
1213 TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
1214 if (StartsNewLine)
1215 IndentTracker.adjustToUnmodifiedLine(TheLine);
1216 if (!DryRun) {
1217 bool ReformatLeadingWhitespace =
1218 StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
1219 TheLine.LeadingEmptyLinesAffected);
1220 // Format the first token.
1221 if (ReformatLeadingWhitespace)
1222 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines,
1223 TheLine.First->OriginalColumn,
1224 TheLine.First->OriginalColumn);
1225 else
1226 Whitespaces->addUntouchableToken(*TheLine.First,
1227 TheLine.InPPDirective);
1228
1229 // Notify the WhitespaceManager about the unchanged whitespace.
1230 for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
1231 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
1232 }
1233 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
1234 RangeMinLevel = UINT_MAX;
1235 }
1236 if (!DryRun)
1237 markFinalized(TheLine.First);
1238 PrevPrevLine = PreviousLine;
1239 PreviousLine = &TheLine;
1240 }
1241 PenaltyCache[CacheKey] = Penalty;
1242 return Penalty;
1243 }
1244
formatFirstToken(const AnnotatedLine & Line,const AnnotatedLine * PreviousLine,const AnnotatedLine * PrevPrevLine,const SmallVectorImpl<AnnotatedLine * > & Lines,unsigned Indent,unsigned NewlineIndent)1245 void UnwrappedLineFormatter::formatFirstToken(
1246 const AnnotatedLine &Line, const AnnotatedLine *PreviousLine,
1247 const AnnotatedLine *PrevPrevLine,
1248 const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent,
1249 unsigned NewlineIndent) {
1250 FormatToken &RootToken = *Line.First;
1251 if (RootToken.is(tok::eof)) {
1252 unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
1253 unsigned TokenIndent = Newlines ? NewlineIndent : 0;
1254 Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent,
1255 TokenIndent);
1256 return;
1257 }
1258 unsigned Newlines =
1259 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
1260 // Remove empty lines before "}" where applicable.
1261 if (RootToken.is(tok::r_brace) &&
1262 (!RootToken.Next ||
1263 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) &&
1264 // Do not remove empty lines before namespace closing "}".
1265 !getNamespaceToken(&Line, Lines))
1266 Newlines = std::min(Newlines, 1u);
1267 // Remove empty lines at the start of nested blocks (lambdas/arrow functions)
1268 if (PreviousLine == nullptr && Line.Level > 0)
1269 Newlines = std::min(Newlines, 1u);
1270 if (Newlines == 0 && !RootToken.IsFirst)
1271 Newlines = 1;
1272 if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
1273 Newlines = 0;
1274
1275 // Remove empty lines after "{".
1276 if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
1277 PreviousLine->Last->is(tok::l_brace) &&
1278 !PreviousLine->startsWithNamespace() &&
1279 !(PrevPrevLine && PrevPrevLine->startsWithNamespace() &&
1280 PreviousLine->startsWith(tok::l_brace)) &&
1281 !startsExternCBlock(*PreviousLine))
1282 Newlines = 1;
1283
1284 // Insert or remove empty line before access specifiers.
1285 if (PreviousLine && RootToken.isAccessSpecifier()) {
1286 switch (Style.EmptyLineBeforeAccessModifier) {
1287 case FormatStyle::ELBAMS_Never:
1288 if (Newlines > 1)
1289 Newlines = 1;
1290 break;
1291 case FormatStyle::ELBAMS_Leave:
1292 Newlines = std::max(RootToken.NewlinesBefore, 1u);
1293 break;
1294 case FormatStyle::ELBAMS_LogicalBlock:
1295 if (PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && Newlines <= 1)
1296 Newlines = 2;
1297 if (PreviousLine->First->isAccessSpecifier())
1298 Newlines = 1; // Previous is an access modifier remove all new lines.
1299 break;
1300 case FormatStyle::ELBAMS_Always: {
1301 const FormatToken *previousToken;
1302 if (PreviousLine->Last->is(tok::comment))
1303 previousToken = PreviousLine->Last->getPreviousNonComment();
1304 else
1305 previousToken = PreviousLine->Last;
1306 if ((!previousToken || !previousToken->is(tok::l_brace)) && Newlines <= 1)
1307 Newlines = 2;
1308 } break;
1309 }
1310 }
1311
1312 // Insert or remove empty line after access specifiers.
1313 if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
1314 (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) {
1315 // EmptyLineBeforeAccessModifier is handling the case when two access
1316 // modifiers follow each other.
1317 if (!RootToken.isAccessSpecifier()) {
1318 switch (Style.EmptyLineAfterAccessModifier) {
1319 case FormatStyle::ELAAMS_Never:
1320 Newlines = 1;
1321 break;
1322 case FormatStyle::ELAAMS_Leave:
1323 Newlines = std::max(Newlines, 1u);
1324 break;
1325 case FormatStyle::ELAAMS_Always:
1326 if (RootToken.is(tok::r_brace)) // Do not add at end of class.
1327 Newlines = 1u;
1328 else
1329 Newlines = std::max(Newlines, 2u);
1330 break;
1331 }
1332 }
1333 }
1334
1335 if (Newlines)
1336 Indent = NewlineIndent;
1337
1338 // Preprocessor directives get indented before the hash only if specified
1339 if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
1340 (Line.Type == LT_PreprocessorDirective ||
1341 Line.Type == LT_ImportStatement))
1342 Indent = 0;
1343
1344 Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent,
1345 /*IsAligned=*/false,
1346 Line.InPPDirective &&
1347 !RootToken.HasUnescapedNewline);
1348 }
1349
1350 unsigned
getColumnLimit(bool InPPDirective,const AnnotatedLine * NextLine) const1351 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
1352 const AnnotatedLine *NextLine) const {
1353 // In preprocessor directives reserve two chars for trailing " \" if the
1354 // next line continues the preprocessor directive.
1355 bool ContinuesPPDirective =
1356 InPPDirective &&
1357 // If there is no next line, this is likely a child line and the parent
1358 // continues the preprocessor directive.
1359 (!NextLine ||
1360 (NextLine->InPPDirective &&
1361 // If there is an unescaped newline between this line and the next, the
1362 // next line starts a new preprocessor directive.
1363 !NextLine->First->HasUnescapedNewline));
1364 return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
1365 }
1366
1367 } // namespace format
1368 } // namespace clang
1369