1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file implements WhitespaceManager class.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 
18 namespace clang {
19 namespace format {
20 
21 bool WhitespaceManager::Change::IsBeforeInFile::
operator ()(const Change & C1,const Change & C2) const22 operator()(const Change &C1, const Change &C2) const {
23   return SourceMgr.isBeforeInTranslationUnit(
24       C1.OriginalWhitespaceRange.getBegin(),
25       C2.OriginalWhitespaceRange.getBegin());
26 }
27 
Change(const FormatToken & Tok,bool CreateReplacement,SourceRange OriginalWhitespaceRange,int Spaces,unsigned StartOfTokenColumn,unsigned NewlinesBefore,StringRef PreviousLinePostfix,StringRef CurrentLinePrefix,bool ContinuesPPDirective,bool IsInsideToken)28 WhitespaceManager::Change::Change(const FormatToken &Tok,
29                                   bool CreateReplacement,
30                                   SourceRange OriginalWhitespaceRange,
31                                   int Spaces, unsigned StartOfTokenColumn,
32                                   unsigned NewlinesBefore,
33                                   StringRef PreviousLinePostfix,
34                                   StringRef CurrentLinePrefix,
35                                   bool ContinuesPPDirective, bool IsInsideToken)
36     : Tok(&Tok), CreateReplacement(CreateReplacement),
37       OriginalWhitespaceRange(OriginalWhitespaceRange),
38       StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
39       PreviousLinePostfix(PreviousLinePostfix),
40       CurrentLinePrefix(CurrentLinePrefix),
41       ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
42       IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
43       PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
44       StartOfBlockComment(nullptr), IndentationOffset(0) {}
45 
replaceWhitespace(FormatToken & Tok,unsigned Newlines,unsigned Spaces,unsigned StartOfTokenColumn,bool InPPDirective)46 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
47                                           unsigned Spaces,
48                                           unsigned StartOfTokenColumn,
49                                           bool InPPDirective) {
50   if (Tok.Finalized)
51     return;
52   Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
53   Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
54                            Spaces, StartOfTokenColumn, Newlines, "", "",
55                            InPPDirective && !Tok.IsFirst,
56                            /*IsInsideToken=*/false));
57 }
58 
addUntouchableToken(const FormatToken & Tok,bool InPPDirective)59 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
60                                             bool InPPDirective) {
61   if (Tok.Finalized)
62     return;
63   Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
64                            Tok.WhitespaceRange, /*Spaces=*/0,
65                            Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
66                            InPPDirective && !Tok.IsFirst,
67                            /*IsInsideToken=*/false));
68 }
69 
70 llvm::Error
addReplacement(const tooling::Replacement & Replacement)71 WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) {
72   return Replaces.add(Replacement);
73 }
74 
replaceWhitespaceInToken(const FormatToken & Tok,unsigned Offset,unsigned ReplaceChars,StringRef PreviousPostfix,StringRef CurrentPrefix,bool InPPDirective,unsigned Newlines,int Spaces)75 void WhitespaceManager::replaceWhitespaceInToken(
76     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
77     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
78     unsigned Newlines, int Spaces) {
79   if (Tok.Finalized)
80     return;
81   SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
82   Changes.push_back(
83       Change(Tok, /*CreateReplacement=*/true,
84              SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
85              std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
86              InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
87 }
88 
generateReplacements()89 const tooling::Replacements &WhitespaceManager::generateReplacements() {
90   if (Changes.empty())
91     return Replaces;
92 
93   llvm::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
94   calculateLineBreakInformation();
95   alignConsecutiveDeclarations();
96   alignConsecutiveAssignments();
97   alignTrailingComments();
98   alignEscapedNewlines();
99   generateChanges();
100 
101   return Replaces;
102 }
103 
calculateLineBreakInformation()104 void WhitespaceManager::calculateLineBreakInformation() {
105   Changes[0].PreviousEndOfTokenColumn = 0;
106   Change *LastOutsideTokenChange = &Changes[0];
107   for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
108     SourceLocation OriginalWhitespaceStart =
109         Changes[i].OriginalWhitespaceRange.getBegin();
110     SourceLocation PreviousOriginalWhitespaceEnd =
111         Changes[i - 1].OriginalWhitespaceRange.getEnd();
112     unsigned OriginalWhitespaceStartOffset =
113         SourceMgr.getFileOffset(OriginalWhitespaceStart);
114     unsigned PreviousOriginalWhitespaceEndOffset =
115         SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
116     assert(PreviousOriginalWhitespaceEndOffset <=
117            OriginalWhitespaceStartOffset);
118     const char *const PreviousOriginalWhitespaceEndData =
119         SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
120     StringRef Text(PreviousOriginalWhitespaceEndData,
121                    SourceMgr.getCharacterData(OriginalWhitespaceStart) -
122                        PreviousOriginalWhitespaceEndData);
123     // Usually consecutive changes would occur in consecutive tokens. This is
124     // not the case however when analyzing some preprocessor runs of the
125     // annotated lines. For example, in this code:
126     //
127     // #if A // line 1
128     // int i = 1;
129     // #else B // line 2
130     // int i = 2;
131     // #endif // line 3
132     //
133     // one of the runs will produce the sequence of lines marked with line 1, 2
134     // and 3. So the two consecutive whitespace changes just before '// line 2'
135     // and before '#endif // line 3' span multiple lines and tokens:
136     //
137     // #else B{change X}[// line 2
138     // int i = 2;
139     // ]{change Y}#endif // line 3
140     //
141     // For this reason, if the text between consecutive changes spans multiple
142     // newlines, the token length must be adjusted to the end of the original
143     // line of the token.
144     auto NewlinePos = Text.find_first_of('\n');
145     if (NewlinePos == StringRef::npos) {
146       Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
147                                    PreviousOriginalWhitespaceEndOffset +
148                                    Changes[i].PreviousLinePostfix.size() +
149                                    Changes[i - 1].CurrentLinePrefix.size();
150     } else {
151       Changes[i - 1].TokenLength =
152           NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
153     }
154 
155     // If there are multiple changes in this token, sum up all the changes until
156     // the end of the line.
157     if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
158       LastOutsideTokenChange->TokenLength +=
159           Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
160     else
161       LastOutsideTokenChange = &Changes[i - 1];
162 
163     Changes[i].PreviousEndOfTokenColumn =
164         Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
165 
166     Changes[i - 1].IsTrailingComment =
167         (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
168          (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
169         Changes[i - 1].Tok->is(tok::comment) &&
170         // FIXME: This is a dirty hack. The problem is that
171         // BreakableLineCommentSection does comment reflow changes and here is
172         // the aligning of trailing comments. Consider the case where we reflow
173         // the second line up in this example:
174         //
175         // // line 1
176         // // line 2
177         //
178         // That amounts to 2 changes by BreakableLineCommentSection:
179         //  - the first, delimited by (), for the whitespace between the tokens,
180         //  - and second, delimited by [], for the whitespace at the beginning
181         //  of the second token:
182         //
183         // // line 1(
184         // )[// ]line 2
185         //
186         // So in the end we have two changes like this:
187         //
188         // // line1()[ ]line 2
189         //
190         // Note that the OriginalWhitespaceStart of the second change is the
191         // same as the PreviousOriginalWhitespaceEnd of the first change.
192         // In this case, the below check ensures that the second change doesn't
193         // get treated as a trailing comment change here, since this might
194         // trigger additional whitespace to be wrongly inserted before "line 2"
195         // by the comment aligner here.
196         //
197         // For a proper solution we need a mechanism to say to WhitespaceManager
198         // that a particular change breaks the current sequence of trailing
199         // comments.
200         OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
201   }
202   // FIXME: The last token is currently not always an eof token; in those
203   // cases, setting TokenLength of the last token to 0 is wrong.
204   Changes.back().TokenLength = 0;
205   Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
206 
207   const WhitespaceManager::Change *LastBlockComment = nullptr;
208   for (auto &Change : Changes) {
209     // Reset the IsTrailingComment flag for changes inside of trailing comments
210     // so they don't get realigned later. Comment line breaks however still need
211     // to be aligned.
212     if (Change.IsInsideToken && Change.NewlinesBefore == 0)
213       Change.IsTrailingComment = false;
214     Change.StartOfBlockComment = nullptr;
215     Change.IndentationOffset = 0;
216     if (Change.Tok->is(tok::comment)) {
217       if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
218         LastBlockComment = &Change;
219       else {
220         if ((Change.StartOfBlockComment = LastBlockComment))
221           Change.IndentationOffset =
222               Change.StartOfTokenColumn -
223               Change.StartOfBlockComment->StartOfTokenColumn;
224       }
225     } else {
226       LastBlockComment = nullptr;
227     }
228   }
229 }
230 
231 // Align a single sequence of tokens, see AlignTokens below.
232 template <typename F>
233 static void
AlignTokenSequence(unsigned Start,unsigned End,unsigned Column,F && Matches,SmallVector<WhitespaceManager::Change,16> & Changes)234 AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
235                    SmallVector<WhitespaceManager::Change, 16> &Changes) {
236   bool FoundMatchOnLine = false;
237   int Shift = 0;
238 
239   // ScopeStack keeps track of the current scope depth. It contains indices of
240   // the first token on each scope.
241   // We only run the "Matches" function on tokens from the outer-most scope.
242   // However, we do need to pay special attention to one class of tokens
243   // that are not in the outer-most scope, and that is function parameters
244   // which are split across multiple lines, as illustrated by this example:
245   //   double a(int x);
246   //   int    b(int  y,
247   //          double z);
248   // In the above example, we need to take special care to ensure that
249   // 'double z' is indented along with it's owning function 'b'.
250   SmallVector<unsigned, 16> ScopeStack;
251 
252   for (unsigned i = Start; i != End; ++i) {
253     if (ScopeStack.size() != 0 &&
254         Changes[i].indentAndNestingLevel() <
255             Changes[ScopeStack.back()].indentAndNestingLevel())
256       ScopeStack.pop_back();
257 
258     if (i != Start && Changes[i].indentAndNestingLevel() >
259                           Changes[i - 1].indentAndNestingLevel())
260       ScopeStack.push_back(i);
261 
262     bool InsideNestedScope = ScopeStack.size() != 0;
263 
264     if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
265       Shift = 0;
266       FoundMatchOnLine = false;
267     }
268 
269     // If this is the first matching token to be aligned, remember by how many
270     // spaces it has to be shifted, so the rest of the changes on the line are
271     // shifted by the same amount
272     if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
273       FoundMatchOnLine = true;
274       Shift = Column - Changes[i].StartOfTokenColumn;
275       Changes[i].Spaces += Shift;
276     }
277 
278     // This is for function parameters that are split across multiple lines,
279     // as mentioned in the ScopeStack comment.
280     if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
281       unsigned ScopeStart = ScopeStack.back();
282       if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
283           (ScopeStart > Start + 1 &&
284            Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
285         Changes[i].Spaces += Shift;
286     }
287 
288     assert(Shift >= 0);
289     Changes[i].StartOfTokenColumn += Shift;
290     if (i + 1 != Changes.size())
291       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
292   }
293 }
294 
295 // Walk through a subset of the changes, starting at StartAt, and find
296 // sequences of matching tokens to align. To do so, keep track of the lines and
297 // whether or not a matching token was found on a line. If a matching token is
298 // found, extend the current sequence. If the current line cannot be part of a
299 // sequence, e.g. because there is an empty line before it or it contains only
300 // non-matching tokens, finalize the previous sequence.
301 // The value returned is the token on which we stopped, either because we
302 // exhausted all items inside Changes, or because we hit a scope level higher
303 // than our initial scope.
304 // This function is recursive. Each invocation processes only the scope level
305 // equal to the initial level, which is the level of Changes[StartAt].
306 // If we encounter a scope level greater than the initial level, then we call
307 // ourselves recursively, thereby avoiding the pollution of the current state
308 // with the alignment requirements of the nested sub-level. This recursive
309 // behavior is necessary for aligning function prototypes that have one or more
310 // arguments.
311 // If this function encounters a scope level less than the initial level,
312 // it returns the current position.
313 // There is a non-obvious subtlety in the recursive behavior: Even though we
314 // defer processing of nested levels to recursive invocations of this
315 // function, when it comes time to align a sequence of tokens, we run the
316 // alignment on the entire sequence, including the nested levels.
317 // When doing so, most of the nested tokens are skipped, because their
318 // alignment was already handled by the recursive invocations of this function.
319 // However, the special exception is that we do NOT skip function parameters
320 // that are split across multiple lines. See the test case in FormatTest.cpp
321 // that mentions "split function parameter alignment" for an example of this.
322 template <typename F>
AlignTokens(const FormatStyle & Style,F && Matches,SmallVector<WhitespaceManager::Change,16> & Changes,unsigned StartAt)323 static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
324                             SmallVector<WhitespaceManager::Change, 16> &Changes,
325                             unsigned StartAt) {
326   unsigned MinColumn = 0;
327   unsigned MaxColumn = UINT_MAX;
328 
329   // Line number of the start and the end of the current token sequence.
330   unsigned StartOfSequence = 0;
331   unsigned EndOfSequence = 0;
332 
333   // Measure the scope level (i.e. depth of (), [], {}) of the first token, and
334   // abort when we hit any token in a higher scope than the starting one.
335   auto IndentAndNestingLevel = StartAt < Changes.size()
336                                    ? Changes[StartAt].indentAndNestingLevel()
337                                    : std::pair<unsigned, unsigned>(0, 0);
338 
339   // Keep track of the number of commas before the matching tokens, we will only
340   // align a sequence of matching tokens if they are preceded by the same number
341   // of commas.
342   unsigned CommasBeforeLastMatch = 0;
343   unsigned CommasBeforeMatch = 0;
344 
345   // Whether a matching token has been found on the current line.
346   bool FoundMatchOnLine = false;
347 
348   // Aligns a sequence of matching tokens, on the MinColumn column.
349   //
350   // Sequences start from the first matching token to align, and end at the
351   // first token of the first line that doesn't need to be aligned.
352   //
353   // We need to adjust the StartOfTokenColumn of each Change that is on a line
354   // containing any matching token to be aligned and located after such token.
355   auto AlignCurrentSequence = [&] {
356     if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
357       AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
358                          Changes);
359     MinColumn = 0;
360     MaxColumn = UINT_MAX;
361     StartOfSequence = 0;
362     EndOfSequence = 0;
363   };
364 
365   unsigned i = StartAt;
366   for (unsigned e = Changes.size(); i != e; ++i) {
367     if (Changes[i].indentAndNestingLevel() < IndentAndNestingLevel)
368       break;
369 
370     if (Changes[i].NewlinesBefore != 0) {
371       CommasBeforeMatch = 0;
372       EndOfSequence = i;
373       // If there is a blank line, or if the last line didn't contain any
374       // matching token, the sequence ends here.
375       if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
376         AlignCurrentSequence();
377 
378       FoundMatchOnLine = false;
379     }
380 
381     if (Changes[i].Tok->is(tok::comma)) {
382       ++CommasBeforeMatch;
383     } else if (Changes[i].indentAndNestingLevel() > IndentAndNestingLevel) {
384       // Call AlignTokens recursively, skipping over this scope block.
385       unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
386       i = StoppedAt - 1;
387       continue;
388     }
389 
390     if (!Matches(Changes[i]))
391       continue;
392 
393     // If there is more than one matching token per line, or if the number of
394     // preceding commas, do not match anymore, end the sequence.
395     if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
396       AlignCurrentSequence();
397 
398     CommasBeforeLastMatch = CommasBeforeMatch;
399     FoundMatchOnLine = true;
400 
401     if (StartOfSequence == 0)
402       StartOfSequence = i;
403 
404     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
405     int LineLengthAfter = -Changes[i].Spaces;
406     for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
407       LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
408     unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
409 
410     // If we are restricted by the maximum column width, end the sequence.
411     if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
412         CommasBeforeLastMatch != CommasBeforeMatch) {
413       AlignCurrentSequence();
414       StartOfSequence = i;
415     }
416 
417     MinColumn = std::max(MinColumn, ChangeMinColumn);
418     MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
419   }
420 
421   EndOfSequence = i;
422   AlignCurrentSequence();
423   return i;
424 }
425 
alignConsecutiveAssignments()426 void WhitespaceManager::alignConsecutiveAssignments() {
427   if (!Style.AlignConsecutiveAssignments)
428     return;
429 
430   AlignTokens(Style,
431               [&](const Change &C) {
432                 // Do not align on equal signs that are first on a line.
433                 if (C.NewlinesBefore > 0)
434                   return false;
435 
436                 // Do not align on equal signs that are last on a line.
437                 if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
438                   return false;
439 
440                 return C.Tok->is(tok::equal);
441               },
442               Changes, /*StartAt=*/0);
443 }
444 
alignConsecutiveDeclarations()445 void WhitespaceManager::alignConsecutiveDeclarations() {
446   if (!Style.AlignConsecutiveDeclarations)
447     return;
448 
449   // FIXME: Currently we don't handle properly the PointerAlignment: Right
450   // The * and & are not aligned and are left dangling. Something has to be done
451   // about it, but it raises the question of alignment of code like:
452   //   const char* const* v1;
453   //   float const* v2;
454   //   SomeVeryLongType const& v3;
455   AlignTokens(Style,
456               [](Change const &C) {
457                 // tok::kw_operator is necessary for aligning operator overload
458                 // definitions.
459                 return C.Tok->is(TT_StartOfName) ||
460                        C.Tok->is(TT_FunctionDeclarationName) ||
461                        C.Tok->is(tok::kw_operator);
462               },
463               Changes, /*StartAt=*/0);
464 }
465 
alignTrailingComments()466 void WhitespaceManager::alignTrailingComments() {
467   unsigned MinColumn = 0;
468   unsigned MaxColumn = UINT_MAX;
469   unsigned StartOfSequence = 0;
470   bool BreakBeforeNext = false;
471   unsigned Newlines = 0;
472   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
473     if (Changes[i].StartOfBlockComment)
474       continue;
475     Newlines += Changes[i].NewlinesBefore;
476     if (!Changes[i].IsTrailingComment)
477       continue;
478 
479     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
480     unsigned ChangeMaxColumn;
481 
482     if (Style.ColumnLimit == 0)
483       ChangeMaxColumn = UINT_MAX;
484     else if (Style.ColumnLimit >= Changes[i].TokenLength)
485       ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
486     else
487       ChangeMaxColumn = ChangeMinColumn;
488 
489     // If we don't create a replacement for this change, we have to consider
490     // it to be immovable.
491     if (!Changes[i].CreateReplacement)
492       ChangeMaxColumn = ChangeMinColumn;
493 
494     if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
495       ChangeMaxColumn -= 2;
496     // If this comment follows an } in column 0, it probably documents the
497     // closing of a namespace and we don't want to align it.
498     bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
499                                   Changes[i - 1].Tok->is(tok::r_brace) &&
500                                   Changes[i - 1].StartOfTokenColumn == 0;
501     bool WasAlignedWithStartOfNextLine = false;
502     if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
503       unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
504           Changes[i].OriginalWhitespaceRange.getEnd());
505       for (unsigned j = i + 1; j != e; ++j) {
506         if (Changes[j].Tok->is(tok::comment))
507           continue;
508 
509         unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
510             Changes[j].OriginalWhitespaceRange.getEnd());
511         // The start of the next token was previously aligned with the
512         // start of this comment.
513         WasAlignedWithStartOfNextLine =
514             CommentColumn == NextColumn ||
515             CommentColumn == NextColumn + Style.IndentWidth;
516         break;
517       }
518     }
519     if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
520       alignTrailingComments(StartOfSequence, i, MinColumn);
521       MinColumn = ChangeMinColumn;
522       MaxColumn = ChangeMinColumn;
523       StartOfSequence = i;
524     } else if (BreakBeforeNext || Newlines > 1 ||
525                (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
526                // Break the comment sequence if the previous line did not end
527                // in a trailing comment.
528                (Changes[i].NewlinesBefore == 1 && i > 0 &&
529                 !Changes[i - 1].IsTrailingComment) ||
530                WasAlignedWithStartOfNextLine) {
531       alignTrailingComments(StartOfSequence, i, MinColumn);
532       MinColumn = ChangeMinColumn;
533       MaxColumn = ChangeMaxColumn;
534       StartOfSequence = i;
535     } else {
536       MinColumn = std::max(MinColumn, ChangeMinColumn);
537       MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
538     }
539     BreakBeforeNext =
540         (i == 0) || (Changes[i].NewlinesBefore > 1) ||
541         // Never start a sequence with a comment at the beginning of
542         // the line.
543         (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
544     Newlines = 0;
545   }
546   alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
547 }
548 
alignTrailingComments(unsigned Start,unsigned End,unsigned Column)549 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
550                                               unsigned Column) {
551   for (unsigned i = Start; i != End; ++i) {
552     int Shift = 0;
553     if (Changes[i].IsTrailingComment) {
554       Shift = Column - Changes[i].StartOfTokenColumn;
555     }
556     if (Changes[i].StartOfBlockComment) {
557       Shift = Changes[i].IndentationOffset +
558               Changes[i].StartOfBlockComment->StartOfTokenColumn -
559               Changes[i].StartOfTokenColumn;
560     }
561     assert(Shift >= 0);
562     Changes[i].Spaces += Shift;
563     if (i + 1 != Changes.size())
564       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
565     Changes[i].StartOfTokenColumn += Shift;
566   }
567 }
568 
alignEscapedNewlines()569 void WhitespaceManager::alignEscapedNewlines() {
570   if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
571     return;
572 
573   bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
574   unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
575   unsigned StartOfMacro = 0;
576   for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
577     Change &C = Changes[i];
578     if (C.NewlinesBefore > 0) {
579       if (C.ContinuesPPDirective) {
580         MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
581       } else {
582         alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
583         MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
584         StartOfMacro = i;
585       }
586     }
587   }
588   alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
589 }
590 
alignEscapedNewlines(unsigned Start,unsigned End,unsigned Column)591 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
592                                              unsigned Column) {
593   for (unsigned i = Start; i < End; ++i) {
594     Change &C = Changes[i];
595     if (C.NewlinesBefore > 0) {
596       assert(C.ContinuesPPDirective);
597       if (C.PreviousEndOfTokenColumn + 1 > Column)
598         C.EscapedNewlineColumn = 0;
599       else
600         C.EscapedNewlineColumn = Column;
601     }
602   }
603 }
604 
generateChanges()605 void WhitespaceManager::generateChanges() {
606   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
607     const Change &C = Changes[i];
608     if (i > 0) {
609       assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
610                  C.OriginalWhitespaceRange.getBegin() &&
611              "Generating two replacements for the same location");
612     }
613     if (C.CreateReplacement) {
614       std::string ReplacementText = C.PreviousLinePostfix;
615       if (C.ContinuesPPDirective)
616         appendEscapedNewlineText(ReplacementText, C.NewlinesBefore,
617                                  C.PreviousEndOfTokenColumn,
618                                  C.EscapedNewlineColumn);
619       else
620         appendNewlineText(ReplacementText, C.NewlinesBefore);
621       appendIndentText(ReplacementText, C.Tok->IndentLevel,
622                        std::max(0, C.Spaces),
623                        C.StartOfTokenColumn - std::max(0, C.Spaces));
624       ReplacementText.append(C.CurrentLinePrefix);
625       storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
626     }
627   }
628 }
629 
storeReplacement(SourceRange Range,StringRef Text)630 void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) {
631   unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
632                               SourceMgr.getFileOffset(Range.getBegin());
633   // Don't create a replacement, if it does not change anything.
634   if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
635                 WhitespaceLength) == Text)
636     return;
637   auto Err = Replaces.add(tooling::Replacement(
638       SourceMgr, CharSourceRange::getCharRange(Range), Text));
639   // FIXME: better error handling. For now, just print an error message in the
640   // release version.
641   if (Err) {
642     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
643     assert(false);
644   }
645 }
646 
appendNewlineText(std::string & Text,unsigned Newlines)647 void WhitespaceManager::appendNewlineText(std::string &Text,
648                                           unsigned Newlines) {
649   for (unsigned i = 0; i < Newlines; ++i)
650     Text.append(UseCRLF ? "\r\n" : "\n");
651 }
652 
appendEscapedNewlineText(std::string & Text,unsigned Newlines,unsigned PreviousEndOfTokenColumn,unsigned EscapedNewlineColumn)653 void WhitespaceManager::appendEscapedNewlineText(
654     std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn,
655     unsigned EscapedNewlineColumn) {
656   if (Newlines > 0) {
657     unsigned Spaces =
658         std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1);
659     for (unsigned i = 0; i < Newlines; ++i) {
660       Text.append(Spaces, ' ');
661       Text.append(UseCRLF ? "\\\r\n" : "\\\n");
662       Spaces = std::max<int>(0, EscapedNewlineColumn - 1);
663     }
664   }
665 }
666 
appendIndentText(std::string & Text,unsigned IndentLevel,unsigned Spaces,unsigned WhitespaceStartColumn)667 void WhitespaceManager::appendIndentText(std::string &Text,
668                                          unsigned IndentLevel, unsigned Spaces,
669                                          unsigned WhitespaceStartColumn) {
670   switch (Style.UseTab) {
671   case FormatStyle::UT_Never:
672     Text.append(Spaces, ' ');
673     break;
674   case FormatStyle::UT_Always: {
675     unsigned FirstTabWidth =
676         Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
677     // Indent with tabs only when there's at least one full tab.
678     if (FirstTabWidth + Style.TabWidth <= Spaces) {
679       Spaces -= FirstTabWidth;
680       Text.append("\t");
681     }
682     Text.append(Spaces / Style.TabWidth, '\t');
683     Text.append(Spaces % Style.TabWidth, ' ');
684     break;
685   }
686   case FormatStyle::UT_ForIndentation:
687     if (WhitespaceStartColumn == 0) {
688       unsigned Indentation = IndentLevel * Style.IndentWidth;
689       // This happens, e.g. when a line in a block comment is indented less than
690       // the first one.
691       if (Indentation > Spaces)
692         Indentation = Spaces;
693       unsigned Tabs = Indentation / Style.TabWidth;
694       Text.append(Tabs, '\t');
695       Spaces -= Tabs * Style.TabWidth;
696     }
697     Text.append(Spaces, ' ');
698     break;
699   case FormatStyle::UT_ForContinuationAndIndentation:
700     if (WhitespaceStartColumn == 0) {
701       unsigned Tabs = Spaces / Style.TabWidth;
702       Text.append(Tabs, '\t');
703       Spaces -= Tabs * Style.TabWidth;
704     }
705     Text.append(Spaces, ' ');
706     break;
707   }
708 }
709 
710 } // namespace format
711 } // namespace clang
712