1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements an indenter that manages the indentation of
12 /// continuations.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
18 
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <vector>
23 
24 namespace clang {
25 class SourceManager;
26 
27 namespace format {
28 
29 class AnnotatedLine;
30 struct FormatToken;
31 struct LineState;
32 struct ParenState;
33 class WhitespaceManager;
34 
35 class ContinuationIndenter {
36 public:
37   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
38   /// column \p FirstIndent.
39   ContinuationIndenter(const FormatStyle &Style,
40                        const AdditionalKeywords &Keywords,
41                        SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
42                        encoding::Encoding Encoding,
43                        bool BinPackInconclusiveFunctions);
44 
45   /// \brief Get the initial state, i.e. the state after placing \p Line's
46   /// first token at \p FirstIndent.
47   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
48                             bool DryRun);
49 
50   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
51   // better home.
52   /// \brief Returns \c true, if a line break after \p State is allowed.
53   bool canBreak(const LineState &State);
54 
55   /// \brief Returns \c true, if a line break after \p State is mandatory.
56   bool mustBreak(const LineState &State);
57 
58   /// \brief Appends the next token to \p State and updates information
59   /// necessary for indentation.
60   ///
61   /// Puts the token on the current line if \p Newline is \c false and adds a
62   /// line break and necessary indentation otherwise.
63   ///
64   /// If \p DryRun is \c false, also creates and stores the required
65   /// \c Replacement.
66   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
67                            unsigned ExtraSpaces = 0);
68 
69   /// \brief Get the column limit for this line. This is the style's column
70   /// limit, potentially reduced for preprocessor definitions.
71   unsigned getColumnLimit(const LineState &State) const;
72 
73 private:
74   /// \brief Mark the next token as consumed in \p State and modify its stacks
75   /// accordingly.
76   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
77 
78   /// \brief Update 'State' according to the next token's fake left parentheses.
79   void moveStatePastFakeLParens(LineState &State, bool Newline);
80   /// \brief Update 'State' according to the next token's fake r_parens.
81   void moveStatePastFakeRParens(LineState &State);
82 
83   /// \brief Update 'State' according to the next token being one of "(<{[".
84   void moveStatePastScopeOpener(LineState &State, bool Newline);
85   /// \brief Update 'State' according to the next token being one of ")>}]".
86   void moveStatePastScopeCloser(LineState &State);
87   /// \brief Update 'State' with the next token opening a nested block.
88   void moveStateToNewBlock(LineState &State);
89 
90   /// \brief If the current token sticks out over the end of the line, break
91   /// it if possible.
92   ///
93   /// \returns An extra penalty if a token was broken, otherwise 0.
94   ///
95   /// The returned penalty will cover the cost of the additional line breaks and
96   /// column limit violation in all lines except for the last one. The penalty
97   /// for the column limit violation in the last line (and in single line
98   /// tokens) is handled in \c addNextStateToQueue.
99   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
100                                 bool DryRun);
101 
102   /// \brief Appends the next token to \p State and updates information
103   /// necessary for indentation.
104   ///
105   /// Puts the token on the current line.
106   ///
107   /// If \p DryRun is \c false, also creates and stores the required
108   /// \c Replacement.
109   void addTokenOnCurrentLine(LineState &State, bool DryRun,
110                              unsigned ExtraSpaces);
111 
112   /// \brief Appends the next token to \p State and updates information
113   /// necessary for indentation.
114   ///
115   /// Adds a line break and necessary indentation.
116   ///
117   /// If \p DryRun is \c false, also creates and stores the required
118   /// \c Replacement.
119   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
120 
121   /// \brief Calculate the new column for a line wrap before the next token.
122   unsigned getNewLineColumn(const LineState &State);
123 
124   /// \brief Adds a multiline token to the \p State.
125   ///
126   /// \returns Extra penalty for the first line of the literal: last line is
127   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
128   /// matter, as we don't change them.
129   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
130 
131   /// \brief Returns \c true if the next token starts a multiline string
132   /// literal.
133   ///
134   /// This includes implicitly concatenated strings, strings that will be broken
135   /// by clang-format and string literals with escaped newlines.
136   bool nextIsMultilineString(const LineState &State);
137 
138   FormatStyle Style;
139   const AdditionalKeywords &Keywords;
140   SourceManager &SourceMgr;
141   WhitespaceManager &Whitespaces;
142   encoding::Encoding Encoding;
143   bool BinPackInconclusiveFunctions;
144   llvm::Regex CommentPragmasRegex;
145 };
146 
/// \brief Properties tracked for a single parenthesis level while a line is
/// being laid out.
struct ParenState {
  /// \brief Initializes a level from the five caller-supplied values.
  ///
  /// \c NestedBlockIndent starts out equal to \p Indent, and
  /// \c LastOperatorWrapped and \c AlignColons start out \c true. All other
  /// fields start at 0 / \c false.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        NestedBlockIndent(Indent), FirstLessLess(0),
        BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
        StartOfFunctionCall(0), StartOfArraySubscripts(0),
        NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// \brief The position the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression;
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks;

  /// \brief The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined;

  /// \brief Orders two \c ParenStates field by field; the first field that
  /// differs decides the result.
  ///
  /// For the \c bool fields from \c BreakBeforeClosingBrace through
  /// \c LastOperatorWrapped, \c true orders before \c false. For
  /// \c ContainsLineBreak, \c ContainsUnwrappedBuilder and
  /// \c NestedBlockInlined, \c false orders before \c true.
  ///
  /// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks
  /// are skipped on purpose (see their documentation).
  ///
  /// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation are
  /// not compared either, and nothing here says why - confirm this is
  /// intended before relying on it.
  ///
  /// \returns \c true if this state orders strictly before \p Other.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (NestedBlockIndent != Other.NestedBlockIndent)
      return NestedBlockIndent < Other.NestedBlockIndent;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    if (NestedBlockInlined != Other.NestedBlockInlined)
      return NestedBlockInlined < Other.NestedBlockInlined;
    return false;
  }
};
308 
309 /// \brief The current state when indenting a unwrapped line.
310 ///
311 /// As the indenting tries different combinations this is copied by value.
312 struct LineState {
313   /// \brief The number of used columns in the current line.
314   unsigned Column;
315 
316   /// \brief The token that needs to be next formatted.
317   FormatToken *NextToken;
318 
319   /// \brief \c true if this line contains a continued for-loop section.
320   bool LineContainsContinuedForLoopSection;
321 
322   /// \brief The \c NestingLevel at the start of this line.
323   unsigned StartOfLineLevel;
324 
325   /// \brief The lowest \c NestingLevel on the current line.
326   unsigned LowestLevelOnLine;
327 
328   /// \brief The start column of the string literal, if we're in a string
329   /// literal sequence, 0 otherwise.
330   unsigned StartOfStringLiteral;
331 
332   /// \brief A stack keeping track of properties applying to parenthesis
333   /// levels.
334   std::vector<ParenState> Stack;
335 
336   /// \brief Ignore the stack of \c ParenStates for state comparison.
337   ///
338   /// In long and deeply nested unwrapped lines, the current algorithm can
339   /// be insufficient for finding the best formatting with a reasonable amount
340   /// of time and memory. Setting this flag will effectively lead to the
341   /// algorithm not analyzing some combinations. However, these combinations
342   /// rarely contain the optimal solution: In short, accepting a higher
343   /// penalty early would need to lead to different values in the \c
344   /// ParenState stack (in an otherwise identical state) and these different
345   /// values would need to lead to a significant amount of avoided penalty
346   /// later.
347   ///
348   /// FIXME: Come up with a better algorithm instead.
349   bool IgnoreStackForComparison;
350 
351   /// \brief The indent of the first token.
352   unsigned FirstIndent;
353 
354   /// \brief The line that is being formatted.
355   ///
356   /// Does not need to be considered for memoization because it doesn't change.
357   const AnnotatedLine *Line;
358 
359   /// \brief Comparison operator to be able to used \c LineState in \c map.
360   bool operator<(const LineState &Other) const {
361     if (NextToken != Other.NextToken)
362       return NextToken < Other.NextToken;
363     if (Column != Other.Column)
364       return Column < Other.Column;
365     if (LineContainsContinuedForLoopSection !=
366         Other.LineContainsContinuedForLoopSection)
367       return LineContainsContinuedForLoopSection;
368     if (StartOfLineLevel != Other.StartOfLineLevel)
369       return StartOfLineLevel < Other.StartOfLineLevel;
370     if (LowestLevelOnLine != Other.LowestLevelOnLine)
371       return LowestLevelOnLine < Other.LowestLevelOnLine;
372     if (StartOfStringLiteral != Other.StartOfStringLiteral)
373       return StartOfStringLiteral < Other.StartOfStringLiteral;
374     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
375       return false;
376     return Stack < Other.Stack;
377   }
378 };
379 
380 } // end namespace format
381 } // end namespace clang
382 
383 #endif
384