1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
22 #include <map>
23 #include <tuple>
24 
25 namespace clang {
26 class SourceManager;
27 
28 namespace format {
29 
30 class AnnotatedLine;
31 class BreakableToken;
32 struct FormatToken;
33 struct LineState;
34 struct ParenState;
35 struct RawStringFormatStyleManager;
36 class WhitespaceManager;
37 
38 struct RawStringFormatStyleManager {
39   llvm::StringMap<FormatStyle> DelimiterStyle;
40   llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
41 
42   RawStringFormatStyleManager(const FormatStyle &CodeStyle);
43 
44   llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
45 
46   llvm::Optional<FormatStyle>
47   getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
48 };
49 
50 class ContinuationIndenter {
51 public:
52   /// Constructs a \c ContinuationIndenter to format \p Line starting in
53   /// column \p FirstIndent.
54   ContinuationIndenter(const FormatStyle &Style,
55                        const AdditionalKeywords &Keywords,
56                        const SourceManager &SourceMgr,
57                        WhitespaceManager &Whitespaces,
58                        encoding::Encoding Encoding,
59                        bool BinPackInconclusiveFunctions);
60 
61   /// Get the initial state, i.e. the state after placing \p Line's
62   /// first token at \p FirstIndent. When reformatting a fragment of code, as in
63   /// the case of formatting inside raw string literals, \p FirstStartColumn is
64   /// the column at which the state of the parent formatter is.
65   LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
66                             const AnnotatedLine *Line, bool DryRun);
67 
68   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
69   // better home.
70   /// Returns \c true, if a line break after \p State is allowed.
71   bool canBreak(const LineState &State);
72 
73   /// Returns \c true, if a line break after \p State is mandatory.
74   bool mustBreak(const LineState &State);
75 
76   /// Appends the next token to \p State and updates information
77   /// necessary for indentation.
78   ///
79   /// Puts the token on the current line if \p Newline is \c false and adds a
80   /// line break and necessary indentation otherwise.
81   ///
82   /// If \p DryRun is \c false, also creates and stores the required
83   /// \c Replacement.
84   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
85                            unsigned ExtraSpaces = 0);
86 
87   /// Get the column limit for this line. This is the style's column
88   /// limit, potentially reduced for preprocessor definitions.
89   unsigned getColumnLimit(const LineState &State) const;
90 
91 private:
92   /// Mark the next token as consumed in \p State and modify its stacks
93   /// accordingly.
94   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
95 
96   /// Update 'State' according to the next token's fake left parentheses.
97   void moveStatePastFakeLParens(LineState &State, bool Newline);
98   /// Update 'State' according to the next token's fake r_parens.
99   void moveStatePastFakeRParens(LineState &State);
100 
101   /// Update 'State' according to the next token being one of "(<{[".
102   void moveStatePastScopeOpener(LineState &State, bool Newline);
103   /// Update 'State' according to the next token being one of ")>}]".
104   void moveStatePastScopeCloser(LineState &State);
105   /// Update 'State' with the next token opening a nested block.
106   void moveStateToNewBlock(LineState &State);
107 
108   /// Reformats a raw string literal.
109   ///
110   /// \returns An extra penalty induced by reformatting the token.
111   unsigned reformatRawStringLiteral(const FormatToken &Current,
112                                     LineState &State,
113                                     const FormatStyle &RawStringStyle,
114                                     bool DryRun, bool Newline);
115 
116   /// If the current token is at the end of the current line, handle
117   /// the transition to the next line.
118   unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
119                            bool DryRun, bool AllowBreak, bool Newline);
120 
121   /// If \p Current is a raw string that is configured to be reformatted,
122   /// return the style to be used.
123   llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
124                                                 const LineState &State);
125 
126   /// If the current token sticks out over the end of the line, break
127   /// it if possible.
128   ///
129   /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
130   /// when tokens are broken or lines exceed the column limit, and exceeded
131   /// indicates whether the algorithm purposefully left lines exceeding the
132   /// column limit.
133   ///
134   /// The returned penalty will cover the cost of the additional line breaks
135   /// and column limit violation in all lines except for the last one. The
136   /// penalty for the column limit violation in the last line (and in single
137   /// line tokens) is handled in \c addNextStateToQueue.
138   ///
139   /// \p Strict indicates whether reflowing is allowed to leave characters
140   /// protruding the column limit; if true, lines will be split strictly within
141   /// the column limit where possible; if false, words are allowed to protrude
142   /// over the column limit as long as the penalty is less than the penalty
143   /// of a break.
144   std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
145                                                  LineState &State,
146                                                  bool AllowBreak, bool DryRun,
147                                                  bool Strict);
148 
149   /// Returns the \c BreakableToken starting at \p Current, or nullptr
150   /// if the current token cannot be broken.
151   std::unique_ptr<BreakableToken>
152   createBreakableToken(const FormatToken &Current, LineState &State,
153                        bool AllowBreak);
154 
155   /// Appends the next token to \p State and updates information
156   /// necessary for indentation.
157   ///
158   /// Puts the token on the current line.
159   ///
160   /// If \p DryRun is \c false, also creates and stores the required
161   /// \c Replacement.
162   void addTokenOnCurrentLine(LineState &State, bool DryRun,
163                              unsigned ExtraSpaces);
164 
165   /// Appends the next token to \p State and updates information
166   /// necessary for indentation.
167   ///
168   /// Adds a line break and necessary indentation.
169   ///
170   /// If \p DryRun is \c false, also creates and stores the required
171   /// \c Replacement.
172   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
173 
174   /// Calculate the new column for a line wrap before the next token.
175   unsigned getNewLineColumn(const LineState &State);
176 
177   /// Adds a multiline token to the \p State.
178   ///
179   /// \returns Extra penalty for the first line of the literal: last line is
180   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
181   /// matter, as we don't change them.
182   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
183 
184   /// Returns \c true if the next token starts a multiline string
185   /// literal.
186   ///
187   /// This includes implicitly concatenated strings, strings that will be broken
188   /// by clang-format and string literals with escaped newlines.
189   bool nextIsMultilineString(const LineState &State);
190 
191   FormatStyle Style;
192   const AdditionalKeywords &Keywords;
193   const SourceManager &SourceMgr;
194   WhitespaceManager &Whitespaces;
195   encoding::Encoding Encoding;
196   bool BinPackInconclusiveFunctions;
197   llvm::Regex CommentPragmasRegex;
198   const RawStringFormatStyleManager RawStringFormats;
199 };
200 
201 struct ParenState {
202   ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
203              bool AvoidBinPacking, bool NoLineBreak)
204       : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
205         NestedBlockIndent(Indent), IsAligned(false),
206         BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
207         AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
208         NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
209         LastOperatorWrapped(true), ContainsLineBreak(false),
210         ContainsUnwrappedBuilder(false), AlignColons(true),
211         ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
212         NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
213         IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
214         IsWrappedConditional(false), UnindentOperator(false) {}
215 
216   /// \brief The token opening this parenthesis level, or nullptr if this level
217   /// is opened by fake parenthesis.
218   ///
219   /// Not considered for memoization as it will always have the same value at
220   /// the same token.
221   const FormatToken *Tok;
222 
223   /// The position to which a specific parenthesis level needs to be
224   /// indented.
225   unsigned Indent;
226 
227   /// The position of the last space on each level.
228   ///
229   /// Used e.g. to break like:
230   /// functionCall(Parameter, otherCall(
231   ///                             OtherParameter));
232   unsigned LastSpace;
233 
234   /// If a block relative to this parenthesis level gets wrapped, indent
235   /// it this much.
236   unsigned NestedBlockIndent;
237 
238   /// The position the first "<<" operator encountered on each level.
239   ///
240   /// Used to align "<<" operators. 0 if no such operator has been encountered
241   /// on a level.
242   unsigned FirstLessLess = 0;
243 
244   /// The column of a \c ? in a conditional expression;
245   unsigned QuestionColumn = 0;
246 
247   /// The position of the colon in an ObjC method declaration/call.
248   unsigned ColonPos = 0;
249 
250   /// The start of the most recent function in a builder-type call.
251   unsigned StartOfFunctionCall = 0;
252 
253   /// Contains the start of array subscript expressions, so that they
254   /// can be aligned.
255   unsigned StartOfArraySubscripts = 0;
256 
257   /// If a nested name specifier was broken over multiple lines, this
258   /// contains the start column of the second line. Otherwise 0.
259   unsigned NestedNameSpecifierContinuation = 0;
260 
261   /// If a call expression was broken over multiple lines, this
262   /// contains the start column of the second line. Otherwise 0.
263   unsigned CallContinuation = 0;
264 
265   /// The column of the first variable name in a variable declaration.
266   ///
267   /// Used to align further variables if necessary.
268   unsigned VariablePos = 0;
269 
270   /// Whether this block's indentation is used for alignment.
271   bool IsAligned : 1;
272 
273   /// Whether a newline needs to be inserted before the block's closing
274   /// brace.
275   ///
276   /// We only want to insert a newline before the closing brace if there also
277   /// was a newline after the beginning left brace.
278   bool BreakBeforeClosingBrace : 1;
279 
280   /// Whether a newline needs to be inserted before the block's closing
281   /// paren.
282   ///
283   /// We only want to insert a newline before the closing paren if there also
284   /// was a newline after the beginning left paren.
285   bool BreakBeforeClosingParen : 1;
286 
287   /// Avoid bin packing, i.e. multiple parameters/elements on multiple
288   /// lines, in this context.
289   bool AvoidBinPacking : 1;
290 
291   /// Break after the next comma (or all the commas in this context if
292   /// \c AvoidBinPacking is \c true).
293   bool BreakBeforeParameter : 1;
294 
295   /// Line breaking in this context would break a formatting rule.
296   bool NoLineBreak : 1;
297 
298   /// Same as \c NoLineBreak, but is restricted until the end of the
299   /// operand (including the next ",").
300   bool NoLineBreakInOperand : 1;
301 
302   /// True if the last binary operator on this level was wrapped to the
303   /// next line.
304   bool LastOperatorWrapped : 1;
305 
306   /// \c true if this \c ParenState already contains a line-break.
307   ///
308   /// The first line break in a certain \c ParenState causes extra penalty so
309   /// that clang-format prefers similar breaks, i.e. breaks in the same
310   /// parenthesis.
311   bool ContainsLineBreak : 1;
312 
313   /// \c true if this \c ParenState contains multiple segments of a
314   /// builder-type call on one line.
315   bool ContainsUnwrappedBuilder : 1;
316 
317   /// \c true if the colons of the curren ObjC method expression should
318   /// be aligned.
319   ///
320   /// Not considered for memoization as it will always have the same value at
321   /// the same token.
322   bool AlignColons : 1;
323 
324   /// \c true if at least one selector name was found in the current
325   /// ObjC method expression.
326   ///
327   /// Not considered for memoization as it will always have the same value at
328   /// the same token.
329   bool ObjCSelectorNameFound : 1;
330 
331   /// \c true if there are multiple nested blocks inside these parens.
332   ///
333   /// Not considered for memoization as it will always have the same value at
334   /// the same token.
335   bool HasMultipleNestedBlocks : 1;
336 
337   /// The start of a nested block (e.g. lambda introducer in C++ or
338   /// "function" in JavaScript) is not wrapped to a new line.
339   bool NestedBlockInlined : 1;
340 
341   /// \c true if the current \c ParenState represents an Objective-C
342   /// array literal.
343   bool IsInsideObjCArrayLiteral : 1;
344 
345   bool IsCSharpGenericTypeConstraint : 1;
346 
347   /// \brief true if the current \c ParenState represents the false branch of
348   /// a chained conditional expression (e.g. else-if)
349   bool IsChainedConditional : 1;
350 
351   /// \brief true if there conditionnal was wrapped on the first operator (the
352   /// question mark)
353   bool IsWrappedConditional : 1;
354 
355   /// \brief Indicates the indent should be reduced by the length of the
356   /// operator.
357   bool UnindentOperator : 1;
358 
359   bool operator<(const ParenState &Other) const {
360     if (Indent != Other.Indent)
361       return Indent < Other.Indent;
362     if (LastSpace != Other.LastSpace)
363       return LastSpace < Other.LastSpace;
364     if (NestedBlockIndent != Other.NestedBlockIndent)
365       return NestedBlockIndent < Other.NestedBlockIndent;
366     if (FirstLessLess != Other.FirstLessLess)
367       return FirstLessLess < Other.FirstLessLess;
368     if (IsAligned != Other.IsAligned)
369       return IsAligned;
370     if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
371       return BreakBeforeClosingBrace;
372     if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
373       return BreakBeforeClosingParen;
374     if (QuestionColumn != Other.QuestionColumn)
375       return QuestionColumn < Other.QuestionColumn;
376     if (AvoidBinPacking != Other.AvoidBinPacking)
377       return AvoidBinPacking;
378     if (BreakBeforeParameter != Other.BreakBeforeParameter)
379       return BreakBeforeParameter;
380     if (NoLineBreak != Other.NoLineBreak)
381       return NoLineBreak;
382     if (LastOperatorWrapped != Other.LastOperatorWrapped)
383       return LastOperatorWrapped;
384     if (ColonPos != Other.ColonPos)
385       return ColonPos < Other.ColonPos;
386     if (StartOfFunctionCall != Other.StartOfFunctionCall)
387       return StartOfFunctionCall < Other.StartOfFunctionCall;
388     if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
389       return StartOfArraySubscripts < Other.StartOfArraySubscripts;
390     if (CallContinuation != Other.CallContinuation)
391       return CallContinuation < Other.CallContinuation;
392     if (VariablePos != Other.VariablePos)
393       return VariablePos < Other.VariablePos;
394     if (ContainsLineBreak != Other.ContainsLineBreak)
395       return ContainsLineBreak;
396     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
397       return ContainsUnwrappedBuilder;
398     if (NestedBlockInlined != Other.NestedBlockInlined)
399       return NestedBlockInlined;
400     if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
401       return IsCSharpGenericTypeConstraint;
402     if (IsChainedConditional != Other.IsChainedConditional)
403       return IsChainedConditional;
404     if (IsWrappedConditional != Other.IsWrappedConditional)
405       return IsWrappedConditional;
406     if (UnindentOperator != Other.UnindentOperator)
407       return UnindentOperator;
408     return false;
409   }
410 };
411 
412 /// The current state when indenting a unwrapped line.
413 ///
414 /// As the indenting tries different combinations this is copied by value.
415 struct LineState {
416   /// The number of used columns in the current line.
417   unsigned Column;
418 
419   /// The token that needs to be next formatted.
420   FormatToken *NextToken;
421 
422   /// \c true if \p NextToken should not continue this line.
423   bool NoContinuation;
424 
425   /// The \c NestingLevel at the start of this line.
426   unsigned StartOfLineLevel;
427 
428   /// The lowest \c NestingLevel on the current line.
429   unsigned LowestLevelOnLine;
430 
431   /// The start column of the string literal, if we're in a string
432   /// literal sequence, 0 otherwise.
433   unsigned StartOfStringLiteral;
434 
435   /// A stack keeping track of properties applying to parenthesis
436   /// levels.
437   std::vector<ParenState> Stack;
438 
439   /// Ignore the stack of \c ParenStates for state comparison.
440   ///
441   /// In long and deeply nested unwrapped lines, the current algorithm can
442   /// be insufficient for finding the best formatting with a reasonable amount
443   /// of time and memory. Setting this flag will effectively lead to the
444   /// algorithm not analyzing some combinations. However, these combinations
445   /// rarely contain the optimal solution: In short, accepting a higher
446   /// penalty early would need to lead to different values in the \c
447   /// ParenState stack (in an otherwise identical state) and these different
448   /// values would need to lead to a significant amount of avoided penalty
449   /// later.
450   ///
451   /// FIXME: Come up with a better algorithm instead.
452   bool IgnoreStackForComparison;
453 
454   /// The indent of the first token.
455   unsigned FirstIndent;
456 
457   /// The line that is being formatted.
458   ///
459   /// Does not need to be considered for memoization because it doesn't change.
460   const AnnotatedLine *Line;
461 
462   /// Comparison operator to be able to used \c LineState in \c map.
463   bool operator<(const LineState &Other) const {
464     if (NextToken != Other.NextToken)
465       return NextToken < Other.NextToken;
466     if (Column != Other.Column)
467       return Column < Other.Column;
468     if (NoContinuation != Other.NoContinuation)
469       return NoContinuation;
470     if (StartOfLineLevel != Other.StartOfLineLevel)
471       return StartOfLineLevel < Other.StartOfLineLevel;
472     if (LowestLevelOnLine != Other.LowestLevelOnLine)
473       return LowestLevelOnLine < Other.LowestLevelOnLine;
474     if (StartOfStringLiteral != Other.StartOfStringLiteral)
475       return StartOfStringLiteral < Other.StartOfStringLiteral;
476     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
477       return false;
478     return Stack < Other.Stack;
479   }
480 };
481 
482 } // end namespace format
483 } // end namespace clang
484 
485 #endif
486