1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
#include "Encoding.h"
#include "FormatToken.h"
#include "clang/Format/Format.h"
#include "llvm/Support/Regex.h"
#include <functional>
#include <map>
#include <optional>
#include <tuple>
25 
26 namespace clang {
27 class SourceManager;
28 
29 namespace format {
30 
// Forward declarations of the types that the declarations below name.
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
38 
39 struct RawStringFormatStyleManager {
40   llvm::StringMap<FormatStyle> DelimiterStyle;
41   llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42 
43   RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44 
45   std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46 
47   std::optional<FormatStyle>
48   getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
49 };
50 
51 class ContinuationIndenter {
52 public:
53   /// Constructs a \c ContinuationIndenter to format \p Line starting in
54   /// column \p FirstIndent.
55   ContinuationIndenter(const FormatStyle &Style,
56                        const AdditionalKeywords &Keywords,
57                        const SourceManager &SourceMgr,
58                        WhitespaceManager &Whitespaces,
59                        encoding::Encoding Encoding,
60                        bool BinPackInconclusiveFunctions);
61 
62   /// Get the initial state, i.e. the state after placing \p Line's
63   /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64   /// the case of formatting inside raw string literals, \p FirstStartColumn is
65   /// the column at which the state of the parent formatter is.
66   LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67                             const AnnotatedLine *Line, bool DryRun);
68 
69   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70   // better home.
71   /// Returns \c true, if a line break after \p State is allowed.
72   bool canBreak(const LineState &State);
73 
74   /// Returns \c true, if a line break after \p State is mandatory.
75   bool mustBreak(const LineState &State);
76 
77   /// Appends the next token to \p State and updates information
78   /// necessary for indentation.
79   ///
80   /// Puts the token on the current line if \p Newline is \c false and adds a
81   /// line break and necessary indentation otherwise.
82   ///
83   /// If \p DryRun is \c false, also creates and stores the required
84   /// \c Replacement.
85   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86                            unsigned ExtraSpaces = 0);
87 
88   /// Get the column limit for this line. This is the style's column
89   /// limit, potentially reduced for preprocessor definitions.
90   unsigned getColumnLimit(const LineState &State) const;
91 
92 private:
93   /// Mark the next token as consumed in \p State and modify its stacks
94   /// accordingly.
95   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96 
97   /// Update 'State' according to the next token's fake left parentheses.
98   void moveStatePastFakeLParens(LineState &State, bool Newline);
99   /// Update 'State' according to the next token's fake r_parens.
100   void moveStatePastFakeRParens(LineState &State);
101 
102   /// Update 'State' according to the next token being one of "(<{[".
103   void moveStatePastScopeOpener(LineState &State, bool Newline);
104   /// Update 'State' according to the next token being one of ")>}]".
105   void moveStatePastScopeCloser(LineState &State);
106   /// Update 'State' with the next token opening a nested block.
107   void moveStateToNewBlock(LineState &State);
108 
109   /// Reformats a raw string literal.
110   ///
111   /// \returns An extra penalty induced by reformatting the token.
112   unsigned reformatRawStringLiteral(const FormatToken &Current,
113                                     LineState &State,
114                                     const FormatStyle &RawStringStyle,
115                                     bool DryRun, bool Newline);
116 
117   /// If the current token is at the end of the current line, handle
118   /// the transition to the next line.
119   unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120                            bool DryRun, bool AllowBreak, bool Newline);
121 
122   /// If \p Current is a raw string that is configured to be reformatted,
123   /// return the style to be used.
124   std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125                                                const LineState &State);
126 
127   /// If the current token sticks out over the end of the line, break
128   /// it if possible.
129   ///
130   /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131   /// when tokens are broken or lines exceed the column limit, and exceeded
132   /// indicates whether the algorithm purposefully left lines exceeding the
133   /// column limit.
134   ///
135   /// The returned penalty will cover the cost of the additional line breaks
136   /// and column limit violation in all lines except for the last one. The
137   /// penalty for the column limit violation in the last line (and in single
138   /// line tokens) is handled in \c addNextStateToQueue.
139   ///
140   /// \p Strict indicates whether reflowing is allowed to leave characters
141   /// protruding the column limit; if true, lines will be split strictly within
142   /// the column limit where possible; if false, words are allowed to protrude
143   /// over the column limit as long as the penalty is less than the penalty
144   /// of a break.
145   std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146                                                  LineState &State,
147                                                  bool AllowBreak, bool DryRun,
148                                                  bool Strict);
149 
150   /// Returns the \c BreakableToken starting at \p Current, or nullptr
151   /// if the current token cannot be broken.
152   std::unique_ptr<BreakableToken>
153   createBreakableToken(const FormatToken &Current, LineState &State,
154                        bool AllowBreak);
155 
156   /// Appends the next token to \p State and updates information
157   /// necessary for indentation.
158   ///
159   /// Puts the token on the current line.
160   ///
161   /// If \p DryRun is \c false, also creates and stores the required
162   /// \c Replacement.
163   void addTokenOnCurrentLine(LineState &State, bool DryRun,
164                              unsigned ExtraSpaces);
165 
166   /// Appends the next token to \p State and updates information
167   /// necessary for indentation.
168   ///
169   /// Adds a line break and necessary indentation.
170   ///
171   /// If \p DryRun is \c false, also creates and stores the required
172   /// \c Replacement.
173   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174 
175   /// Calculate the new column for a line wrap before the next token.
176   unsigned getNewLineColumn(const LineState &State);
177 
178   /// Adds a multiline token to the \p State.
179   ///
180   /// \returns Extra penalty for the first line of the literal: last line is
181   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182   /// matter, as we don't change them.
183   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184 
185   /// Returns \c true if the next token starts a multiline string
186   /// literal.
187   ///
188   /// This includes implicitly concatenated strings, strings that will be broken
189   /// by clang-format and string literals with escaped newlines.
190   bool nextIsMultilineString(const LineState &State);
191 
192   FormatStyle Style;
193   const AdditionalKeywords &Keywords;
194   const SourceManager &SourceMgr;
195   WhitespaceManager &Whitespaces;
196   encoding::Encoding Encoding;
197   bool BinPackInconclusiveFunctions;
198   llvm::Regex CommentPragmasRegex;
199   const RawStringFormatStyleManager RawStringFormats;
200 };
201 
/// Properties that apply to one parenthesis level while a line is laid out.
///
/// \c LineState keeps a stack of these. \c operator< defines the ordering of
/// two levels; several members are deliberately left out of it (see the
/// per-member notes and the note on \c operator<).
struct ParenState {
  /// Creates a level opened by \p Tok.
  ///
  /// \c NestedBlockIndent starts out equal to \p Indent, \c LastOperatorWrapped
  /// and \c AlignColons start out \c true, every other flag not passed in
  /// starts out \c false, and all remaining columns start out at 0.
  ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
        NestedBlockIndent(Indent), IsAligned(false),
        BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
        LastOperatorWrapped(true), ContainsLineBreak(false),
        ContainsUnwrappedBuilder(false), AlignColons(true),
        ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
        NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
        IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
        IsWrappedConditional(false), UnindentOperator(false) {}

  /// The token opening this parenthesis level, or nullptr if this level
  /// is opened by fake parenthesis.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  const FormatToken *Tok;

  /// The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// If a block relative to this parenthesis level gets wrapped, indent
  /// it this much.
  unsigned NestedBlockIndent;

  /// The position of the first "<<" operator encountered on each level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess = 0;

  /// The column of a \c ? in a conditional expression.
  unsigned QuestionColumn = 0;

  /// The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos = 0;

  /// The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall = 0;

  /// Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts = 0;

  /// If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation = 0;

  /// If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation = 0;

  /// The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos = 0;

  /// Whether this block's indentation is used for alignment.
  bool IsAligned : 1;

  /// Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace : 1;

  /// Whether a newline needs to be inserted before the block's closing
  /// paren.
  ///
  /// We only want to insert a newline before the closing paren if there also
  /// was a newline after the beginning left paren.
  bool BreakBeforeClosingParen : 1;

  /// Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking : 1;

  /// Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter : 1;

  /// Line breaking in this context would break a formatting rule.
  bool NoLineBreak : 1;

  /// Same as \c NoLineBreak, but is restricted until the end of the
  /// operand (including the next ",").
  bool NoLineBreakInOperand : 1;

  /// True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped : 1;

  /// \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak : 1;

  /// \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder : 1;

  /// \c true if the colons of the current ObjC method expression should
  /// be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons : 1;

  /// \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound : 1;

  /// \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks : 1;

  /// The start of a nested block (e.g. lambda introducer in C++ or
  /// "function" in JavaScript) is not wrapped to a new line.
  bool NestedBlockInlined : 1;

  /// \c true if the current \c ParenState represents an Objective-C
  /// array literal.
  bool IsInsideObjCArrayLiteral : 1;

  /// NOTE(review): undocumented in the original; judging by the name, \c true
  /// if this \c ParenState belongs to a C# generic type constraint -- confirm.
  bool IsCSharpGenericTypeConstraint : 1;

  /// \c true if the current \c ParenState represents the false branch of
  /// a chained conditional expression (e.g. else-if).
  bool IsChainedConditional : 1;

  /// \c true if the conditional was wrapped on the first operator (the
  /// question mark).
  bool IsWrappedConditional : 1;

  /// Indicates the indent should be reduced by the length of the
  /// operator.
  bool UnindentOperator : 1;

  /// Orders two levels by comparing members one after another; the first
  /// member that differs decides.
  ///
  /// Columns compare numerically. For a flag that differs, the level that has
  /// the flag set orders first. Returns \c false when every compared member is
  /// equal.
  ///
  /// \c Tok, \c AlignColons, \c ObjCSelectorNameFound and
  /// \c HasMultipleNestedBlocks are skipped as documented on those members.
  ///
  /// NOTE(review): \c NoLineBreakInOperand, \c NestedNameSpecifierContinuation
  /// and \c IsInsideObjCArrayLiteral are not compared either, although they are
  /// not documented as exempt -- confirm that this is intentional.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (NestedBlockIndent != Other.NestedBlockIndent)
      return NestedBlockIndent < Other.NestedBlockIndent;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    if (IsAligned != Other.IsAligned)
      return IsAligned;
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
      return BreakBeforeClosingParen;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder;
    if (NestedBlockInlined != Other.NestedBlockInlined)
      return NestedBlockInlined;
    if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
      return IsCSharpGenericTypeConstraint;
    if (IsChainedConditional != Other.IsChainedConditional)
      return IsChainedConditional;
    if (IsWrappedConditional != Other.IsWrappedConditional)
      return IsWrappedConditional;
    if (UnindentOperator != Other.UnindentOperator)
      return UnindentOperator;
    return false;
  }
};
412 
413 /// The current state when indenting a unwrapped line.
414 ///
415 /// As the indenting tries different combinations this is copied by value.
416 struct LineState {
417   /// The number of used columns in the current line.
418   unsigned Column;
419 
420   /// The token that needs to be next formatted.
421   FormatToken *NextToken;
422 
423   /// \c true if \p NextToken should not continue this line.
424   bool NoContinuation;
425 
426   /// The \c NestingLevel at the start of this line.
427   unsigned StartOfLineLevel;
428 
429   /// The lowest \c NestingLevel on the current line.
430   unsigned LowestLevelOnLine;
431 
432   /// The start column of the string literal, if we're in a string
433   /// literal sequence, 0 otherwise.
434   unsigned StartOfStringLiteral;
435 
436   /// A stack keeping track of properties applying to parenthesis
437   /// levels.
438   SmallVector<ParenState> Stack;
439 
440   /// Ignore the stack of \c ParenStates for state comparison.
441   ///
442   /// In long and deeply nested unwrapped lines, the current algorithm can
443   /// be insufficient for finding the best formatting with a reasonable amount
444   /// of time and memory. Setting this flag will effectively lead to the
445   /// algorithm not analyzing some combinations. However, these combinations
446   /// rarely contain the optimal solution: In short, accepting a higher
447   /// penalty early would need to lead to different values in the \c
448   /// ParenState stack (in an otherwise identical state) and these different
449   /// values would need to lead to a significant amount of avoided penalty
450   /// later.
451   ///
452   /// FIXME: Come up with a better algorithm instead.
453   bool IgnoreStackForComparison;
454 
455   /// The indent of the first token.
456   unsigned FirstIndent;
457 
458   /// The line that is being formatted.
459   ///
460   /// Does not need to be considered for memoization because it doesn't change.
461   const AnnotatedLine *Line;
462 
463   /// Comparison operator to be able to used \c LineState in \c map.
464   bool operator<(const LineState &Other) const {
465     if (NextToken != Other.NextToken)
466       return NextToken < Other.NextToken;
467     if (Column != Other.Column)
468       return Column < Other.Column;
469     if (NoContinuation != Other.NoContinuation)
470       return NoContinuation;
471     if (StartOfLineLevel != Other.StartOfLineLevel)
472       return StartOfLineLevel < Other.StartOfLineLevel;
473     if (LowestLevelOnLine != Other.LowestLevelOnLine)
474       return LowestLevelOnLine < Other.LowestLevelOnLine;
475     if (StartOfStringLiteral != Other.StartOfStringLiteral)
476       return StartOfStringLiteral < Other.StartOfStringLiteral;
477     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
478       return false;
479     return Stack < Other.Stack;
480   }
481 };
482 
483 } // end namespace format
484 } // end namespace clang
485 
486 #endif
487