1 //===--- BreakableToken.h - Format C++ code -------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
/// \brief Declares BreakableToken and its subclasses
/// (BreakableSingleLineToken, BreakableStringLiteral, BreakableLineComment and
/// BreakableBlockComment), which contain token type-specific logic to break
/// long lines in tokens.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
18 #define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
19 
20 #include "Encoding.h"
21 #include "TokenAnnotator.h"
22 #include "WhitespaceManager.h"
23 #include <utility>
24 
25 namespace clang {
26 namespace format {
27 
28 struct FormatStyle;
29 
30 /// \brief Base class for strategies on how to break tokens.
31 ///
32 /// FIXME: The interface seems set in stone, so we might want to just pull the
33 /// strategy into the class, instead of controlling it from the outside.
34 class BreakableToken {
35 public:
36   /// \brief Contains starting character index and length of split.
37   typedef std::pair<StringRef::size_type, unsigned> Split;
38 
39   virtual ~BreakableToken() {}
40 
41   /// \brief Returns the number of lines in this token in the original code.
42   virtual unsigned getLineCount() const = 0;
43 
44   /// \brief Returns the number of columns required to format the piece of line
45   /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46   ///
47   /// Note that previous breaks are not taken into account. \p Offset is always
48   /// specified from the start of the (original) line.
49   /// \p Length can be set to StringRef::npos, which means "to the end of line".
50   virtual unsigned
51   getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                           StringRef::size_type Length) const = 0;
53 
54   /// \brief Returns a range (offset, length) at which to break the line at
55   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56   /// violate \p ColumnLimit.
57   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                          unsigned ColumnLimit) const = 0;
59 
60   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                            WhitespaceManager &Whitespaces) = 0;
63 
64   /// \brief Replaces the whitespace range described by \p Split with a single
65   /// space.
66   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
67                                  Split Split,
68                                  WhitespaceManager &Whitespaces) = 0;
69 
70   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
71   virtual void replaceWhitespaceBefore(unsigned LineIndex,
72                                        WhitespaceManager &Whitespaces) {}
73 
74 protected:
75   BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
76                  bool InPPDirective, encoding::Encoding Encoding,
77                  const FormatStyle &Style)
78       : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
79         Encoding(Encoding), Style(Style) {}
80 
81   const FormatToken &Tok;
82   const unsigned IndentLevel;
83   const bool InPPDirective;
84   const encoding::Encoding Encoding;
85   const FormatStyle &Style;
86 };
87 
88 /// \brief Base class for single line tokens that can be broken.
89 ///
90 /// \c getSplit() needs to be implemented by child classes.
91 class BreakableSingleLineToken : public BreakableToken {
92 public:
93   virtual unsigned getLineCount() const;
94   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
95                                            unsigned TailOffset,
96                                            StringRef::size_type Length) const;
97 
98 protected:
99   BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
100                            unsigned StartColumn, StringRef Prefix,
101                            StringRef Postfix, bool InPPDirective,
102                            encoding::Encoding Encoding,
103                            const FormatStyle &Style);
104 
105   // The column in which the token starts.
106   unsigned StartColumn;
107   // The prefix a line needs after a break in the token.
108   StringRef Prefix;
109   // The postfix a line needs before introducing a break.
110   StringRef Postfix;
111   // The token text excluding the prefix and postfix.
112   StringRef Line;
113 };
114 
115 class BreakableStringLiteral : public BreakableSingleLineToken {
116 public:
117   /// \brief Creates a breakable token for a single line string literal.
118   ///
119   /// \p StartColumn specifies the column in which the token will start
120   /// after formatting.
121   BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
122                          unsigned StartColumn, StringRef Prefix,
123                          StringRef Postfix, bool InPPDirective,
124                          encoding::Encoding Encoding, const FormatStyle &Style);
125 
126   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
127                          unsigned ColumnLimit) const;
128   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
129                            WhitespaceManager &Whitespaces);
130   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
131                                  Split Split,
132                                  WhitespaceManager &Whitespaces) {}
133 };
134 
135 class BreakableLineComment : public BreakableSingleLineToken {
136 public:
137   /// \brief Creates a breakable token for a line comment.
138   ///
139   /// \p StartColumn specifies the column in which the comment will start
140   /// after formatting.
141   BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
142                        unsigned StartColumn, bool InPPDirective,
143                        encoding::Encoding Encoding, const FormatStyle &Style);
144 
145   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
146                          unsigned ColumnLimit) const;
147   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
148                            WhitespaceManager &Whitespaces);
149   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
150                                  Split Split,
151                                  WhitespaceManager &Whitespaces);
152   virtual void replaceWhitespaceBefore(unsigned LineIndex,
153                                        WhitespaceManager &Whitespaces);
154 
155 private:
156   // The prefix without an additional space if one was added.
157   StringRef OriginalPrefix;
158 };
159 
160 class BreakableBlockComment : public BreakableToken {
161 public:
162   /// \brief Creates a breakable token for a block comment.
163   ///
164   /// \p StartColumn specifies the column in which the comment will start
165   /// after formatting, while \p OriginalStartColumn specifies in which
166   /// column the comment started before formatting.
167   /// If the comment starts a line after formatting, set \p FirstInLine to true.
168   BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
169                         unsigned StartColumn, unsigned OriginaStartColumn,
170                         bool FirstInLine, bool InPPDirective,
171                         encoding::Encoding Encoding, const FormatStyle &Style);
172 
173   virtual unsigned getLineCount() const;
174   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
175                                            unsigned TailOffset,
176                                            StringRef::size_type Length) const;
177   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
178                          unsigned ColumnLimit) const;
179   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
180                            WhitespaceManager &Whitespaces);
181   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
182                                  Split Split,
183                                  WhitespaceManager &Whitespaces);
184   virtual void replaceWhitespaceBefore(unsigned LineIndex,
185                                        WhitespaceManager &Whitespaces);
186 
187 private:
188   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
189   // so that all whitespace between the lines is accounted to Lines[LineIndex]
190   // as leading whitespace:
191   // - Lines[LineIndex] points to the text after that whitespace
192   // - Lines[LineIndex-1] shrinks by its trailing whitespace
193   // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
194   //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
195   //
196   // Sets StartOfLineColumn to the intended column in which the text at
197   // Lines[LineIndex] starts (note that the decoration, if present, is not
198   // considered part of the text).
199   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
200 
201   // Returns the column at which the text in line LineIndex starts, when broken
202   // at TailOffset. Note that the decoration (if present) is not considered part
203   // of the text.
204   unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
205 
206   // Contains the text of the lines of the block comment, excluding the leading
207   // /* in the first line and trailing */ in the last line, and excluding all
208   // trailing whitespace between the lines. Note that the decoration (if
209   // present) is also not considered part of the text.
210   SmallVector<StringRef, 16> Lines;
211 
212   // LeadingWhitespace[i] is the number of characters regarded as whitespace in
213   // front of Lines[i]. Note that this can include "* " sequences, which we
214   // regard as whitespace when all lines have a "*" prefix.
215   SmallVector<unsigned, 16> LeadingWhitespace;
216 
217   // StartOfLineColumn[i] is the target column at which Line[i] should be.
218   // Note that this excludes a leading "* " or "*" in case all lines have
219   // a "*" prefix.
220   SmallVector<unsigned, 16> StartOfLineColumn;
221 
222   // The column at which the text of a broken line should start.
223   // Note that an optional decoration would go before that column.
224   // IndentAtLineBreak is a uniform position for all lines in a block comment,
225   // regardless of their relative position.
226   // FIXME: Revisit the decision to do this; the main reason was to support
227   // patterns like
228   // /**************//**
229   //  * Comment
230   // We could also support such patterns by special casing the first line
231   // instead.
232   unsigned IndentAtLineBreak;
233 
234   // This is to distinguish between the case when the last line was empty and
235   // the case when it started with a decoration ("*" or "* ").
236   bool LastLineNeedsDecoration;
237 
238   // Either "* " if all lines begin with a "*", or empty.
239   StringRef Decoration;
240 };
241 
242 } // namespace format
243 } // namespace clang
244 
245 #endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
246