//===--- BreakableToken.h - Format C++ code -------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares BreakableToken and its subclasses
/// (BreakableSingleLineToken, BreakableStringLiteral, BreakableLineComment and
/// BreakableBlockComment), which contain token type-specific logic to break
/// long lines in tokens.
///
//===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
18 #define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
19 
20 #include "Encoding.h"
21 #include "TokenAnnotator.h"
22 #include "WhitespaceManager.h"
23 #include <utility>
24 
25 namespace clang {
26 namespace format {
27 
28 struct FormatStyle;
29 
30 /// \brief Base class for strategies on how to break tokens.
31 ///
32 /// FIXME: The interface seems set in stone, so we might want to just pull the
33 /// strategy into the class, instead of controlling it from the outside.
34 class BreakableToken {
35 public:
36   /// \brief Contains starting character index and length of split.
37   typedef std::pair<StringRef::size_type, unsigned> Split;
38 
~BreakableToken()39   virtual ~BreakableToken() {}
40 
41   /// \brief Returns the number of lines in this token in the original code.
42   virtual unsigned getLineCount() const = 0;
43 
44   /// \brief Returns the number of columns required to format the piece of line
45   /// at \p LineIndex, from byte offset \p Offset with length \p Length.
46   ///
47   /// Note that previous breaks are not taken into account. \p Offset is always
48   /// specified from the start of the (original) line.
49   /// \p Length can be set to StringRef::npos, which means "to the end of line".
50   virtual unsigned
51   getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
52                           StringRef::size_type Length) const = 0;
53 
54   /// \brief Returns a range (offset, length) at which to break the line at
55   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
56   /// violate \p ColumnLimit.
57   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
58                          unsigned ColumnLimit) const = 0;
59 
60   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
61   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
62                            WhitespaceManager &Whitespaces) = 0;
63 
64   /// \brief Replaces the whitespace range described by \p Split with a single
65   /// space.
66   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
67                                  Split Split,
68                                  WhitespaceManager &Whitespaces) = 0;
69 
70   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
replaceWhitespaceBefore(unsigned LineIndex,WhitespaceManager & Whitespaces)71   virtual void replaceWhitespaceBefore(unsigned LineIndex,
72                                        WhitespaceManager &Whitespaces) {}
73 
74 protected:
BreakableToken(const FormatToken & Tok,unsigned IndentLevel,bool InPPDirective,encoding::Encoding Encoding,const FormatStyle & Style)75   BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
76                  bool InPPDirective, encoding::Encoding Encoding,
77                  const FormatStyle &Style)
78       : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
79         Encoding(Encoding), Style(Style) {}
80 
81   const FormatToken &Tok;
82   const unsigned IndentLevel;
83   const bool InPPDirective;
84   const encoding::Encoding Encoding;
85   const FormatStyle &Style;
86 };
87 
88 /// \brief Base class for single line tokens that can be broken.
89 ///
90 /// \c getSplit() needs to be implemented by child classes.
91 class BreakableSingleLineToken : public BreakableToken {
92 public:
93   unsigned getLineCount() const override;
94   unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
95                                    StringRef::size_type Length) const override;
96 
97 protected:
98   BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
99                            unsigned StartColumn, StringRef Prefix,
100                            StringRef Postfix, bool InPPDirective,
101                            encoding::Encoding Encoding,
102                            const FormatStyle &Style);
103 
104   // The column in which the token starts.
105   unsigned StartColumn;
106   // The prefix a line needs after a break in the token.
107   StringRef Prefix;
108   // The postfix a line needs before introducing a break.
109   StringRef Postfix;
110   // The token text excluding the prefix and postfix.
111   StringRef Line;
112 };
113 
114 class BreakableStringLiteral : public BreakableSingleLineToken {
115 public:
116   /// \brief Creates a breakable token for a single line string literal.
117   ///
118   /// \p StartColumn specifies the column in which the token will start
119   /// after formatting.
120   BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
121                          unsigned StartColumn, StringRef Prefix,
122                          StringRef Postfix, bool InPPDirective,
123                          encoding::Encoding Encoding, const FormatStyle &Style);
124 
125   Split getSplit(unsigned LineIndex, unsigned TailOffset,
126                  unsigned ColumnLimit) const override;
127   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
128                    WhitespaceManager &Whitespaces) override;
replaceWhitespace(unsigned LineIndex,unsigned TailOffset,Split Split,WhitespaceManager & Whitespaces)129   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
130                          WhitespaceManager &Whitespaces) override {}
131 };
132 
133 class BreakableLineComment : public BreakableSingleLineToken {
134 public:
135   /// \brief Creates a breakable token for a line comment.
136   ///
137   /// \p StartColumn specifies the column in which the comment will start
138   /// after formatting.
139   BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
140                        unsigned StartColumn, bool InPPDirective,
141                        encoding::Encoding Encoding, const FormatStyle &Style);
142 
143   Split getSplit(unsigned LineIndex, unsigned TailOffset,
144                  unsigned ColumnLimit) const override;
145   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
146                    WhitespaceManager &Whitespaces) override;
147   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
148                          WhitespaceManager &Whitespaces) override;
149   void replaceWhitespaceBefore(unsigned LineIndex,
150                                WhitespaceManager &Whitespaces) override;
151 
152 private:
153   // The prefix without an additional space if one was added.
154   StringRef OriginalPrefix;
155 };
156 
157 class BreakableBlockComment : public BreakableToken {
158 public:
159   /// \brief Creates a breakable token for a block comment.
160   ///
161   /// \p StartColumn specifies the column in which the comment will start
162   /// after formatting, while \p OriginalStartColumn specifies in which
163   /// column the comment started before formatting.
164   /// If the comment starts a line after formatting, set \p FirstInLine to true.
165   BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
166                         unsigned StartColumn, unsigned OriginaStartColumn,
167                         bool FirstInLine, bool InPPDirective,
168                         encoding::Encoding Encoding, const FormatStyle &Style);
169 
170   unsigned getLineCount() const override;
171   unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
172                                    StringRef::size_type Length) const override;
173   Split getSplit(unsigned LineIndex, unsigned TailOffset,
174                  unsigned ColumnLimit) const override;
175   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
176                    WhitespaceManager &Whitespaces) override;
177   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
178                          WhitespaceManager &Whitespaces) override;
179   void replaceWhitespaceBefore(unsigned LineIndex,
180                                WhitespaceManager &Whitespaces) override;
181 
182 private:
183   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
184   // so that all whitespace between the lines is accounted to Lines[LineIndex]
185   // as leading whitespace:
186   // - Lines[LineIndex] points to the text after that whitespace
187   // - Lines[LineIndex-1] shrinks by its trailing whitespace
188   // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
189   //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
190   //
191   // Sets StartOfLineColumn to the intended column in which the text at
192   // Lines[LineIndex] starts (note that the decoration, if present, is not
193   // considered part of the text).
194   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
195 
196   // Returns the column at which the text in line LineIndex starts, when broken
197   // at TailOffset. Note that the decoration (if present) is not considered part
198   // of the text.
199   unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
200 
201   // Contains the text of the lines of the block comment, excluding the leading
202   // /* in the first line and trailing */ in the last line, and excluding all
203   // trailing whitespace between the lines. Note that the decoration (if
204   // present) is also not considered part of the text.
205   SmallVector<StringRef, 16> Lines;
206 
207   // LeadingWhitespace[i] is the number of characters regarded as whitespace in
208   // front of Lines[i]. Note that this can include "* " sequences, which we
209   // regard as whitespace when all lines have a "*" prefix.
210   SmallVector<unsigned, 16> LeadingWhitespace;
211 
212   // StartOfLineColumn[i] is the target column at which Line[i] should be.
213   // Note that this excludes a leading "* " or "*" in case all lines have
214   // a "*" prefix.
215   // The first line's target column is always positive. The remaining lines'
216   // target columns are relative to the first line to allow correct indentation
217   // of comments in \c WhitespaceManager. Thus they can be negative as well (in
218   // case the first line needs to be unindented more than there's actual
219   // whitespace in another line).
220   SmallVector<int, 16> StartOfLineColumn;
221 
222   // The column at which the text of a broken line should start.
223   // Note that an optional decoration would go before that column.
224   // IndentAtLineBreak is a uniform position for all lines in a block comment,
225   // regardless of their relative position.
226   // FIXME: Revisit the decision to do this; the main reason was to support
227   // patterns like
228   // /**************//**
229   //  * Comment
230   // We could also support such patterns by special casing the first line
231   // instead.
232   unsigned IndentAtLineBreak;
233 
234   // This is to distinguish between the case when the last line was empty and
235   // the case when it started with a decoration ("*" or "* ").
236   bool LastLineNeedsDecoration;
237 
238   // Either "* " if all lines begin with a "*", or empty.
239   StringRef Decoration;
240 };
241 
242 } // namespace format
243 } // namespace clang
244 
245 #endif
246