1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements specific functions of \c FormatTokens and their
11 /// roles.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <climits>
20
21 namespace clang {
22 namespace format {
23
getTokenTypeName(TokenType Type)24 const char *getTokenTypeName(TokenType Type) {
25 static const char *const TokNames[] = {
26 #define TYPE(X) #X,
27 LIST_TOKEN_TYPES
28 #undef TYPE
29 nullptr};
30
31 if (Type < NUM_TOKEN_TYPES)
32 return TokNames[Type];
33 llvm_unreachable("unknown TokenType");
34 return nullptr;
35 }
36
37 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38 // duplication.
isSimpleTypeSpecifier() const39 bool FormatToken::isSimpleTypeSpecifier() const {
40 switch (Tok.getKind()) {
41 case tok::kw_short:
42 case tok::kw_long:
43 case tok::kw___int64:
44 case tok::kw___int128:
45 case tok::kw_signed:
46 case tok::kw_unsigned:
47 case tok::kw_void:
48 case tok::kw_char:
49 case tok::kw_int:
50 case tok::kw_half:
51 case tok::kw_float:
52 case tok::kw_double:
53 case tok::kw___bf16:
54 case tok::kw__Float16:
55 case tok::kw___float128:
56 case tok::kw_wchar_t:
57 case tok::kw_bool:
58 case tok::kw___underlying_type:
59 case tok::annot_typename:
60 case tok::kw_char8_t:
61 case tok::kw_char16_t:
62 case tok::kw_char32_t:
63 case tok::kw_typeof:
64 case tok::kw_decltype:
65 case tok::kw__Atomic:
66 return true;
67 default:
68 return false;
69 }
70 }
71
~TokenRole()72 TokenRole::~TokenRole() {}
73
precomputeFormattingInfos(const FormatToken * Token)74 void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
75
formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)76 unsigned CommaSeparatedList::formatAfterToken(LineState &State,
77 ContinuationIndenter *Indenter,
78 bool DryRun) {
79 if (State.NextToken == nullptr || !State.NextToken->Previous)
80 return 0;
81
82 if (Formats.size() == 1)
83 return 0; // Handled by formatFromToken
84
85 // Ensure that we start on the opening brace.
86 const FormatToken *LBrace =
87 State.NextToken->Previous->getPreviousNonComment();
88 if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
89 LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
90 LBrace->Next->is(TT_DesignatedInitializerPeriod))
91 return 0;
92
93 // Calculate the number of code points we have to format this list. As the
94 // first token is already placed, we have to subtract it.
95 unsigned RemainingCodePoints =
96 Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
97
98 // Find the best ColumnFormat, i.e. the best number of columns to use.
99 const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
100
101 // If no ColumnFormat can be used, the braced list would generally be
102 // bin-packed. Add a severe penalty to this so that column layouts are
103 // preferred if possible.
104 if (!Format)
105 return 10000;
106
107 // Format the entire list.
108 unsigned Penalty = 0;
109 unsigned Column = 0;
110 unsigned Item = 0;
111 while (State.NextToken != LBrace->MatchingParen) {
112 bool NewLine = false;
113 unsigned ExtraSpaces = 0;
114
115 // If the previous token was one of our commas, we are now on the next item.
116 if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
117 if (!State.NextToken->isTrailingComment()) {
118 ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
119 ++Column;
120 }
121 ++Item;
122 }
123
124 if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
125 Column = 0;
126 NewLine = true;
127 }
128
129 // Place token using the continuation indenter and store the penalty.
130 Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
131 }
132 return Penalty;
133 }
134
formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)135 unsigned CommaSeparatedList::formatFromToken(LineState &State,
136 ContinuationIndenter *Indenter,
137 bool DryRun) {
138 // Formatting with 1 Column isn't really a column layout, so we don't need the
139 // special logic here. We can just avoid bin packing any of the parameters.
140 if (Formats.size() == 1 || HasNestedBracedList)
141 State.Stack.back().AvoidBinPacking = true;
142 return 0;
143 }
144
145 // Returns the lengths in code points between Begin and End (both included),
146 // assuming that the entire sequence is put on a single line.
CodePointsBetween(const FormatToken * Begin,const FormatToken * End)147 static unsigned CodePointsBetween(const FormatToken *Begin,
148 const FormatToken *End) {
149 assert(End->TotalLength >= Begin->TotalLength);
150 return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
151 }
152
precomputeFormattingInfos(const FormatToken * Token)153 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
154 // FIXME: At some point we might want to do this for other lists, too.
155 if (!Token->MatchingParen ||
156 !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
157 return;
158
159 // In C++11 braced list style, we should not format in columns unless they
160 // have many items (20 or more) or we allow bin-packing of function call
161 // arguments.
162 if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
163 Commas.size() < 19)
164 return;
165
166 // Limit column layout for JavaScript array initializers to 20 or more items
167 // for now to introduce it carefully. We can become more aggressive if this
168 // necessary.
169 if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
170 return;
171
172 // Column format doesn't really make sense if we don't align after brackets.
173 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
174 return;
175
176 FormatToken *ItemBegin = Token->Next;
177 while (ItemBegin->isTrailingComment())
178 ItemBegin = ItemBegin->Next;
179 SmallVector<bool, 8> MustBreakBeforeItem;
180
181 // The lengths of an item if it is put at the end of the line. This includes
182 // trailing comments which are otherwise ignored for column alignment.
183 SmallVector<unsigned, 8> EndOfLineItemLength;
184
185 bool HasSeparatingComment = false;
186 for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
187 // Skip comments on their own line.
188 while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
189 ItemBegin = ItemBegin->Next;
190 HasSeparatingComment = i > 0;
191 }
192
193 MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
194 if (ItemBegin->is(tok::l_brace))
195 HasNestedBracedList = true;
196 const FormatToken *ItemEnd = nullptr;
197 if (i == Commas.size()) {
198 ItemEnd = Token->MatchingParen;
199 const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
200 ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
201 if (Style.Cpp11BracedListStyle &&
202 !ItemEnd->Previous->isTrailingComment()) {
203 // In Cpp11 braced list style, the } and possibly other subsequent
204 // tokens will need to stay on a line with the last element.
205 while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
206 ItemEnd = ItemEnd->Next;
207 } else {
208 // In other braced lists styles, the "}" can be wrapped to the new line.
209 ItemEnd = Token->MatchingParen->Previous;
210 }
211 } else {
212 ItemEnd = Commas[i];
213 // The comma is counted as part of the item when calculating the length.
214 ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
215
216 // Consume trailing comments so the are included in EndOfLineItemLength.
217 if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
218 ItemEnd->Next->isTrailingComment())
219 ItemEnd = ItemEnd->Next;
220 }
221 EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
222 // If there is a trailing comma in the list, the next item will start at the
223 // closing brace. Don't create an extra item for this.
224 if (ItemEnd->getNextNonComment() == Token->MatchingParen)
225 break;
226 ItemBegin = ItemEnd->Next;
227 }
228
229 // Don't use column layout for lists with few elements and in presence of
230 // separating comments.
231 if (Commas.size() < 5 || HasSeparatingComment)
232 return;
233
234 if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
235 return;
236
237 // We can never place more than ColumnLimit / 3 items in a row (because of the
238 // spaces and the comma).
239 unsigned MaxItems = Style.ColumnLimit / 3;
240 std::vector<unsigned> MinSizeInColumn;
241 MinSizeInColumn.reserve(MaxItems);
242 for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
243 ColumnFormat Format;
244 Format.Columns = Columns;
245 Format.ColumnSizes.resize(Columns);
246 MinSizeInColumn.assign(Columns, UINT_MAX);
247 Format.LineCount = 1;
248 bool HasRowWithSufficientColumns = false;
249 unsigned Column = 0;
250 for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
251 assert(i < MustBreakBeforeItem.size());
252 if (MustBreakBeforeItem[i] || Column == Columns) {
253 ++Format.LineCount;
254 Column = 0;
255 }
256 if (Column == Columns - 1)
257 HasRowWithSufficientColumns = true;
258 unsigned Length =
259 (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
260 Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
261 MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
262 ++Column;
263 }
264 // If all rows are terminated early (e.g. by trailing comments), we don't
265 // need to look further.
266 if (!HasRowWithSufficientColumns)
267 break;
268 Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
269
270 for (unsigned i = 0; i < Columns; ++i)
271 Format.TotalWidth += Format.ColumnSizes[i];
272
273 // Don't use this Format, if the difference between the longest and shortest
274 // element in a column exceeds a threshold to avoid excessive spaces.
275 if ([&] {
276 for (unsigned i = 0; i < Columns - 1; ++i)
277 if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
278 return true;
279 return false;
280 }())
281 continue;
282
283 // Ignore layouts that are bound to violate the column limit.
284 if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
285 continue;
286
287 Formats.push_back(Format);
288 }
289 }
290
291 const CommaSeparatedList::ColumnFormat *
getColumnFormat(unsigned RemainingCharacters) const292 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
293 const ColumnFormat *BestFormat = nullptr;
294 for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
295 I = Formats.rbegin(),
296 E = Formats.rend();
297 I != E; ++I) {
298 if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
299 if (BestFormat && I->LineCount > BestFormat->LineCount)
300 break;
301 BestFormat = &*I;
302 }
303 }
304 return BestFormat;
305 }
306
307 } // namespace format
308 } // namespace clang
309