1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements specific functions of \c FormatTokens and their
11 /// roles.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <climits>
20 
21 namespace clang {
22 namespace format {
23 
24 const char *getTokenTypeName(TokenType Type) {
25   static const char *const TokNames[] = {
26 #define TYPE(X) #X,
27       LIST_TOKEN_TYPES
28 #undef TYPE
29       nullptr};
30 
31   if (Type < NUM_TOKEN_TYPES)
32     return TokNames[Type];
33   llvm_unreachable("unknown TokenType");
34   return nullptr;
35 }
36 
37 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
38 // duplication.
39 bool FormatToken::isSimpleTypeSpecifier() const {
40   switch (Tok.getKind()) {
41   case tok::kw_short:
42   case tok::kw_long:
43   case tok::kw___int64:
44   case tok::kw___int128:
45   case tok::kw_signed:
46   case tok::kw_unsigned:
47   case tok::kw_void:
48   case tok::kw_char:
49   case tok::kw_int:
50   case tok::kw_half:
51   case tok::kw_float:
52   case tok::kw_double:
53   case tok::kw__Float16:
54   case tok::kw___float128:
55   case tok::kw_wchar_t:
56   case tok::kw_bool:
57   case tok::kw___underlying_type:
58   case tok::annot_typename:
59   case tok::kw_char8_t:
60   case tok::kw_char16_t:
61   case tok::kw_char32_t:
62   case tok::kw_typeof:
63   case tok::kw_decltype:
64     return true;
65   default:
66     return false;
67   }
68 }
69 
70 TokenRole::~TokenRole() {}
71 
72 void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
73 
74 unsigned CommaSeparatedList::formatAfterToken(LineState &State,
75                                               ContinuationIndenter *Indenter,
76                                               bool DryRun) {
77   if (State.NextToken == nullptr || !State.NextToken->Previous)
78     return 0;
79 
80   if (Formats.size() == 1)
81     return 0; // Handled by formatFromToken
82 
83   // Ensure that we start on the opening brace.
84   const FormatToken *LBrace =
85       State.NextToken->Previous->getPreviousNonComment();
86   if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
87       LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral ||
88       LBrace->Next->Type == TT_DesignatedInitializerPeriod)
89     return 0;
90 
91   // Calculate the number of code points we have to format this list. As the
92   // first token is already placed, we have to subtract it.
93   unsigned RemainingCodePoints =
94       Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
95 
96   // Find the best ColumnFormat, i.e. the best number of columns to use.
97   const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
98 
99   // If no ColumnFormat can be used, the braced list would generally be
100   // bin-packed. Add a severe penalty to this so that column layouts are
101   // preferred if possible.
102   if (!Format)
103     return 10000;
104 
105   // Format the entire list.
106   unsigned Penalty = 0;
107   unsigned Column = 0;
108   unsigned Item = 0;
109   while (State.NextToken != LBrace->MatchingParen) {
110     bool NewLine = false;
111     unsigned ExtraSpaces = 0;
112 
113     // If the previous token was one of our commas, we are now on the next item.
114     if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
115       if (!State.NextToken->isTrailingComment()) {
116         ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
117         ++Column;
118       }
119       ++Item;
120     }
121 
122     if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
123       Column = 0;
124       NewLine = true;
125     }
126 
127     // Place token using the continuation indenter and store the penalty.
128     Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
129   }
130   return Penalty;
131 }
132 
133 unsigned CommaSeparatedList::formatFromToken(LineState &State,
134                                              ContinuationIndenter *Indenter,
135                                              bool DryRun) {
136   // Formatting with 1 Column isn't really a column layout, so we don't need the
137   // special logic here. We can just avoid bin packing any of the parameters.
138   if (Formats.size() == 1 || HasNestedBracedList)
139     State.Stack.back().AvoidBinPacking = true;
140   return 0;
141 }
142 
143 // Returns the lengths in code points between Begin and End (both included),
144 // assuming that the entire sequence is put on a single line.
145 static unsigned CodePointsBetween(const FormatToken *Begin,
146                                   const FormatToken *End) {
147   assert(End->TotalLength >= Begin->TotalLength);
148   return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
149 }
150 
151 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
152   // FIXME: At some point we might want to do this for other lists, too.
153   if (!Token->MatchingParen ||
154       !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare))
155     return;
156 
157   // In C++11 braced list style, we should not format in columns unless they
158   // have many items (20 or more) or we allow bin-packing of function call
159   // arguments.
160   if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
161       Commas.size() < 19)
162     return;
163 
164   // Limit column layout for JavaScript array initializers to 20 or more items
165   // for now to introduce it carefully. We can become more aggressive if this
166   // necessary.
167   if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
168     return;
169 
170   // Column format doesn't really make sense if we don't align after brackets.
171   if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
172     return;
173 
174   FormatToken *ItemBegin = Token->Next;
175   while (ItemBegin->isTrailingComment())
176     ItemBegin = ItemBegin->Next;
177   SmallVector<bool, 8> MustBreakBeforeItem;
178 
179   // The lengths of an item if it is put at the end of the line. This includes
180   // trailing comments which are otherwise ignored for column alignment.
181   SmallVector<unsigned, 8> EndOfLineItemLength;
182 
183   bool HasSeparatingComment = false;
184   for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
185     // Skip comments on their own line.
186     while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
187       ItemBegin = ItemBegin->Next;
188       HasSeparatingComment = i > 0;
189     }
190 
191     MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
192     if (ItemBegin->is(tok::l_brace))
193       HasNestedBracedList = true;
194     const FormatToken *ItemEnd = nullptr;
195     if (i == Commas.size()) {
196       ItemEnd = Token->MatchingParen;
197       const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
198       ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
199       if (Style.Cpp11BracedListStyle &&
200           !ItemEnd->Previous->isTrailingComment()) {
201         // In Cpp11 braced list style, the } and possibly other subsequent
202         // tokens will need to stay on a line with the last element.
203         while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
204           ItemEnd = ItemEnd->Next;
205       } else {
206         // In other braced lists styles, the "}" can be wrapped to the new line.
207         ItemEnd = Token->MatchingParen->Previous;
208       }
209     } else {
210       ItemEnd = Commas[i];
211       // The comma is counted as part of the item when calculating the length.
212       ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
213 
214       // Consume trailing comments so the are included in EndOfLineItemLength.
215       if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
216           ItemEnd->Next->isTrailingComment())
217         ItemEnd = ItemEnd->Next;
218     }
219     EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
220     // If there is a trailing comma in the list, the next item will start at the
221     // closing brace. Don't create an extra item for this.
222     if (ItemEnd->getNextNonComment() == Token->MatchingParen)
223       break;
224     ItemBegin = ItemEnd->Next;
225   }
226 
227   // Don't use column layout for lists with few elements and in presence of
228   // separating comments.
229   if (Commas.size() < 5 || HasSeparatingComment)
230     return;
231 
232   if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
233     return;
234 
235   // We can never place more than ColumnLimit / 3 items in a row (because of the
236   // spaces and the comma).
237   unsigned MaxItems = Style.ColumnLimit / 3;
238   std::vector<unsigned> MinSizeInColumn;
239   MinSizeInColumn.reserve(MaxItems);
240   for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
241     ColumnFormat Format;
242     Format.Columns = Columns;
243     Format.ColumnSizes.resize(Columns);
244     MinSizeInColumn.assign(Columns, UINT_MAX);
245     Format.LineCount = 1;
246     bool HasRowWithSufficientColumns = false;
247     unsigned Column = 0;
248     for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
249       assert(i < MustBreakBeforeItem.size());
250       if (MustBreakBeforeItem[i] || Column == Columns) {
251         ++Format.LineCount;
252         Column = 0;
253       }
254       if (Column == Columns - 1)
255         HasRowWithSufficientColumns = true;
256       unsigned Length =
257           (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
258       Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
259       MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
260       ++Column;
261     }
262     // If all rows are terminated early (e.g. by trailing comments), we don't
263     // need to look further.
264     if (!HasRowWithSufficientColumns)
265       break;
266     Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
267 
268     for (unsigned i = 0; i < Columns; ++i)
269       Format.TotalWidth += Format.ColumnSizes[i];
270 
271     // Don't use this Format, if the difference between the longest and shortest
272     // element in a column exceeds a threshold to avoid excessive spaces.
273     if ([&] {
274           for (unsigned i = 0; i < Columns - 1; ++i)
275             if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
276               return true;
277           return false;
278         }())
279       continue;
280 
281     // Ignore layouts that are bound to violate the column limit.
282     if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
283       continue;
284 
285     Formats.push_back(Format);
286   }
287 }
288 
289 const CommaSeparatedList::ColumnFormat *
290 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
291   const ColumnFormat *BestFormat = nullptr;
292   for (SmallVector<ColumnFormat, 4>::const_reverse_iterator
293            I = Formats.rbegin(),
294            E = Formats.rend();
295        I != E; ++I) {
296     if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) {
297       if (BestFormat && I->LineCount > BestFormat->LineCount)
298         break;
299       BestFormat = &*I;
300     }
301   }
302   return BestFormat;
303 }
304 
305 } // namespace format
306 } // namespace clang
307