1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file declares the token annotator, i.e. the component that creates
/// \c AnnotatedLines out of \c UnwrappedLines and annotates their
/// \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 namespace format {
23 
/// Classification of a line; stored in \c AnnotatedLine::Type.
///
/// The enumerators are implicitly numbered from 0 in declaration order, so
/// new values should be appended rather than inserted.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
36 
/// Kind of scope that encloses a piece of code.
enum ScopeType {
  /// Inside a class declaration or definition.
  ST_Class,
  /// Inside a function definition.
  ST_Function,
  /// Inside any other scope block (loop, if/else, etc).
  ST_Other,
};
45 
46 class AnnotatedLine {
47 public:
AnnotatedLine(const UnwrappedLine & Line)48   AnnotatedLine(const UnwrappedLine &Line)
49       : First(Line.Tokens.front().Tok), Level(Line.Level),
50         PPLevel(Line.PPLevel),
51         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
52         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
53         InPPDirective(Line.InPPDirective),
54         InPragmaDirective(Line.InPragmaDirective),
55         InMacroBody(Line.InMacroBody),
56         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
57         IsMultiVariableDeclStmt(false), Affected(false),
58         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
59         ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
60         FirstStartColumn(Line.FirstStartColumn) {
61     assert(!Line.Tokens.empty());
62 
63     // Calculate Next and Previous for all tokens. Note that we must overwrite
64     // Next and Previous for every token, as previous formatting runs might have
65     // left them in a different state.
66     First->Previous = nullptr;
67     FormatToken *Current = First;
68     addChildren(Line.Tokens.front(), Current);
69     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
70       if (Node.Tok->MacroParent)
71         ContainsMacroCall = true;
72       Current->Next = Node.Tok;
73       Node.Tok->Previous = Current;
74       Current = Current->Next;
75       addChildren(Node, Current);
76       // FIXME: if we add children, previous will point to the token before
77       // the children; changing this requires significant changes across
78       // clang-format.
79     }
80     Last = Current;
81     Last->Next = nullptr;
82   }
83 
addChildren(const UnwrappedLineNode & Node,FormatToken * Current)84   void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
85     Current->Children.clear();
86     for (const auto &Child : Node.Children) {
87       Children.push_back(new AnnotatedLine(Child));
88       if (Children.back()->ContainsMacroCall)
89         ContainsMacroCall = true;
90       Current->Children.push_back(Children.back());
91     }
92   }
93 
size()94   size_t size() const {
95     size_t Size = 1;
96     for (const auto *Child : Children)
97       Size += Child->size();
98     return Size;
99   }
100 
~AnnotatedLine()101   ~AnnotatedLine() {
102     for (AnnotatedLine *Child : Children)
103       delete Child;
104     FormatToken *Current = First;
105     while (Current) {
106       Current->Children.clear();
107       Current->Role.reset();
108       Current = Current->Next;
109     }
110   }
111 
isComment()112   bool isComment() const {
113     return First && First->is(tok::comment) && !First->getNextNonComment();
114   }
115 
116   /// \c true if this line starts with the given tokens in order, ignoring
117   /// comments.
startsWith(Ts...Tokens)118   template <typename... Ts> bool startsWith(Ts... Tokens) const {
119     return First && First->startsSequence(Tokens...);
120   }
121 
122   /// \c true if this line ends with the given tokens in reversed order,
123   /// ignoring comments.
124   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
125   /// this line is like "... T3 T2 T1".
endsWith(Ts...Tokens)126   template <typename... Ts> bool endsWith(Ts... Tokens) const {
127     return Last && Last->endsSequence(Tokens...);
128   }
129 
130   /// \c true if this line looks like a function definition instead of a
131   /// function declaration. Asserts MightBeFunctionDecl.
mightBeFunctionDefinition()132   bool mightBeFunctionDefinition() const {
133     assert(MightBeFunctionDecl);
134     // Try to determine if the end of a stream of tokens is either the
135     // Definition or the Declaration for a function. It does this by looking for
136     // the ';' in foo(); and using that it ends with a ; to know this is the
137     // Definition, however the line could end with
138     //    foo(); /* comment */
139     // or
140     //    foo(); // comment
141     // or
142     //    foo() // comment
143     // endsWith() ignores the comment.
144     return !endsWith(tok::semi);
145   }
146 
147   /// \c true if this line starts a namespace definition.
startsWithNamespace()148   bool startsWithNamespace() const {
149     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
150            startsWith(tok::kw_inline, tok::kw_namespace) ||
151            startsWith(tok::kw_export, tok::kw_namespace);
152   }
153 
getFirstNonComment()154   FormatToken *getFirstNonComment() const {
155     assert(First);
156     return First->is(tok::comment) ? First->getNextNonComment() : First;
157   }
158 
getLastNonComment()159   FormatToken *getLastNonComment() const {
160     assert(Last);
161     return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
162   }
163 
164   FormatToken *First;
165   FormatToken *Last;
166 
167   SmallVector<AnnotatedLine *, 0> Children;
168 
169   LineType Type;
170   unsigned Level;
171   unsigned PPLevel;
172   size_t MatchingOpeningBlockLineIndex;
173   size_t MatchingClosingBlockLineIndex;
174   bool InPPDirective;
175   bool InPragmaDirective;
176   bool InMacroBody;
177   bool MustBeDeclaration;
178   bool MightBeFunctionDecl;
179   bool IsMultiVariableDeclStmt;
180 
181   /// \c True if this line contains a macro call for which an expansion exists.
182   bool ContainsMacroCall = false;
183 
184   /// \c True if this line should be formatted, i.e. intersects directly or
185   /// indirectly with one of the input ranges.
186   bool Affected;
187 
188   /// \c True if the leading empty lines of this line intersect with one of the
189   /// input ranges.
190   bool LeadingEmptyLinesAffected;
191 
192   /// \c True if one of this line's children intersects with an input range.
193   bool ChildrenAffected;
194 
195   /// \c True if breaking after last attribute group in function return type.
196   bool ReturnTypeWrapped;
197 
198   /// \c True if this line should be indented by ContinuationIndent in addition
199   /// to the normal indention level.
200   bool IsContinuation;
201 
202   unsigned FirstStartColumn;
203 
204 private:
205   // Disallow copying.
206   AnnotatedLine(const AnnotatedLine &) = delete;
207   void operator=(const AnnotatedLine &) = delete;
208 };
209 
210 /// Determines extra information about the tokens comprising an
211 /// \c UnwrappedLine.
212 class TokenAnnotator {
213 public:
TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)214   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
215       : Style(Style), Keywords(Keywords) {}
216 
217   /// Adapts the indent levels of comment lines to the indent of the
218   /// subsequent line.
219   // FIXME: Can/should this be done in the UnwrappedLineParser?
220   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
221 
222   void annotate(AnnotatedLine &Line);
223   void calculateFormattingInformation(AnnotatedLine &Line) const;
224 
225 private:
226   /// Calculate the penalty for splitting before \c Tok.
227   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
228                         bool InFunctionDecl) const;
229 
230   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
231 
232   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
233                             const FormatToken &Right) const;
234 
235   bool spaceRequiredBefore(const AnnotatedLine &Line,
236                            const FormatToken &Right) const;
237 
238   bool mustBreakBefore(const AnnotatedLine &Line,
239                        const FormatToken &Right) const;
240 
241   bool canBreakBefore(const AnnotatedLine &Line,
242                       const FormatToken &Right) const;
243 
244   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
245 
246   void printDebugInfo(const AnnotatedLine &Line) const;
247 
248   void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
249 
250   void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
251 
252   FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
253                                               FormatToken *CurrentToken,
254                                               unsigned Depth) const;
255   FormatStyle::PointerAlignmentStyle
256   getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
257 
258   FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
259       const FormatToken &PointerOrReference) const;
260 
261   const FormatStyle &Style;
262 
263   const AdditionalKeywords &Keywords;
264 
265   SmallVector<ScopeType> Scopes;
266 };
267 
268 } // end namespace format
269 } // end namespace clang
270 
271 #endif
272