1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 namespace format {
23 
/// Classification of a line. \c AnnotatedLine::Type holds one of these values.
// NOTE(review): only the Objective-C enumerators are described in this header;
// the meaning of the remaining ones is presumably what their names suggest --
// confirm against the annotator implementation.
enum LineType {
  LT_Invalid,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
36 
37 class AnnotatedLine {
38 public:
39   AnnotatedLine(const UnwrappedLine &Line)
40       : First(Line.Tokens.front().Tok), Level(Line.Level),
41         PPLevel(Line.PPLevel),
42         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
43         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
44         InPPDirective(Line.InPPDirective),
45         InPragmaDirective(Line.InPragmaDirective),
46         InMacroBody(Line.InMacroBody),
47         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
48         IsMultiVariableDeclStmt(false), Affected(false),
49         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
50         ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
51         FirstStartColumn(Line.FirstStartColumn) {
52     assert(!Line.Tokens.empty());
53 
54     // Calculate Next and Previous for all tokens. Note that we must overwrite
55     // Next and Previous for every token, as previous formatting runs might have
56     // left them in a different state.
57     First->Previous = nullptr;
58     FormatToken *Current = First;
59     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
60       Current->Next = Node.Tok;
61       Node.Tok->Previous = Current;
62       Current = Current->Next;
63       Current->Children.clear();
64       for (const auto &Child : Node.Children) {
65         Children.push_back(new AnnotatedLine(Child));
66         Current->Children.push_back(Children.back());
67       }
68     }
69     Last = Current;
70     Last->Next = nullptr;
71   }
72 
73   ~AnnotatedLine() {
74     for (AnnotatedLine *Child : Children)
75       delete Child;
76     FormatToken *Current = First;
77     while (Current) {
78       Current->Children.clear();
79       Current->Role.reset();
80       Current = Current->Next;
81     }
82   }
83 
84   bool isComment() const {
85     return First && First->is(tok::comment) && !First->getNextNonComment();
86   }
87 
88   /// \c true if this line starts with the given tokens in order, ignoring
89   /// comments.
90   template <typename... Ts> bool startsWith(Ts... Tokens) const {
91     return First && First->startsSequence(Tokens...);
92   }
93 
94   /// \c true if this line ends with the given tokens in reversed order,
95   /// ignoring comments.
96   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
97   /// this line is like "... T3 T2 T1".
98   template <typename... Ts> bool endsWith(Ts... Tokens) const {
99     return Last && Last->endsSequence(Tokens...);
100   }
101 
102   /// \c true if this line looks like a function definition instead of a
103   /// function declaration. Asserts MightBeFunctionDecl.
104   bool mightBeFunctionDefinition() const {
105     assert(MightBeFunctionDecl);
106     // Try to determine if the end of a stream of tokens is either the
107     // Definition or the Declaration for a function. It does this by looking for
108     // the ';' in foo(); and using that it ends with a ; to know this is the
109     // Definition, however the line could end with
110     //    foo(); /* comment */
111     // or
112     //    foo(); // comment
113     // or
114     //    foo() // comment
115     // endsWith() ignores the comment.
116     return !endsWith(tok::semi);
117   }
118 
119   /// \c true if this line starts a namespace definition.
120   bool startsWithNamespace() const {
121     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
122            startsWith(tok::kw_inline, tok::kw_namespace) ||
123            startsWith(tok::kw_export, tok::kw_namespace);
124   }
125 
126   FormatToken *First;
127   FormatToken *Last;
128 
129   SmallVector<AnnotatedLine *, 0> Children;
130 
131   LineType Type;
132   unsigned Level;
133   unsigned PPLevel;
134   size_t MatchingOpeningBlockLineIndex;
135   size_t MatchingClosingBlockLineIndex;
136   bool InPPDirective;
137   bool InPragmaDirective;
138   bool InMacroBody;
139   bool MustBeDeclaration;
140   bool MightBeFunctionDecl;
141   bool IsMultiVariableDeclStmt;
142 
143   /// \c True if this line should be formatted, i.e. intersects directly or
144   /// indirectly with one of the input ranges.
145   bool Affected;
146 
147   /// \c True if the leading empty lines of this line intersect with one of the
148   /// input ranges.
149   bool LeadingEmptyLinesAffected;
150 
151   /// \c True if one of this line's children intersects with an input range.
152   bool ChildrenAffected;
153 
154   /// \c True if breaking after last attribute group in function return type.
155   bool ReturnTypeWrapped;
156 
157   /// \c True if this line should be indented by ContinuationIndent in addition
158   /// to the normal indention level.
159   bool IsContinuation;
160 
161   unsigned FirstStartColumn;
162 
163 private:
164   // Disallow copying.
165   AnnotatedLine(const AnnotatedLine &) = delete;
166   void operator=(const AnnotatedLine &) = delete;
167 };
168 
/// Determines extra information about the tokens comprising an
/// \c UnwrappedLine.
///
/// The annotator keeps references to the \c FormatStyle and
/// \c AdditionalKeywords it is constructed with, so both must outlive it.
/// All member functions are \c const; none of them modifies the annotator
/// itself.
// NOTE(review): the member functions are only declared here. The one-line
// descriptions added below are derived from their names and signatures --
// confirm against the definitions.
class TokenAnnotator {
public:
  /// Stores references to \p Style and \p Keywords; no copies are made.
  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
      : Style(Style), Keywords(Keywords) {}

  /// Adapts the indent levels of comment lines to the indent of the
  /// subsequent line.
  // FIXME: Can/should this be done in the UnwrappedLineParser?
  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;

  // Both functions below receive the line by non-const reference, i.e. they
  // are allowed to modify it.
  void annotate(AnnotatedLine &Line) const;
  void calculateFormattingInformation(AnnotatedLine &Line) const;

private:
  /// Calculate the penalty for splitting before \c Tok.
  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
                        bool InFunctionDecl) const;

  // Whitespace queries. Presumably each answers whether a space is needed at
  // the position described by its name -- confirm against the definitions.
  bool spaceRequiredBeforeParens(const FormatToken &Right) const;

  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
                            const FormatToken &Right) const;

  bool spaceRequiredBefore(const AnnotatedLine &Line,
                           const FormatToken &Right) const;

  // Line-break queries. Presumably they answer whether a break before
  // \c Right is mandatory / permitted -- confirm against the definitions.
  bool mustBreakBefore(const AnnotatedLine &Line,
                       const FormatToken &Right) const;

  bool canBreakBefore(const AnnotatedLine &Line,
                      const FormatToken &Right) const;

  bool mustBreakForReturnType(const AnnotatedLine &Line) const;

  // Takes the line by const reference, so it cannot modify it.
  void printDebugInfo(const AnnotatedLine &Line) const;

  void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;

  void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;

  // NOTE(review): the meaning of the returned token and of \c Depth is not
  // visible in this header -- confirm against the definition.
  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
                                              FormatToken *CurrentToken,
                                              unsigned Depth) const;
  FormatStyle::PointerAlignmentStyle
  getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;

  FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
      const FormatToken &PointerOrReference) const;

  // Non-owning reference to the style passed to the constructor.
  const FormatStyle &Style;

  // Non-owning reference to the keyword table passed to the constructor.
  const AdditionalKeywords &Keywords;
};
224 
225 } // end namespace format
226 } // end namespace clang
227 
228 #endif
229