1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// This file declares the token annotator, which creates \c AnnotatedLines
/// out of \c UnwrappedLines, i.e. \c FormatTokens with required extra
/// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 namespace format {
23 
24 enum LineType {
25   LT_Invalid,
26   LT_ImportStatement,
27   LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
28   LT_ObjCMethodDecl,
29   LT_ObjCProperty, // An @property line.
30   LT_Other,
31   LT_PreprocessorDirective,
32   LT_VirtualFunctionDecl,
33   LT_ArrayOfStructInitializer,
34 };
35 
36 class AnnotatedLine {
37 public:
38   AnnotatedLine(const UnwrappedLine &Line)
39       : First(Line.Tokens.front().Tok), Level(Line.Level),
40         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
41         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
42         InPPDirective(Line.InPPDirective),
43         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
44         IsMultiVariableDeclStmt(false), Affected(false),
45         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
46         FirstStartColumn(Line.FirstStartColumn) {
47     assert(!Line.Tokens.empty());
48 
49     // Calculate Next and Previous for all tokens. Note that we must overwrite
50     // Next and Previous for every token, as previous formatting runs might have
51     // left them in a different state.
52     First->Previous = nullptr;
53     FormatToken *Current = First;
54     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
55       Current->Next = Node.Tok;
56       Node.Tok->Previous = Current;
57       Current = Current->Next;
58       Current->Children.clear();
59       for (const auto &Child : Node.Children) {
60         Children.push_back(new AnnotatedLine(Child));
61         Current->Children.push_back(Children.back());
62       }
63     }
64     Last = Current;
65     Last->Next = nullptr;
66   }
67 
68   ~AnnotatedLine() {
69     for (AnnotatedLine *Child : Children)
70       delete Child;
71     FormatToken *Current = First;
72     while (Current) {
73       Current->Children.clear();
74       Current->Role.reset();
75       Current = Current->Next;
76     }
77   }
78 
79   /// \c true if this line starts with the given tokens in order, ignoring
80   /// comments.
81   template <typename... Ts> bool startsWith(Ts... Tokens) const {
82     return First && First->startsSequence(Tokens...);
83   }
84 
85   /// \c true if this line ends with the given tokens in reversed order,
86   /// ignoring comments.
87   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
88   /// this line is like "... T3 T2 T1".
89   template <typename... Ts> bool endsWith(Ts... Tokens) const {
90     return Last && Last->endsSequence(Tokens...);
91   }
92 
93   /// \c true if this line looks like a function definition instead of a
94   /// function declaration. Asserts MightBeFunctionDecl.
95   bool mightBeFunctionDefinition() const {
96     assert(MightBeFunctionDecl);
97     // Try to determine if the end of a stream of tokens is either the
98     // Definition or the Declaration for a function. It does this by looking for
99     // the ';' in foo(); and using that it ends with a ; to know this is the
100     // Definition, however the line could end with
101     //    foo(); /* comment */
102     // or
103     //    foo(); // comment
104     // or
105     //    foo() // comment
106     // endsWith() ignores the comment.
107     return !endsWith(tok::semi);
108   }
109 
110   /// \c true if this line starts a namespace definition.
111   bool startsWithNamespace() const {
112     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
113            startsWith(tok::kw_inline, tok::kw_namespace) ||
114            startsWith(tok::kw_export, tok::kw_namespace);
115   }
116 
117   FormatToken *First;
118   FormatToken *Last;
119 
120   SmallVector<AnnotatedLine *, 0> Children;
121 
122   LineType Type;
123   unsigned Level;
124   size_t MatchingOpeningBlockLineIndex;
125   size_t MatchingClosingBlockLineIndex;
126   bool InPPDirective;
127   bool MustBeDeclaration;
128   bool MightBeFunctionDecl;
129   bool IsMultiVariableDeclStmt;
130 
131   /// \c True if this line should be formatted, i.e. intersects directly or
132   /// indirectly with one of the input ranges.
133   bool Affected;
134 
135   /// \c True if the leading empty lines of this line intersect with one of the
136   /// input ranges.
137   bool LeadingEmptyLinesAffected;
138 
139   /// \c True if one of this line's children intersects with an input range.
140   bool ChildrenAffected;
141 
142   unsigned FirstStartColumn;
143 
144 private:
145   // Disallow copying.
146   AnnotatedLine(const AnnotatedLine &) = delete;
147   void operator=(const AnnotatedLine &) = delete;
148 };
149 
/// Determines extra information about the tokens comprising an
/// \c UnwrappedLine.
///
/// The annotator stores references to the \c FormatStyle and
/// \c AdditionalKeywords passed to its constructor rather than copies, so both
/// objects must outlive it.
class TokenAnnotator {
public:
  /// Binds the annotator to \p Style and \p Keywords; neither is copied.
  TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
      : Style(Style), Keywords(Keywords) {}

  /// Adapts the indent levels of comment lines to the indent of the
  /// subsequent line.
  // FIXME: Can/should this be done in the UnwrappedLineParser?
  void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);

  // Both functions take the line by non-const reference and are defined out
  // of line. NOTE(review): presumably annotate() has to run on a line before
  // calculateFormattingInformation() -- confirm against the implementation.
  void annotate(AnnotatedLine &Line);
  void calculateFormattingInformation(AnnotatedLine &Line);

private:
  /// Calculate the penalty for splitting before \c Tok.
  unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
                        bool InFunctionDecl);

  // Whitespace queries. NOTE(review): judging by the names these decide
  // whether a space goes before \c Right (or between \c Left and \c Right);
  // the exact rules live in the out-of-line definitions.
  bool spaceRequiredBeforeParens(const FormatToken &Right) const;

  bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
                            const FormatToken &Right);

  bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);

  // Line-break queries. NOTE(review): presumably "must" means a break before
  // \c Right is mandatory and "can" means it is permitted -- verify in the
  // implementation.
  bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);

  bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);

  bool mustBreakForReturnType(const AnnotatedLine &Line) const;

  void printDebugInfo(const AnnotatedLine &Line);

  void calculateUnbreakableTailLengths(AnnotatedLine &Line);

  void calculateArrayInitializerColumnList(AnnotatedLine &Line);

  // Returns a token pointer; NOTE(review): presumably the token at which
  // processing of the initializer at nesting level \c Depth stopped -- confirm
  // against the definition.
  FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
                                              FormatToken *CurrentToken,
                                              unsigned Depth);
  // The two helpers below return the alignment style to use for the given
  // pointer or reference token.
  FormatStyle::PointerAlignmentStyle
  getTokenReferenceAlignment(const FormatToken &PointerOrReference);

  FormatStyle::PointerAlignmentStyle
  getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference);

  // Non-owning; bound in the constructor.
  const FormatStyle &Style;

  // Non-owning; bound in the constructor.
  const AdditionalKeywords &Keywords;
};
202 
203 } // end namespace format
204 } // end namespace clang
205 
206 #endif
207