1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 #include "clang/Format/Format.h" 20 21 namespace clang { 22 namespace format { 23 24 enum LineType { 25 LT_Invalid, 26 LT_ImportStatement, 27 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 28 LT_ObjCMethodDecl, 29 LT_ObjCProperty, // An @property line. 30 LT_Other, 31 LT_PreprocessorDirective, 32 LT_VirtualFunctionDecl, 33 LT_ArrayOfStructInitializer, 34 LT_CommentAbovePPDirective, 35 }; 36 37 class AnnotatedLine { 38 public: AnnotatedLine(const UnwrappedLine & Line)39 AnnotatedLine(const UnwrappedLine &Line) 40 : First(Line.Tokens.front().Tok), Level(Line.Level), 41 PPLevel(Line.PPLevel), 42 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 43 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 44 InPPDirective(Line.InPPDirective), 45 InPragmaDirective(Line.InPragmaDirective), 46 InMacroBody(Line.InMacroBody), 47 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 48 IsMultiVariableDeclStmt(false), Affected(false), 49 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 50 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), 51 FirstStartColumn(Line.FirstStartColumn) { 52 assert(!Line.Tokens.empty()); 53 54 // Calculate Next and Previous for all tokens. Note that we must overwrite 55 // Next and Previous for every token, as previous formatting runs might have 56 // left them in a different state. 57 First->Previous = nullptr; 58 FormatToken *Current = First; 59 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 60 Current->Next = Node.Tok; 61 Node.Tok->Previous = Current; 62 Current = Current->Next; 63 Current->Children.clear(); 64 for (const auto &Child : Node.Children) { 65 Children.push_back(new AnnotatedLine(Child)); 66 Current->Children.push_back(Children.back()); 67 } 68 } 69 Last = Current; 70 Last->Next = nullptr; 71 } 72 ~AnnotatedLine()73 ~AnnotatedLine() { 74 for (AnnotatedLine *Child : Children) 75 delete Child; 76 FormatToken *Current = First; 77 while (Current) { 78 Current->Children.clear(); 79 Current->Role.reset(); 80 Current = Current->Next; 81 } 82 } 83 isComment()84 bool isComment() const { 85 return First && First->is(tok::comment) && !First->getNextNonComment(); 86 } 87 88 /// \c true if this line starts with the given tokens in order, ignoring 89 /// comments. startsWith(Ts...Tokens)90 template <typename... Ts> bool startsWith(Ts... Tokens) const { 91 return First && First->startsSequence(Tokens...); 92 } 93 94 /// \c true if this line ends with the given tokens in reversed order, 95 /// ignoring comments. 96 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 97 /// this line is like "... T3 T2 T1". endsWith(Ts...Tokens)98 template <typename... Ts> bool endsWith(Ts... Tokens) const { 99 return Last && Last->endsSequence(Tokens...); 100 } 101 102 /// \c true if this line looks like a function definition instead of a 103 /// function declaration. Asserts MightBeFunctionDecl. mightBeFunctionDefinition()104 bool mightBeFunctionDefinition() const { 105 assert(MightBeFunctionDecl); 106 // Try to determine if the end of a stream of tokens is either the 107 // Definition or the Declaration for a function. It does this by looking for 108 // the ';' in foo(); and using that it ends with a ; to know this is the 109 // Definition, however the line could end with 110 // foo(); /* comment */ 111 // or 112 // foo(); // comment 113 // or 114 // foo() // comment 115 // endsWith() ignores the comment. 116 return !endsWith(tok::semi); 117 } 118 119 /// \c true if this line starts a namespace definition. startsWithNamespace()120 bool startsWithNamespace() const { 121 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 122 startsWith(tok::kw_inline, tok::kw_namespace) || 123 startsWith(tok::kw_export, tok::kw_namespace); 124 } 125 126 FormatToken *First; 127 FormatToken *Last; 128 129 SmallVector<AnnotatedLine *, 0> Children; 130 131 LineType Type; 132 unsigned Level; 133 unsigned PPLevel; 134 size_t MatchingOpeningBlockLineIndex; 135 size_t MatchingClosingBlockLineIndex; 136 bool InPPDirective; 137 bool InPragmaDirective; 138 bool InMacroBody; 139 bool MustBeDeclaration; 140 bool MightBeFunctionDecl; 141 bool IsMultiVariableDeclStmt; 142 143 /// \c True if this line should be formatted, i.e. intersects directly or 144 /// indirectly with one of the input ranges. 145 bool Affected; 146 147 /// \c True if the leading empty lines of this line intersect with one of the 148 /// input ranges. 149 bool LeadingEmptyLinesAffected; 150 151 /// \c True if one of this line's children intersects with an input range. 152 bool ChildrenAffected; 153 154 /// \c True if breaking after last attribute group in function return type. 155 bool ReturnTypeWrapped; 156 157 /// \c True if this line should be indented by ContinuationIndent in addition 158 /// to the normal indention level. 159 bool IsContinuation; 160 161 unsigned FirstStartColumn; 162 163 private: 164 // Disallow copying. 165 AnnotatedLine(const AnnotatedLine &) = delete; 166 void operator=(const AnnotatedLine &) = delete; 167 }; 168 169 /// Determines extra information about the tokens comprising an 170 /// \c UnwrappedLine. 171 class TokenAnnotator { 172 public: TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)173 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 174 : Style(Style), Keywords(Keywords) {} 175 176 /// Adapts the indent levels of comment lines to the indent of the 177 /// subsequent line. 178 // FIXME: Can/should this be done in the UnwrappedLineParser? 179 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; 180 181 void annotate(AnnotatedLine &Line) const; 182 void calculateFormattingInformation(AnnotatedLine &Line) const; 183 184 private: 185 /// Calculate the penalty for splitting before \c Tok. 186 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 187 bool InFunctionDecl) const; 188 189 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 190 191 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 192 const FormatToken &Right) const; 193 194 bool spaceRequiredBefore(const AnnotatedLine &Line, 195 const FormatToken &Right) const; 196 197 bool mustBreakBefore(const AnnotatedLine &Line, 198 const FormatToken &Right) const; 199 200 bool canBreakBefore(const AnnotatedLine &Line, 201 const FormatToken &Right) const; 202 203 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 204 205 void printDebugInfo(const AnnotatedLine &Line) const; 206 207 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; 208 209 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; 210 211 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 212 FormatToken *CurrentToken, 213 unsigned Depth) const; 214 FormatStyle::PointerAlignmentStyle 215 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; 216 217 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( 218 const FormatToken &PointerOrReference) const; 219 220 const FormatStyle &Style; 221 222 const AdditionalKeywords &Keywords; 223 }; 224 225 } // end namespace format 226 } // end namespace clang 227 228 #endif 229