1 //===-- ClangHighlighter.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ClangHighlighter.h" 10 11 #include "lldb/Host/FileSystem.h" 12 #include "lldb/Target/Language.h" 13 #include "lldb/Utility/AnsiTerminal.h" 14 #include "lldb/Utility/StreamString.h" 15 16 #include "clang/Basic/FileManager.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Lex/Lexer.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Support/MemoryBuffer.h" 21 #include <optional> 22 23 using namespace lldb_private; 24 25 bool ClangHighlighter::isKeyword(llvm::StringRef token) const { 26 return keywords.find(token) != keywords.end(); 27 } 28 29 ClangHighlighter::ClangHighlighter() { 30 #define KEYWORD(X, N) keywords.insert(#X); 31 #include "clang/Basic/TokenKinds.def" 32 } 33 34 /// Determines which style should be applied to the given token. 35 /// \param highlighter 36 /// The current highlighter that should use the style. 37 /// \param token 38 /// The current token. 39 /// \param tok_str 40 /// The string in the source code the token represents. 41 /// \param options 42 /// The style we use for coloring the source code. 43 /// \param in_pp_directive 44 /// If we are currently in a preprocessor directive. NOTE: This is 45 /// passed by reference and will be updated if the current token starts 46 /// or ends a preprocessor directive. 47 /// \return 48 /// The ColorStyle that should be applied to the token. 49 static HighlightStyle::ColorStyle 50 determineClangStyle(const ClangHighlighter &highlighter, 51 const clang::Token &token, llvm::StringRef tok_str, 52 const HighlightStyle &options, bool &in_pp_directive) { 53 using namespace clang; 54 55 if (token.is(tok::comment)) { 56 // If we were in a preprocessor directive before, we now left it. 57 in_pp_directive = false; 58 return options.comment; 59 } else if (in_pp_directive || token.getKind() == tok::hash) { 60 // Let's assume that the rest of the line is a PP directive. 61 in_pp_directive = true; 62 // Preprocessor directives are hard to match, so we have to hack this in. 63 return options.pp_directive; 64 } else if (tok::isStringLiteral(token.getKind())) 65 return options.string_literal; 66 else if (tok::isLiteral(token.getKind())) 67 return options.scalar_literal; 68 else if (highlighter.isKeyword(tok_str)) 69 return options.keyword; 70 else 71 switch (token.getKind()) { 72 case tok::raw_identifier: 73 case tok::identifier: 74 return options.identifier; 75 case tok::l_brace: 76 case tok::r_brace: 77 return options.braces; 78 case tok::l_square: 79 case tok::r_square: 80 return options.square_brackets; 81 case tok::l_paren: 82 case tok::r_paren: 83 return options.parentheses; 84 case tok::comma: 85 return options.comma; 86 case tok::coloncolon: 87 case tok::colon: 88 return options.colon; 89 90 case tok::amp: 91 case tok::ampamp: 92 case tok::ampequal: 93 case tok::star: 94 case tok::starequal: 95 case tok::plus: 96 case tok::plusplus: 97 case tok::plusequal: 98 case tok::minus: 99 case tok::arrow: 100 case tok::minusminus: 101 case tok::minusequal: 102 case tok::tilde: 103 case tok::exclaim: 104 case tok::exclaimequal: 105 case tok::slash: 106 case tok::slashequal: 107 case tok::percent: 108 case tok::percentequal: 109 case tok::less: 110 case tok::lessless: 111 case tok::lessequal: 112 case tok::lesslessequal: 113 case tok::spaceship: 114 case tok::greater: 115 case tok::greatergreater: 116 case tok::greaterequal: 117 case tok::greatergreaterequal: 118 case tok::caret: 119 case tok::caretequal: 120 case tok::pipe: 121 case tok::pipepipe: 122 case tok::pipeequal: 123 case tok::question: 124 case tok::equal: 125 case tok::equalequal: 126 return options.operators; 127 default: 128 break; 129 } 130 return HighlightStyle::ColorStyle(); 131 } 132 133 void ClangHighlighter::Highlight(const HighlightStyle &options, 134 llvm::StringRef line, 135 std::optional<size_t> cursor_pos, 136 llvm::StringRef previous_lines, 137 Stream &result) const { 138 using namespace clang; 139 140 FileSystemOptions file_opts; 141 FileManager file_mgr(file_opts, 142 FileSystem::Instance().GetVirtualFileSystem()); 143 144 // The line might end in a backslash which would cause Clang to drop the 145 // backslash and the terminating new line. This makes sense when parsing C++, 146 // but when highlighting we care about preserving the backslash/newline. To 147 // not lose this information we remove the new line here so that Clang knows 148 // this is just a single line we are highlighting. We add back the newline 149 // after tokenizing. 150 llvm::StringRef line_ending = ""; 151 // There are a few legal line endings Clang recognizes and we need to 152 // temporarily remove from the string. 153 if (line.consume_back("\r\n")) 154 line_ending = "\r\n"; 155 else if (line.consume_back("\n")) 156 line_ending = "\n"; 157 else if (line.consume_back("\r")) 158 line_ending = "\r"; 159 160 unsigned line_number = previous_lines.count('\n') + 1U; 161 162 // Let's build the actual source code Clang needs and setup some utility 163 // objects. 164 std::string full_source = previous_lines.str() + line.str(); 165 llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); 166 llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( 167 new DiagnosticOptions()); 168 DiagnosticsEngine diags(diag_ids, diags_opts); 169 clang::SourceManager SM(diags, file_mgr); 170 auto buf = llvm::MemoryBuffer::getMemBuffer(full_source); 171 172 FileID FID = SM.createFileID(buf->getMemBufferRef()); 173 174 // Let's just enable the latest ObjC and C++ which should get most tokens 175 // right. 176 LangOptions Opts; 177 Opts.ObjC = true; 178 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too 179 Opts.CPlusPlus17 = true; 180 Opts.LineComment = true; 181 182 Lexer lex(FID, buf->getMemBufferRef(), SM, Opts); 183 // The lexer should keep whitespace around. 184 lex.SetKeepWhitespaceMode(true); 185 186 // Keeps track if we have entered a PP directive. 187 bool in_pp_directive = false; 188 189 // True once we actually lexed the user provided line. 190 bool found_user_line = false; 191 192 // True if we already highlighted the token under the cursor, false otherwise. 193 bool highlighted_cursor = false; 194 Token token; 195 bool exit = false; 196 while (!exit) { 197 // Returns true if this is the last token we get from the lexer. 198 exit = lex.LexFromRawLexer(token); 199 200 bool invalid = false; 201 unsigned current_line_number = 202 SM.getSpellingLineNumber(token.getLocation(), &invalid); 203 if (current_line_number != line_number) 204 continue; 205 found_user_line = true; 206 207 // We don't need to print any tokens without a spelling line number. 208 if (invalid) 209 continue; 210 211 // Same as above but with the column number. 212 invalid = false; 213 unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); 214 if (invalid) 215 continue; 216 // Column numbers start at 1, but indexes in our string start at 0. 217 --start; 218 219 // Annotations don't have a length, so let's skip them. 220 if (token.isAnnotation()) 221 continue; 222 223 // Extract the token string from our source code. 224 llvm::StringRef tok_str = line.substr(start, token.getLength()); 225 226 // If the token is just an empty string, we can skip all the work below. 227 if (tok_str.empty()) 228 continue; 229 230 // If the cursor is inside this token, we have to apply the 'selected' 231 // highlight style before applying the actual token color. 232 llvm::StringRef to_print = tok_str; 233 StreamString storage; 234 auto end = start + token.getLength(); 235 if (cursor_pos && end > *cursor_pos && !highlighted_cursor) { 236 highlighted_cursor = true; 237 options.selected.Apply(storage, tok_str); 238 to_print = storage.GetString(); 239 } 240 241 // See how we are supposed to highlight this token. 242 HighlightStyle::ColorStyle color = 243 determineClangStyle(*this, token, tok_str, options, in_pp_directive); 244 245 color.Apply(result, to_print); 246 } 247 248 // Add the line ending we trimmed before tokenizing. 249 result << line_ending; 250 251 // If we went over the whole file but couldn't find our own file, then 252 // somehow our setup was wrong. When we're in release mode we just give the 253 // user the normal line and pretend we don't know how to highlight it. In 254 // debug mode we bail out with an assert as this should never happen. 255 if (!found_user_line) { 256 result << line; 257 assert(false && "We couldn't find the user line in the input file?"); 258 } 259 } 260