1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Various code that examines C++ source code without using heavy AST machinery 10 // (and often not even the lexer). To be used sparingly! 11 // 12 //===----------------------------------------------------------------------===// 13 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H 14 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H 15 16 #include "Context.h" 17 #include "Protocol.h" 18 #include "clang/Basic/Diagnostic.h" 19 #include "clang/Basic/LangOptions.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Basic/SourceManager.h" 22 #include "clang/Format/Format.h" 23 #include "clang/Tooling/Core/Replacement.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSet.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/SHA1.h" 28 #include <string> 29 30 namespace clang { 31 class SourceManager; 32 33 namespace clangd { 34 35 // We tend to generate digests for source codes in a lot of different places. 36 // This represents the type for those digests to prevent us hard coding details 37 // of hashing function at every place that needs to store this information. 38 using FileDigest = std::array<uint8_t, 8>; 39 FileDigest digest(StringRef Content); 40 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID); 41 42 // This context variable controls the behavior of functions in this file 43 // that convert between LSP offsets and native clang byte offsets. 44 // If not set, defaults to UTF-16 for backwards-compatibility. 45 extern Key<OffsetEncoding> kCurrentOffsetEncoding; 46 47 // Counts the number of UTF-16 code units needed to represent a string (LSP 48 // specifies string lengths in UTF-16 code units). 49 // Use of UTF-16 may be overridden by kCurrentOffsetEncoding. 50 size_t lspLength(StringRef Code); 51 52 /// Turn a [line, column] pair into an offset in Code. 53 /// 54 /// If P.character exceeds the line length, returns the offset at end-of-line. 55 /// (If !AllowColumnsBeyondLineLength, then returns an error instead). 56 /// If the line number is out of range, returns an error. 57 /// 58 /// The returned value is in the range [0, Code.size()]. 59 llvm::Expected<size_t> 60 positionToOffset(llvm::StringRef Code, Position P, 61 bool AllowColumnsBeyondLineLength = true); 62 63 /// Turn an offset in Code into a [line, column] pair. 64 /// The offset must be in range [0, Code.size()]. 65 Position offsetToPosition(llvm::StringRef Code, size_t Offset); 66 67 /// Turn a SourceLocation into a [line, column] pair. 68 /// FIXME: This should return an error if the location is invalid. 69 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); 70 71 /// Returns the taken range at \p TokLoc. 72 llvm::Optional<Range> getTokenRange(const SourceManager &SM, 73 const LangOptions &LangOpts, 74 SourceLocation TokLoc); 75 76 /// Return the file location, corresponding to \p P. Note that one should take 77 /// care to avoid comparing the result with expansion locations. 78 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, 79 Position P); 80 81 /// Get the beginning SourceLocation at a specified \p Pos in the main file. 82 /// May be invalid if Pos is, or if there's no identifier or operators. 83 /// The returned position is in the main file, callers may prefer to 84 /// obtain the macro expansion location. 85 SourceLocation getBeginningOfIdentifier(const Position &Pos, 86 const SourceManager &SM, 87 const LangOptions &LangOpts); 88 89 /// Returns true iff \p Loc is inside the main file. This function handles 90 /// file & macro locations. For macro locations, returns iff the macro is being 91 /// expanded inside the main file. 92 /// 93 /// The function is usually used to check whether a declaration is inside the 94 /// the main file. 95 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM); 96 97 /// Returns the #include location through which IncludedFIle was loaded. 98 /// Where SM.getIncludeLoc() returns the location of the *filename*, which may 99 /// be in a macro, includeHashLoc() returns the location of the #. 100 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM); 101 102 /// Returns true if the token at Loc is spelled in the source code. 103 /// This is not the case for: 104 /// * symbols formed via macro concatenation, the spelling location will 105 /// be "<scratch space>" 106 /// * symbols controlled and defined by a compile command-line option 107 /// `-DName=foo`, the spelling location will be "<command line>". 108 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM); 109 110 /// Turns a token range into a half-open range and checks its correctness. 111 /// The resulting range will have only valid source location on both sides, both 112 /// of which are file locations. 113 /// 114 /// File locations always point to a particular offset in a file, i.e. they 115 /// never refer to a location inside a macro expansion. Turning locations from 116 /// macro expansions into file locations is ambiguous - one can use 117 /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function 118 /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion. 119 /// 120 /// User input (e.g. cursor position) is expressed as a file location, so this 121 /// function can be viewed as a way to normalize the ranges used in the clang 122 /// AST so that they are comparable with ranges coming from the user input. 123 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr, 124 const LangOptions &LangOpts, 125 SourceRange R); 126 127 /// Returns true iff all of the following conditions hold: 128 /// - start and end locations are valid, 129 /// - start and end locations are file locations from the same file 130 /// (i.e. expansion locations are not taken into account). 131 /// - start offset <= end offset. 132 /// FIXME: introduce a type for source range with this invariant. 133 bool isValidFileRange(const SourceManager &Mgr, SourceRange R); 134 135 /// Returns true iff \p L is contained in \p R. 136 /// EXPECTS: isValidFileRange(R) == true, L is a file location. 137 bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, 138 SourceLocation L); 139 140 /// Returns true iff \p L is contained in \p R or \p L is equal to the end point 141 /// of \p R. 142 /// EXPECTS: isValidFileRange(R) == true, L is a file location. 143 bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, 144 SourceLocation L); 145 146 /// Returns the source code covered by the source range. 147 /// EXPECTS: isValidFileRange(R) == true. 148 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R); 149 150 // Converts a half-open clang source range to an LSP range. 151 // Note that clang also uses closed source ranges, which this can't handle! 152 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R); 153 154 // Converts an offset to a clang line/column (1-based, columns are bytes). 155 // The offset must be in range [0, Code.size()]. 156 // Prefer to use SourceManager if one is available. 157 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code, 158 size_t Offset); 159 160 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no 161 /// qualifier. 162 std::pair<llvm::StringRef, llvm::StringRef> 163 splitQualifiedName(llvm::StringRef QName); 164 165 TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R); 166 167 std::vector<TextEdit> replacementsToEdits(StringRef Code, 168 const tooling::Replacements &Repls); 169 170 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, 171 const LangOptions &L); 172 173 /// Get the canonical path of \p F. This means: 174 /// 175 /// - Absolute path 176 /// - Symlinks resolved 177 /// - No "." or ".." component 178 /// - No duplicate or trailing directory separator 179 /// 180 /// This function should be used when paths needs to be used outside the 181 /// component that generate it, so that paths are normalized as much as 182 /// possible. 183 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F, 184 const SourceManager &SourceMgr); 185 186 bool isRangeConsecutive(const Range &Left, const Range &Right); 187 188 /// Choose the clang-format style we should apply to a certain file. 189 /// This will usually use FS to look for .clang-format directories. 190 /// FIXME: should we be caching the .clang-format file search? 191 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle, 192 /// though the latter may have been overridden in main()! 193 format::FormatStyle getFormatStyleForFile(llvm::StringRef File, 194 llvm::StringRef Content, 195 llvm::vfs::FileSystem *FS); 196 197 /// Cleanup and format the given replacements. 198 llvm::Expected<tooling::Replacements> 199 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, 200 const format::FormatStyle &Style); 201 202 /// A set of edits generated for a single file. Can verify whether it is safe to 203 /// apply these edits to a code block. 204 struct Edit { 205 tooling::Replacements Replacements; 206 std::string InitialCode; 207 EditEdit208 Edit(llvm::StringRef Code, tooling::Replacements Reps) 209 : Replacements(std::move(Reps)), InitialCode(Code) {} 210 211 /// Returns the file contents after changes are applied. 212 llvm::Expected<std::string> apply() const; 213 214 /// Represents Replacements as TextEdits that are available for use in LSP. 215 std::vector<TextEdit> asTextEdits() const; 216 217 /// Checks whether the Replacements are applicable to given Code. 218 bool canApplyTo(llvm::StringRef Code) const; 219 }; 220 /// A mapping from absolute file path (the one used for accessing the underlying 221 /// VFS) to edits. 222 using FileEdits = llvm::StringMap<Edit>; 223 224 /// Formats the edits and code around it according to Style. Changes 225 /// Replacements to formatted ones if succeeds. 226 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style); 227 228 /// Collects identifiers with counts in the source code. 229 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, 230 const format::FormatStyle &Style); 231 232 /// Collects all ranges of the given identifier in the source code. 233 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier, 234 llvm::StringRef Content, 235 const LangOptions &LangOpts); 236 237 /// Collects words from the source code. 238 /// Unlike collectIdentifiers: 239 /// - also finds text in comments: 240 /// - splits text into words 241 /// - drops stopwords like "get" and "for" 242 llvm::StringSet<> collectWords(llvm::StringRef Content); 243 244 /// Heuristically determine namespaces visible at a point, without parsing Code. 245 /// This considers using-directives and enclosing namespace-declarations that 246 /// are visible (and not obfuscated) in the file itself (not headers). 247 /// Code should be truncated at the point of interest. 248 /// 249 /// The returned vector is always non-empty. 250 /// - The first element is the namespace that encloses the point: a declaration 251 /// near the point would be within this namespace. 252 /// - The elements are the namespaces in scope at the point: an unqualified 253 /// lookup would search within these namespaces. 254 /// 255 /// Using directives are resolved against all enclosing scopes, but no other 256 /// namespace directives. 257 /// 258 /// example: 259 /// using namespace a; 260 /// namespace foo { 261 /// using namespace b; 262 /// 263 /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::". 264 std::vector<std::string> visibleNamespaces(llvm::StringRef Code, 265 const format::FormatStyle &Style); 266 267 /// Represents locations that can accept a definition. 268 struct EligibleRegion { 269 /// Namespace that owns all of the EligiblePoints, e.g. 270 /// namespace a{ namespace b {^ void foo();^} } 271 /// It will be “a::b” for both carrot locations. 272 std::string EnclosingNamespace; 273 /// Offsets into the code marking eligible points to insert a function 274 /// definition. 275 std::vector<Position> EligiblePoints; 276 }; 277 278 /// Returns most eligible region to insert a definition for \p 279 /// FullyQualifiedName in the \p Code. 280 /// Pseudo parses \pCode under the hood to determine namespace decls and 281 /// possible insertion points. Choses the region that matches the longest prefix 282 /// of \p FullyQualifiedName. Returns EOF if there are no shared namespaces. 283 /// \p FullyQualifiedName should not contain anonymous namespaces. 284 EligibleRegion getEligiblePoints(llvm::StringRef Code, 285 llvm::StringRef FullyQualifiedName, 286 const format::FormatStyle &Style); 287 288 struct DefinedMacro { 289 llvm::StringRef Name; 290 const MacroInfo *Info; 291 }; 292 /// Gets the macro at a specified \p Loc. 293 llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc, 294 Preprocessor &PP); 295 296 /// Infers whether this is a header from the FileName and LangOpts (if 297 /// presents). 298 bool isHeaderFile(llvm::StringRef FileName, 299 llvm::Optional<LangOptions> LangOpts = llvm::None); 300 301 } // namespace clangd 302 } // namespace clang 303 #endif 304