1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Various code that examines C++ source code without using heavy AST machinery
10 // (and often not even the lexer). To be used sparingly!
11 //
12 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
14 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
15 
16 #include "Context.h"
17 #include "Protocol.h"
18 #include "clang/Basic/Diagnostic.h"
19 #include "clang/Basic/LangOptions.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Format/Format.h"
23 #include "clang/Tooling/Core/Replacement.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/StringSet.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/SHA1.h"
28 #include <string>
29 
30 namespace clang {
31 class SourceManager;
32 
33 namespace clangd {
34 
35 // We tend to generate digests for source code in a lot of different places.
36 // This represents the type for those digests to prevent us hard-coding details
37 // of the hashing function at every place that needs to store this information.
38 using FileDigest = std::array<uint8_t, 8>;
39 FileDigest digest(StringRef Content);
40 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
41 
42 // This context variable controls the behavior of functions in this file
43 // that convert between LSP offsets and native clang byte offsets.
44 // If not set, defaults to UTF-16 for backwards-compatibility.
45 extern Key<OffsetEncoding> kCurrentOffsetEncoding;
46 
47 // Counts the number of UTF-16 code units needed to represent a string (LSP
48 // specifies string lengths in UTF-16 code units).
49 // Use of UTF-16 may be overridden by kCurrentOffsetEncoding.
50 size_t lspLength(StringRef Code);
51 
52 /// Turn a [line, column] pair into an offset in Code.
53 ///
54 /// If P.character exceeds the line length, returns the offset at end-of-line.
55 /// (If !AllowColumnsBeyondLineLength, then returns an error instead).
56 /// If the line number is out of range, returns an error.
57 ///
58 /// The returned value is in the range [0, Code.size()].
59 llvm::Expected<size_t>
60 positionToOffset(llvm::StringRef Code, Position P,
61                  bool AllowColumnsBeyondLineLength = true);
62 
63 /// Turn an offset in Code into a [line, column] pair.
64 /// The offset must be in range [0, Code.size()].
65 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
66 
67 /// Turn a SourceLocation into a [line, column] pair.
68 /// FIXME: This should return an error if the location is invalid.
69 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
70 
71 /// Returns the token range at \p TokLoc.
72 llvm::Optional<Range> getTokenRange(const SourceManager &SM,
73                                     const LangOptions &LangOpts,
74                                     SourceLocation TokLoc);
75 
76 /// Return the file location, corresponding to \p P. Note that one should take
77 /// care to avoid comparing the result with expansion locations.
78 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
79                                                         Position P);
80 
81 /// Get the beginning SourceLocation at a specified \p Pos in the main file.
82 /// The result may be invalid if Pos is, or if there's no identifier or operator.
83 /// The returned location is in the main file; callers may prefer to
84 /// obtain the macro expansion location.
85 SourceLocation getBeginningOfIdentifier(const Position &Pos,
86                                         const SourceManager &SM,
87                                         const LangOptions &LangOpts);
88 
89 /// Returns true iff \p Loc is inside the main file. This function handles
90 /// file & macro locations. For macro locations, returns true iff the macro is
91 /// being expanded inside the main file.
92 ///
93 /// The function is usually used to check whether a declaration is inside the
94 /// main file.
95 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM);
96 
97 /// Returns the #include location through which IncludedFile was loaded.
98 /// Where SM.getIncludeLoc() returns the location of the *filename*, which may
99 /// be in a macro, includeHashLoc() returns the location of the #.
100 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM);
101 
102 /// Returns true if the token at Loc is spelled in the source code.
103 /// This is not the case for:
104 ///   * symbols formed via macro concatenation, the spelling location will
105 ///     be "<scratch space>"
106 ///   * symbols controlled and defined by a compile command-line option
107 ///     `-DName=foo`, the spelling location will be "<command line>".
108 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM);
109 
110 /// Turns a token range into a half-open range and checks its correctness.
111 /// The resulting range will have only valid source location on both sides, both
112 /// of which are file locations.
113 ///
114 /// File locations always point to a particular offset in a file, i.e. they
115 /// never refer to a location inside a macro expansion. Turning locations from
116 /// macro expansions into file locations is ambiguous - one can use
117 /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function
118 /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion.
119 ///
120 /// User input (e.g. cursor position) is expressed as a file location, so this
121 /// function can be viewed as a way to normalize the ranges used in the clang
122 /// AST so that they are comparable with ranges coming from the user input.
123 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
124                                                 const LangOptions &LangOpts,
125                                                 SourceRange R);
126 
127 /// Returns true iff all of the following conditions hold:
128 ///   - start and end locations are valid,
129 ///   - start and end locations are file locations from the same file
130 ///     (i.e. expansion locations are not taken into account).
131 ///   - start offset <= end offset.
132 /// FIXME: introduce a type for source range with this invariant.
133 bool isValidFileRange(const SourceManager &Mgr, SourceRange R);
134 
135 /// Returns true iff \p L is contained in \p R.
136 /// EXPECTS: isValidFileRange(R) == true, L is a file location.
137 bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R,
138                            SourceLocation L);
139 
140 /// Returns true iff \p L is contained in \p R or \p L is equal to the end point
141 /// of \p R.
142 /// EXPECTS: isValidFileRange(R) == true, L is a file location.
143 bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R,
144                           SourceLocation L);
145 
146 /// Returns the source code covered by the source range.
147 /// EXPECTS: isValidFileRange(R) == true.
148 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R);
149 
150 // Converts a half-open clang source range to an LSP range.
151 // Note that clang also uses closed source ranges, which this can't handle!
152 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
153 
154 // Converts an offset to a clang line/column (1-based, columns are bytes).
155 // The offset must be in range [0, Code.size()].
156 // Prefer to use SourceManager if one is available.
157 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
158                                                   size_t Offset);
159 
160 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
161 /// qualifier.
162 std::pair<llvm::StringRef, llvm::StringRef>
163 splitQualifiedName(llvm::StringRef QName);
164 
165 TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R);
166 
167 std::vector<TextEdit> replacementsToEdits(StringRef Code,
168                                           const tooling::Replacements &Repls);
169 
170 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
171                     const LangOptions &L);
172 
173 /// Get the canonical path of \p F.  This means:
174 ///
175 ///   - Absolute path
176 ///   - Symlinks resolved
177 ///   - No "." or ".." component
178 ///   - No duplicate or trailing directory separator
179 ///
180 /// This function should be used when paths need to be used outside the
181 /// component that generates them, so that paths are normalized as much as
182 /// possible.
183 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
184                                              const SourceManager &SourceMgr);
185 
186 bool isRangeConsecutive(const Range &Left, const Range &Right);
187 
188 /// Choose the clang-format style we should apply to a certain file.
189 /// This will usually use FS to look for .clang-format files.
190 /// FIXME: should we be caching the .clang-format file search?
191 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
192 /// though the latter may have been overridden in main()!
193 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
194                                           llvm::StringRef Content,
195                                           llvm::vfs::FileSystem *FS);
196 
197 /// Cleanup and format the given replacements.
198 llvm::Expected<tooling::Replacements>
199 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
200                  const format::FormatStyle &Style);
201 
202 /// A set of edits generated for a single file. Can verify whether it is safe to
203 /// apply these edits to a code block.
204 struct Edit {
205   tooling::Replacements Replacements; // The replacements given at construction.
206   std::string InitialCode; // Owned copy of the code given at construction.
207 
208   Edit(llvm::StringRef Code, tooling::Replacements Reps)
209       : Replacements(std::move(Reps)), InitialCode(Code.str()) {}
210 
211   /// Returns the file contents after changes are applied.
212   llvm::Expected<std::string> apply() const;
213 
214   /// Represents Replacements as TextEdits that are available for use in LSP.
215   std::vector<TextEdit> asTextEdits() const;
216 
217   /// Checks whether the Replacements are applicable to the given Code.
218   bool canApplyTo(llvm::StringRef Code) const;
219 };
220 /// A mapping from absolute file path (the one used for accessing the underlying
221 /// VFS) to edits.
222 using FileEdits = llvm::StringMap<Edit>;
223 
224 /// Formats the edits and code around it according to Style. Changes
225 /// Replacements to formatted ones if succeeds.
226 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style);
227 
228 /// Collects identifiers with counts in the source code.
229 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
230                                              const format::FormatStyle &Style);
231 
232 /// Collects all ranges of the given identifier in the source code.
233 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
234                                            llvm::StringRef Content,
235                                            const LangOptions &LangOpts);
236 
237 /// Collects words from the source code.
238 /// Unlike collectIdentifiers:
239 /// - also finds text in comments
240 /// - splits text into words
241 /// - drops stopwords like "get" and "for"
242 llvm::StringSet<> collectWords(llvm::StringRef Content);
243 
244 /// Heuristically determine namespaces visible at a point, without parsing Code.
245 /// This considers using-directives and enclosing namespace-declarations that
246 /// are visible (and not obfuscated) in the file itself (not headers).
247 /// Code should be truncated at the point of interest.
248 ///
249 /// The returned vector is always non-empty.
250 /// - The first element is the namespace that encloses the point: a declaration
251 ///   near the point would be within this namespace.
252 /// - The elements are the namespaces in scope at the point: an unqualified
253 ///   lookup would search within these namespaces.
254 ///
255 /// Using directives are resolved against all enclosing scopes, but no other
256 /// namespace directives.
257 ///
258 /// example:
259 ///   using namespace a;
260 ///   namespace foo {
261 ///     using namespace b;
262 ///
263 /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::".
264 std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
265                                            const format::FormatStyle &Style);
266 
267 /// Represents locations that can accept a definition.
268 struct EligibleRegion {
269   /// Namespace that owns all of the EligiblePoints, e.g.
270   /// namespace a{ namespace b {^ void foo();^} }
271   /// It will be "a::b" for both caret locations.
272   std::string EnclosingNamespace;
273   /// Positions in the code marking eligible points to insert a function
274   /// definition.
275   std::vector<Position> EligiblePoints;
276 };
277 
278 /// Returns most eligible region to insert a definition for \p
279 /// FullyQualifiedName in the \p Code.
280 /// Pseudo-parses \p Code under the hood to determine namespace decls and
281 /// possible insertion points. Chooses the region that matches the longest
282 /// prefix of \p FullyQualifiedName. Returns EOF if there are no shared
283 /// namespaces. \p FullyQualifiedName should not contain anonymous namespaces.
284 EligibleRegion getEligiblePoints(llvm::StringRef Code,
285                                  llvm::StringRef FullyQualifiedName,
286                                  const format::FormatStyle &Style);
287 
288 struct DefinedMacro { // A macro's name paired with its MacroInfo.
289   llvm::StringRef Name; // Name of the macro; StringRef does not own the text.
290   const MacroInfo *Info; // Info for the macro; not modifiable via this pointer.
291 };
292 /// Gets the macro at a specified \p Loc.
293 llvm::Optional<DefinedMacro> locateMacroAt(SourceLocation Loc,
294                                            Preprocessor &PP);
295 
296 /// Infers whether this is a header from the FileName and LangOpts (if
297 /// present).
298 bool isHeaderFile(llvm::StringRef FileName,
299                   llvm::Optional<LangOptions> LangOpts = llvm::None);
300 
301 } // namespace clangd
302 } // namespace clang
303 #endif
304