1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "SourceCode.h"
9 
10 #include "FuzzyMatch.h"
11 #include "Preamble.h"
12 #include "Protocol.h"
13 #include "refactor/Tweak.h"
14 #include "support/Context.h"
15 #include "support/Logger.h"
16 #include "support/Threading.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/Basic/LangOptions.h"
19 #include "clang/Basic/SourceLocation.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Driver/Types.h"
23 #include "clang/Format/Format.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Lex/Preprocessor.h"
26 #include "clang/Lex/Token.h"
27 #include "clang/Tooling/Core/Replacement.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/None.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/ADT/StringMap.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/LineIterator.h"
40 #include "llvm/Support/MemoryBuffer.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/SHA1.h"
43 #include "llvm/Support/VirtualFileSystem.h"
44 #include "llvm/Support/xxhash.h"
45 #include <algorithm>
46 #include <cstddef>
47 #include <string>
48 #include <vector>
49 
50 namespace clang {
51 namespace clangd {
52 
53 // Here be dragons. LSP positions use columns measured in *UTF-16 code units*!
54 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial.
55 
56 // Iterates over unicode codepoints in the (UTF-8) string. For each,
57 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true.
58 // Returns true if CB returned true, false if we hit the end of string.
59 //
60 // If the string is not valid UTF-8, we log this error and "decode" the
61 // text in some arbitrary way. This is pretty sad, but this tends to happen deep
62 // within indexing of headers where clang misdetected the encoding, and
63 // propagating the error all the way back up is (probably?) not be worth it.
64 template <typename Callback>
iterateCodepoints(llvm::StringRef U8,const Callback & CB)65 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) {
66   bool LoggedInvalid = false;
67   // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
68   // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
69   for (size_t I = 0; I < U8.size();) {
70     unsigned char C = static_cast<unsigned char>(U8[I]);
71     if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character.
72       if (CB(1, 1))
73         return true;
74       ++I;
75       continue;
76     }
77     // This convenient property of UTF-8 holds for all non-ASCII characters.
78     size_t UTF8Length = llvm::countLeadingOnes(C);
79     // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here.
80     // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*.
81     if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) {
82       if (!LoggedInvalid) {
83         elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8));
84         LoggedInvalid = true;
85       }
86       // We can't give a correct result, but avoid returning something wild.
87       // Pretend this is a valid ASCII byte, for lack of better options.
88       // (Too late to get ISO-8859-* right, we've skipped some bytes already).
89       if (CB(1, 1))
90         return true;
91       ++I;
92       continue;
93     }
94     I += UTF8Length; // Skip over all trailing bytes.
95     // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
96     // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...)
97     if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1))
98       return true;
99   }
100   return false;
101 }
102 
103 // Returns the byte offset into the string that is an offset of \p Units in
104 // the specified encoding.
105 // Conceptually, this converts to the encoding, truncates to CodeUnits,
106 // converts back to UTF-8, and returns the length in bytes.
measureUnits(llvm::StringRef U8,int Units,OffsetEncoding Enc,bool & Valid)107 static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc,
108                            bool &Valid) {
109   Valid = Units >= 0;
110   if (Units <= 0)
111     return 0;
112   size_t Result = 0;
113   switch (Enc) {
114   case OffsetEncoding::UTF8:
115     Result = Units;
116     break;
117   case OffsetEncoding::UTF16:
118     Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
119       Result += U8Len;
120       Units -= U16Len;
121       return Units <= 0;
122     });
123     if (Units < 0) // Offset in the middle of a surrogate pair.
124       Valid = false;
125     break;
126   case OffsetEncoding::UTF32:
127     Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) {
128       Result += U8Len;
129       Units--;
130       return Units <= 0;
131     });
132     break;
133   case OffsetEncoding::UnsupportedEncoding:
134     llvm_unreachable("unsupported encoding");
135   }
136   // Don't return an out-of-range index if we overran.
137   if (Result > U8.size()) {
138     Valid = false;
139     return U8.size();
140   }
141   return Result;
142 }
143 
144 Key<OffsetEncoding> kCurrentOffsetEncoding;
lspEncoding()145 static OffsetEncoding lspEncoding() {
146   auto *Enc = Context::current().get(kCurrentOffsetEncoding);
147   return Enc ? *Enc : OffsetEncoding::UTF16;
148 }
149 
150 // Like most strings in clangd, the input is UTF-8 encoded.
lspLength(llvm::StringRef Code)151 size_t lspLength(llvm::StringRef Code) {
152   size_t Count = 0;
153   switch (lspEncoding()) {
154   case OffsetEncoding::UTF8:
155     Count = Code.size();
156     break;
157   case OffsetEncoding::UTF16:
158     iterateCodepoints(Code, [&](int U8Len, int U16Len) {
159       Count += U16Len;
160       return false;
161     });
162     break;
163   case OffsetEncoding::UTF32:
164     iterateCodepoints(Code, [&](int U8Len, int U16Len) {
165       ++Count;
166       return false;
167     });
168     break;
169   case OffsetEncoding::UnsupportedEncoding:
170     llvm_unreachable("unsupported encoding");
171   }
172   return Count;
173 }
174 
positionToOffset(llvm::StringRef Code,Position P,bool AllowColumnsBeyondLineLength)175 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
176                                         bool AllowColumnsBeyondLineLength) {
177   if (P.line < 0)
178     return llvm::make_error<llvm::StringError>(
179         llvm::formatv("Line value can't be negative ({0})", P.line),
180         llvm::errc::invalid_argument);
181   if (P.character < 0)
182     return llvm::make_error<llvm::StringError>(
183         llvm::formatv("Character value can't be negative ({0})", P.character),
184         llvm::errc::invalid_argument);
185   size_t StartOfLine = 0;
186   for (int I = 0; I != P.line; ++I) {
187     size_t NextNL = Code.find('\n', StartOfLine);
188     if (NextNL == llvm::StringRef::npos)
189       return llvm::make_error<llvm::StringError>(
190           llvm::formatv("Line value is out of range ({0})", P.line),
191           llvm::errc::invalid_argument);
192     StartOfLine = NextNL + 1;
193   }
194   StringRef Line =
195       Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });
196 
197   // P.character may be in UTF-16, transcode if necessary.
198   bool Valid;
199   size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
200   if (!Valid && !AllowColumnsBeyondLineLength)
201     return llvm::make_error<llvm::StringError>(
202         llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(),
203                       P.character, P.line),
204         llvm::errc::invalid_argument);
205   return StartOfLine + ByteInLine;
206 }
207 
offsetToPosition(llvm::StringRef Code,size_t Offset)208 Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
209   Offset = std::min(Code.size(), Offset);
210   llvm::StringRef Before = Code.substr(0, Offset);
211   int Lines = Before.count('\n');
212   size_t PrevNL = Before.rfind('\n');
213   size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
214   Position Pos;
215   Pos.line = Lines;
216   Pos.character = lspLength(Before.substr(StartOfLine));
217   return Pos;
218 }
219 
sourceLocToPosition(const SourceManager & SM,SourceLocation Loc)220 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
221   // We use the SourceManager's line tables, but its column number is in bytes.
222   FileID FID;
223   unsigned Offset;
224   std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
225   Position P;
226   P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
227   bool Invalid = false;
228   llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
229   if (!Invalid) {
230     auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
231     auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
232     P.character = lspLength(LineSoFar);
233   }
234   return P;
235 }
236 
isSpelledInSource(SourceLocation Loc,const SourceManager & SM)237 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) {
238   if (Loc.isMacroID()) {
239     std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM);
240     if (llvm::StringRef(PrintLoc).startswith("<scratch") ||
241         llvm::StringRef(PrintLoc).startswith("<command line>"))
242       return false;
243   }
244   return true;
245 }
246 
isValidFileRange(const SourceManager & Mgr,SourceRange R)247 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
248   if (!R.getBegin().isValid() || !R.getEnd().isValid())
249     return false;
250 
251   FileID BeginFID;
252   size_t BeginOffset = 0;
253   std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());
254 
255   FileID EndFID;
256   size_t EndOffset = 0;
257   std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());
258 
259   return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
260 }
261 
includeHashLoc(FileID IncludedFile,const SourceManager & SM)262 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
263   assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
264   FileID IncludingFile;
265   unsigned Offset;
266   std::tie(IncludingFile, Offset) =
267       SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
268   bool Invalid = false;
269   llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
270   if (Invalid)
271     return SourceLocation();
272   // Now buf is "...\n#include <foo>\n..."
273   // and Offset points here:   ^
274   // Rewind to the preceding # on the line.
275   assert(Offset < Buf.size());
276   for (;; --Offset) {
277     if (Buf[Offset] == '#')
278       return SM.getComposedLoc(IncludingFile, Offset);
279     if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on?
280       return SourceLocation();
281   }
282 }
283 
getTokenLengthAtLoc(SourceLocation Loc,const SourceManager & SM,const LangOptions & LangOpts)284 static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
285                                     const LangOptions &LangOpts) {
286   Token TheTok;
287   if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
288     return 0;
289   // FIXME: Here we check whether the token at the location is a greatergreater
290   // (>>) token and consider it as a single greater (>). This is to get it
291   // working for templates but it isn't correct for the right shift operator. We
292   // can avoid this by using half open char ranges in getFileRange() but getting
293   // token ending is not well supported in macroIDs.
294   if (TheTok.is(tok::greatergreater))
295     return 1;
296   return TheTok.getLength();
297 }
298 
299 // Returns location of the last character of the token at a given loc
getLocForTokenEnd(SourceLocation BeginLoc,const SourceManager & SM,const LangOptions & LangOpts)300 static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
301                                         const SourceManager &SM,
302                                         const LangOptions &LangOpts) {
303   unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
304   return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
305 }
306 
307 // Returns location of the starting of the token at a given EndLoc
getLocForTokenBegin(SourceLocation EndLoc,const SourceManager & SM,const LangOptions & LangOpts)308 static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
309                                           const SourceManager &SM,
310                                           const LangOptions &LangOpts) {
311   return EndLoc.getLocWithOffset(
312       -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
313 }
314 
315 // Converts a char source range to a token range.
toTokenRange(CharSourceRange Range,const SourceManager & SM,const LangOptions & LangOpts)316 static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
317                                 const LangOptions &LangOpts) {
318   if (!Range.isTokenRange())
319     Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
320   return Range.getAsRange();
321 }
322 // Returns the union of two token ranges.
323 // To find the maximum of the Ends of the ranges, we compare the location of the
324 // last character of the token.
unionTokenRange(SourceRange R1,SourceRange R2,const SourceManager & SM,const LangOptions & LangOpts)325 static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
326                                    const SourceManager &SM,
327                                    const LangOptions &LangOpts) {
328   SourceLocation Begin =
329       SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
330           ? R1.getBegin()
331           : R2.getBegin();
332   SourceLocation End =
333       SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
334                                    getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
335           ? R2.getEnd()
336           : R1.getEnd();
337   return SourceRange(Begin, End);
338 }
339 
340 // Given a range whose endpoints may be in different expansions or files,
341 // tries to find a range within a common file by following up the expansion and
342 // include location in each.
rangeInCommonFile(SourceRange R,const SourceManager & SM,const LangOptions & LangOpts)343 static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM,
344                                      const LangOptions &LangOpts) {
345   // Fast path for most common cases.
346   if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd()))
347     return R;
348   // Record the stack of expansion locations for the beginning, keyed by FileID.
349   llvm::DenseMap<FileID, SourceLocation> BeginExpansions;
350   for (SourceLocation Begin = R.getBegin(); Begin.isValid();
351        Begin = Begin.isFileID()
352                    ? includeHashLoc(SM.getFileID(Begin), SM)
353                    : SM.getImmediateExpansionRange(Begin).getBegin()) {
354     BeginExpansions[SM.getFileID(Begin)] = Begin;
355   }
356   // Move up the stack of expansion locations for the end until we find the
357   // location in BeginExpansions with that has the same file id.
358   for (SourceLocation End = R.getEnd(); End.isValid();
359        End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM)
360                             : toTokenRange(SM.getImmediateExpansionRange(End),
361                                            SM, LangOpts)
362                                   .getEnd()) {
363     auto It = BeginExpansions.find(SM.getFileID(End));
364     if (It != BeginExpansions.end()) {
365       if (SM.getFileOffset(It->second) > SM.getFileOffset(End))
366         return SourceLocation();
367       return {It->second, End};
368     }
369   }
370   return SourceRange();
371 }
372 
373 // Find an expansion range (not necessarily immediate) the ends of which are in
374 // the same file id.
375 static SourceRange
getExpansionTokenRangeInSameFile(SourceLocation Loc,const SourceManager & SM,const LangOptions & LangOpts)376 getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
377                                  const LangOptions &LangOpts) {
378   return rangeInCommonFile(
379       toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
380       LangOpts);
381 }
382 
383 // Returns the file range for a given Location as a Token Range
384 // This is quite similar to getFileLoc in SourceManager as both use
385 // getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
386 // However:
387 // - We want to maintain the full range information as we move from one file to
388 //   the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
389 // - We want to split '>>' tokens as the lexer parses the '>>' in nested
390 //   template instantiations as a '>>' instead of two '>'s.
391 // There is also getExpansionRange but it simply calls
392 // getImmediateExpansionRange on the begin and ends separately which is wrong.
getTokenFileRange(SourceLocation Loc,const SourceManager & SM,const LangOptions & LangOpts)393 static SourceRange getTokenFileRange(SourceLocation Loc,
394                                      const SourceManager &SM,
395                                      const LangOptions &LangOpts) {
396   SourceRange FileRange = Loc;
397   while (!FileRange.getBegin().isFileID()) {
398     if (SM.isMacroArgExpansion(FileRange.getBegin())) {
399       FileRange = unionTokenRange(
400           SM.getImmediateSpellingLoc(FileRange.getBegin()),
401           SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
402       assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
403     } else {
404       SourceRange ExpansionRangeForBegin =
405           getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
406       SourceRange ExpansionRangeForEnd =
407           getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
408       if (ExpansionRangeForBegin.isInvalid() ||
409           ExpansionRangeForEnd.isInvalid())
410         return SourceRange();
411       assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
412                                     ExpansionRangeForEnd.getBegin()) &&
413              "Both Expansion ranges should be in same file.");
414       FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
415                                   SM, LangOpts);
416     }
417   }
418   return FileRange;
419 }
420 
isInsideMainFile(SourceLocation Loc,const SourceManager & SM)421 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
422   if (!Loc.isValid())
423     return false;
424   FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
425   return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
426 }
427 
toHalfOpenFileRange(const SourceManager & SM,const LangOptions & LangOpts,SourceRange R)428 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
429                                                 const LangOptions &LangOpts,
430                                                 SourceRange R) {
431   SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
432   if (!isValidFileRange(SM, R1))
433     return llvm::None;
434 
435   SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
436   if (!isValidFileRange(SM, R2))
437     return llvm::None;
438 
439   SourceRange Result =
440       rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
441   unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
442   // Convert from closed token range to half-open (char) range
443   Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
444   if (!isValidFileRange(SM, Result))
445     return llvm::None;
446 
447   return Result;
448 }
449 
toSourceCode(const SourceManager & SM,SourceRange R)450 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
451   assert(isValidFileRange(SM, R));
452   bool Invalid = false;
453   auto *Buf = SM.getBuffer(SM.getFileID(R.getBegin()), &Invalid);
454   assert(!Invalid);
455 
456   size_t BeginOffset = SM.getFileOffset(R.getBegin());
457   size_t EndOffset = SM.getFileOffset(R.getEnd());
458   return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
459 }
460 
sourceLocationInMainFile(const SourceManager & SM,Position P)461 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
462                                                         Position P) {
463   llvm::StringRef Code = SM.getBuffer(SM.getMainFileID())->getBuffer();
464   auto Offset =
465       positionToOffset(Code, P, /*AllowColumnBeyondLineLength=*/false);
466   if (!Offset)
467     return Offset.takeError();
468   return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
469 }
470 
halfOpenToRange(const SourceManager & SM,CharSourceRange R)471 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
472   // Clang is 1-based, LSP uses 0-based indexes.
473   Position Begin = sourceLocToPosition(SM, R.getBegin());
474   Position End = sourceLocToPosition(SM, R.getEnd());
475 
476   return {Begin, End};
477 }
478 
offsetToClangLineColumn(llvm::StringRef Code,size_t Offset)479 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
480                                                   size_t Offset) {
481   Offset = std::min(Code.size(), Offset);
482   llvm::StringRef Before = Code.substr(0, Offset);
483   int Lines = Before.count('\n');
484   size_t PrevNL = Before.rfind('\n');
485   size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
486   return {Lines + 1, Offset - StartOfLine + 1};
487 }
488 
splitQualifiedName(StringRef QName)489 std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
490   size_t Pos = QName.rfind("::");
491   if (Pos == llvm::StringRef::npos)
492     return {llvm::StringRef(), QName};
493   return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
494 }
495 
replacementToEdit(llvm::StringRef Code,const tooling::Replacement & R)496 TextEdit replacementToEdit(llvm::StringRef Code,
497                            const tooling::Replacement &R) {
498   Range ReplacementRange = {
499       offsetToPosition(Code, R.getOffset()),
500       offsetToPosition(Code, R.getOffset() + R.getLength())};
501   return {ReplacementRange, std::string(R.getReplacementText())};
502 }
503 
replacementsToEdits(llvm::StringRef Code,const tooling::Replacements & Repls)504 std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
505                                           const tooling::Replacements &Repls) {
506   std::vector<TextEdit> Edits;
507   for (const auto &R : Repls)
508     Edits.push_back(replacementToEdit(Code, R));
509   return Edits;
510 }
511 
getCanonicalPath(const FileEntry * F,const SourceManager & SourceMgr)512 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
513                                              const SourceManager &SourceMgr) {
514   if (!F)
515     return None;
516 
517   llvm::SmallString<128> FilePath = F->getName();
518   if (!llvm::sys::path::is_absolute(FilePath)) {
519     if (auto EC =
520             SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
521                 FilePath)) {
522       elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
523            EC.message());
524       return None;
525     }
526   }
527 
528   // Handle the symbolic link path case where the current working directory
529   // (getCurrentWorkingDirectory) is a symlink. We always want to the real
530   // file path (instead of the symlink path) for the  C++ symbols.
531   //
532   // Consider the following example:
533   //
534   //   src dir: /project/src/foo.h
535   //   current working directory (symlink): /tmp/build -> /project/src/
536   //
537   //  The file path of Symbol is "/project/src/foo.h" instead of
538   //  "/tmp/build/foo.h"
539   if (auto Dir = SourceMgr.getFileManager().getDirectory(
540           llvm::sys::path::parent_path(FilePath))) {
541     llvm::SmallString<128> RealPath;
542     llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir);
543     llvm::sys::path::append(RealPath, DirName,
544                             llvm::sys::path::filename(FilePath));
545     return RealPath.str().str();
546   }
547 
548   return FilePath.str().str();
549 }
550 
toTextEdit(const FixItHint & FixIt,const SourceManager & M,const LangOptions & L)551 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
552                     const LangOptions &L) {
553   TextEdit Result;
554   Result.range =
555       halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
556   Result.newText = FixIt.CodeToInsert;
557   return Result;
558 }
559 
digest(llvm::StringRef Content)560 FileDigest digest(llvm::StringRef Content) {
561   uint64_t Hash{llvm::xxHash64(Content)};
562   FileDigest Result;
563   for (unsigned I = 0; I < Result.size(); ++I) {
564     Result[I] = uint8_t(Hash);
565     Hash >>= 8;
566   }
567   return Result;
568 }
569 
digestFile(const SourceManager & SM,FileID FID)570 llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
571   bool Invalid = false;
572   llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
573   if (Invalid)
574     return None;
575   return digest(Content);
576 }
577 
getFormatStyleForFile(llvm::StringRef File,llvm::StringRef Content,const ThreadsafeFS & TFS)578 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
579                                           llvm::StringRef Content,
580                                           const ThreadsafeFS &TFS) {
581   auto Style = format::getStyle(format::DefaultFormatStyle, File,
582                                 format::DefaultFallbackStyle, Content,
583                                 TFS.view(/*CWD=*/llvm::None).get());
584   if (!Style) {
585     log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
586         Style.takeError());
587     return format::getLLVMStyle();
588   }
589   return *Style;
590 }
591 
592 llvm::Expected<tooling::Replacements>
cleanupAndFormat(StringRef Code,const tooling::Replacements & Replaces,const format::FormatStyle & Style)593 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
594                  const format::FormatStyle &Style) {
595   auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
596   if (!CleanReplaces)
597     return CleanReplaces;
598   return formatReplacements(Code, std::move(*CleanReplaces), Style);
599 }
600 
601 static void
lex(llvm::StringRef Code,const LangOptions & LangOpts,llvm::function_ref<void (const syntax::Token &,const SourceManager & SM)> Action)602 lex(llvm::StringRef Code, const LangOptions &LangOpts,
603     llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
604         Action) {
605   // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
606   std::string NullTerminatedCode = Code.str();
607   SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
608   auto &SM = FileSM.get();
609   for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
610     Action(Tok, SM);
611 }
612 
collectIdentifiers(llvm::StringRef Content,const format::FormatStyle & Style)613 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
614                                              const format::FormatStyle &Style) {
615   llvm::StringMap<unsigned> Identifiers;
616   auto LangOpt = format::getFormattingLangOpts(Style);
617   lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
618     if (Tok.kind() == tok::identifier)
619       ++Identifiers[Tok.text(SM)];
620     // FIXME: Should this function really return keywords too ?
621     else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
622       ++Identifiers[Keyword];
623   });
624   return Identifiers;
625 }
626 
collectIdentifierRanges(llvm::StringRef Identifier,llvm::StringRef Content,const LangOptions & LangOpts)627 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
628                                            llvm::StringRef Content,
629                                            const LangOptions &LangOpts) {
630   std::vector<Range> Ranges;
631   lex(Content, LangOpts,
632       [&](const syntax::Token &Tok, const SourceManager &SM) {
633         if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
634           return;
635         Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
636       });
637   return Ranges;
638 }
639 
640 namespace {
641 struct NamespaceEvent {
642   enum {
643     BeginNamespace, // namespace <ns> {.     Payload is resolved <ns>.
644     EndNamespace,   // } // namespace <ns>.  Payload is resolved *outer*
645                     // namespace.
646     UsingDirective  // using namespace <ns>. Payload is unresolved <ns>.
647   } Trigger;
648   std::string Payload;
649   Position Pos;
650 };
651 // Scans C++ source code for constructs that change the visible namespaces.
parseNamespaceEvents(llvm::StringRef Code,const LangOptions & LangOpts,llvm::function_ref<void (NamespaceEvent)> Callback)652 void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
653                           llvm::function_ref<void(NamespaceEvent)> Callback) {
654 
655   // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
656   std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
657   // Stack counts open braces. true if the brace opened a namespace.
658   std::vector<bool> BraceStack;
659 
660   enum {
661     Default,
662     Namespace,          // just saw 'namespace'
663     NamespaceName,      // just saw 'namespace' NSName
664     Using,              // just saw 'using'
665     UsingNamespace,     // just saw 'using namespace'
666     UsingNamespaceName, // just saw 'using namespace' NSName
667   } State = Default;
668   std::string NSName;
669 
670   NamespaceEvent Event;
671   lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
672     Event.Pos = sourceLocToPosition(SM, Tok.location());
673     switch (Tok.kind()) {
674     case tok::kw_using:
675       State = State == Default ? Using : Default;
676       break;
677     case tok::kw_namespace:
678       switch (State) {
679       case Using:
680         State = UsingNamespace;
681         break;
682       case Default:
683         State = Namespace;
684         break;
685       default:
686         State = Default;
687         break;
688       }
689       break;
690     case tok::identifier:
691       switch (State) {
692       case UsingNamespace:
693         NSName.clear();
694         LLVM_FALLTHROUGH;
695       case UsingNamespaceName:
696         NSName.append(Tok.text(SM).str());
697         State = UsingNamespaceName;
698         break;
699       case Namespace:
700         NSName.clear();
701         LLVM_FALLTHROUGH;
702       case NamespaceName:
703         NSName.append(Tok.text(SM).str());
704         State = NamespaceName;
705         break;
706       case Using:
707       case Default:
708         State = Default;
709         break;
710       }
711       break;
712     case tok::coloncolon:
713       // This can come at the beginning or in the middle of a namespace
714       // name.
715       switch (State) {
716       case UsingNamespace:
717         NSName.clear();
718         LLVM_FALLTHROUGH;
719       case UsingNamespaceName:
720         NSName.append("::");
721         State = UsingNamespaceName;
722         break;
723       case NamespaceName:
724         NSName.append("::");
725         State = NamespaceName;
726         break;
727       case Namespace: // Not legal here.
728       case Using:
729       case Default:
730         State = Default;
731         break;
732       }
733       break;
734     case tok::l_brace:
735       // Record which { started a namespace, so we know when } ends one.
736       if (State == NamespaceName) {
737         // Parsed: namespace <name> {
738         BraceStack.push_back(true);
739         Enclosing.push_back(NSName);
740         Event.Trigger = NamespaceEvent::BeginNamespace;
741         Event.Payload = llvm::join(Enclosing, "::");
742         Callback(Event);
743       } else {
744         // This case includes anonymous namespaces (State = Namespace).
745         // For our purposes, they're not namespaces and we ignore them.
746         BraceStack.push_back(false);
747       }
748       State = Default;
749       break;
750     case tok::r_brace:
751       // If braces are unmatched, we're going to be confused, but don't
752       // crash.
753       if (!BraceStack.empty()) {
754         if (BraceStack.back()) {
755           // Parsed: } // namespace
756           Enclosing.pop_back();
757           Event.Trigger = NamespaceEvent::EndNamespace;
758           Event.Payload = llvm::join(Enclosing, "::");
759           Callback(Event);
760         }
761         BraceStack.pop_back();
762       }
763       break;
764     case tok::semi:
765       if (State == UsingNamespaceName) {
766         // Parsed: using namespace <name> ;
767         Event.Trigger = NamespaceEvent::UsingDirective;
768         Event.Payload = std::move(NSName);
769         Callback(Event);
770       }
771       State = Default;
772       break;
773     default:
774       State = Default;
775       break;
776     }
777   });
778 }
779 
780 // Returns the prefix namespaces of NS: {"" ... NS}.
ancestorNamespaces(llvm::StringRef NS)781 llvm::SmallVector<llvm::StringRef, 8> ancestorNamespaces(llvm::StringRef NS) {
782   llvm::SmallVector<llvm::StringRef, 8> Results;
783   Results.push_back(NS.take_front(0));
784   NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
785   for (llvm::StringRef &R : Results)
786     R = NS.take_front(R.end() - NS.begin());
787   return Results;
788 }
789 
790 } // namespace
791 
visibleNamespaces(llvm::StringRef Code,const LangOptions & LangOpts)792 std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
793                                            const LangOptions &LangOpts) {
794   std::string Current;
795   // Map from namespace to (resolved) namespaces introduced via using directive.
796   llvm::StringMap<llvm::StringSet<>> UsingDirectives;
797 
798   parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
799     llvm::StringRef NS = Event.Payload;
800     switch (Event.Trigger) {
801     case NamespaceEvent::BeginNamespace:
802     case NamespaceEvent::EndNamespace:
803       Current = std::move(Event.Payload);
804       break;
805     case NamespaceEvent::UsingDirective:
806       if (NS.consume_front("::"))
807         UsingDirectives[Current].insert(NS);
808       else {
809         for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
810           if (Enclosing.empty())
811             UsingDirectives[Current].insert(NS);
812           else
813             UsingDirectives[Current].insert((Enclosing + "::" + NS).str());
814         }
815       }
816       break;
817     }
818   });
819 
820   std::vector<std::string> Found;
821   for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
822     Found.push_back(std::string(Enclosing));
823     auto It = UsingDirectives.find(Enclosing);
824     if (It != UsingDirectives.end())
825       for (const auto &Used : It->second)
826         Found.push_back(std::string(Used.getKey()));
827   }
828 
829   llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
830     if (Current == RHS)
831       return false;
832     if (Current == LHS)
833       return true;
834     return LHS < RHS;
835   });
836   Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
837   return Found;
838 }
839 
collectWords(llvm::StringRef Content)840 llvm::StringSet<> collectWords(llvm::StringRef Content) {
841   // We assume short words are not significant.
842   // We may want to consider other stopwords, e.g. language keywords.
843   // (A very naive implementation showed no benefit, but lexing might do better)
844   static constexpr int MinWordLength = 4;
845 
846   std::vector<CharRole> Roles(Content.size());
847   calculateRoles(Content, Roles);
848 
849   llvm::StringSet<> Result;
850   llvm::SmallString<256> Word;
851   auto Flush = [&] {
852     if (Word.size() >= MinWordLength) {
853       for (char &C : Word)
854         C = llvm::toLower(C);
855       Result.insert(Word);
856     }
857     Word.clear();
858   };
859   for (unsigned I = 0; I < Content.size(); ++I) {
860     switch (Roles[I]) {
861     case Head:
862       Flush();
863       LLVM_FALLTHROUGH;
864     case Tail:
865       Word.push_back(Content[I]);
866       break;
867     case Unknown:
868     case Separator:
869       Flush();
870       break;
871     }
872   }
873   Flush();
874 
875   return Result;
876 }
877 
isLikelyIdentifier(llvm::StringRef Word,llvm::StringRef Before,llvm::StringRef After)878 static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
879                                llvm::StringRef After) {
880   // `foo` is an identifier.
881   if (Before.endswith("`") && After.startswith("`"))
882     return true;
883   // In foo::bar, both foo and bar are identifiers.
884   if (Before.endswith("::") || After.startswith("::"))
885     return true;
886   // Doxygen tags like \c foo indicate identifiers.
887   // Don't search too far back.
888   // This duplicates clang's doxygen parser, revisit if it gets complicated.
889   Before = Before.take_back(100); // Don't search too far back.
890   auto Pos = Before.find_last_of("\\@");
891   if (Pos != llvm::StringRef::npos) {
892     llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
893     if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
894         Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
895         Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
896         Tag == "throws" || Tag == "link")
897       return true;
898   }
899 
900   // Word contains underscore.
901   // This handles things like snake_case and MACRO_CASE.
902   if (Word.contains('_')) {
903     return true;
904   }
905   // Word contains capital letter other than at beginning.
906   // This handles things like lowerCamel and UpperCamel.
907   // The check for also containing a lowercase letter is to rule out
908   // initialisms like "HTTP".
909   bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
910   bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
911   if (HasLower && HasUpper) {
912     return true;
913   }
914   // FIXME: consider mid-sentence Capitalization?
915   return false;
916 }
917 
touching(SourceLocation SpelledLoc,const syntax::TokenBuffer & TB,const LangOptions & LangOpts)918 llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
919                                                   const syntax::TokenBuffer &TB,
920                                                   const LangOptions &LangOpts) {
921   const auto &SM = TB.sourceManager();
922   auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
923   for (const auto &T : Touching) {
924     // If the token is an identifier or a keyword, don't use any heuristics.
925     if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
926       SpelledWord Result;
927       Result.Location = T.location();
928       Result.Text = T.text(SM);
929       Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
930       Result.PartOfSpelledToken = &T;
931       Result.SpelledToken = &T;
932       auto Expanded =
933           TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
934       if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
935         Result.ExpandedToken = &Expanded.front();
936       return Result;
937     }
938   }
939   FileID File;
940   unsigned Offset;
941   std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
942   bool Invalid = false;
943   llvm::StringRef Code = SM.getBufferData(File, &Invalid);
944   if (Invalid)
945     return llvm::None;
946   unsigned B = Offset, E = Offset;
947   while (B > 0 && isIdentifierBody(Code[B - 1]))
948     --B;
949   while (E < Code.size() && isIdentifierBody(Code[E]))
950     ++E;
951   if (B == E)
952     return llvm::None;
953 
954   SpelledWord Result;
955   Result.Location = SM.getComposedLoc(File, B);
956   Result.Text = Code.slice(B, E);
957   Result.LikelyIdentifier =
958       isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
959       // should not be a keyword
960       tok::isAnyIdentifier(
961           IdentifierTable(LangOpts).get(Result.Text).getTokenID());
962   for (const auto &T : Touching)
963     if (T.location() <= Result.Location)
964       Result.PartOfSpelledToken = &T;
965   return Result;
966 }
967 
locateMacroAt(const syntax::Token & SpelledTok,Preprocessor & PP)968 llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
969                                            Preprocessor &PP) {
970   SourceLocation Loc = SpelledTok.location();
971   assert(Loc.isFileID());
972   const auto &SM = PP.getSourceManager();
973   IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
974   if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
975     return None;
976 
977   // Get the definition just before the searched location so that a macro
978   // referenced in a '#undef MACRO' can still be found. Note that we only do
979   // that if Loc is not pointing at start of file.
980   if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc)
981     Loc = Loc.getLocWithOffset(-1);
982   MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc);
983   if (auto *MI = MacroDef.getMacroInfo())
984     return DefinedMacro{
985         IdentifierInfo->getName(), MI,
986         translatePreamblePatchLocation(MI->getDefinitionLoc(), SM)};
987   return None;
988 }
989 
apply() const990 llvm::Expected<std::string> Edit::apply() const {
991   return tooling::applyAllReplacements(InitialCode, Replacements);
992 }
993 
asTextEdits() const994 std::vector<TextEdit> Edit::asTextEdits() const {
995   return replacementsToEdits(InitialCode, Replacements);
996 }
997 
canApplyTo(llvm::StringRef Code) const998 bool Edit::canApplyTo(llvm::StringRef Code) const {
999   // Create line iterators, since line numbers are important while applying our
1000   // edit we cannot skip blank lines.
1001   auto LHS = llvm::MemoryBuffer::getMemBuffer(Code);
1002   llvm::line_iterator LHSIt(*LHS, /*SkipBlanks=*/false);
1003 
1004   auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode);
1005   llvm::line_iterator RHSIt(*RHS, /*SkipBlanks=*/false);
1006 
1007   // Compare the InitialCode we prepared the edit for with the Code we received
1008   // line by line to make sure there are no differences.
1009   // FIXME: This check is too conservative now, it should be enough to only
1010   // check lines around the replacements contained inside the Edit.
1011   while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) {
1012     if (*LHSIt != *RHSIt)
1013       return false;
1014     ++LHSIt;
1015     ++RHSIt;
1016   }
1017 
1018   // After we reach EOF for any of the files we make sure the other one doesn't
1019   // contain any additional content except empty lines, they should not
1020   // interfere with the edit we produced.
1021   while (!LHSIt.is_at_eof()) {
1022     if (!LHSIt->empty())
1023       return false;
1024     ++LHSIt;
1025   }
1026   while (!RHSIt.is_at_eof()) {
1027     if (!RHSIt->empty())
1028       return false;
1029     ++RHSIt;
1030   }
1031   return true;
1032 }
1033 
reformatEdit(Edit & E,const format::FormatStyle & Style)1034 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) {
1035   if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style))
1036     E.Replacements = std::move(*NewEdits);
1037   else
1038     return NewEdits.takeError();
1039   return llvm::Error::success();
1040 }
1041 
getEligiblePoints(llvm::StringRef Code,llvm::StringRef FullyQualifiedName,const LangOptions & LangOpts)1042 EligibleRegion getEligiblePoints(llvm::StringRef Code,
1043                                  llvm::StringRef FullyQualifiedName,
1044                                  const LangOptions &LangOpts) {
1045   EligibleRegion ER;
1046   // Start with global namespace.
1047   std::vector<std::string> Enclosing = {""};
1048   // FIXME: In addition to namespaces try to generate events for function
1049   // definitions as well. One might use a closing parantheses(")" followed by an
1050   // opening brace "{" to trigger the start.
1051   parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
1052     // Using Directives only introduces declarations to current scope, they do
1053     // not change the current namespace, so skip them.
1054     if (Event.Trigger == NamespaceEvent::UsingDirective)
1055       return;
1056     // Do not qualify the global namespace.
1057     if (!Event.Payload.empty())
1058       Event.Payload.append("::");
1059 
1060     std::string CurrentNamespace;
1061     if (Event.Trigger == NamespaceEvent::BeginNamespace) {
1062       Enclosing.emplace_back(std::move(Event.Payload));
1063       CurrentNamespace = Enclosing.back();
1064       // parseNameSpaceEvents reports the beginning position of a token; we want
1065       // to insert after '{', so increment by one.
1066       ++Event.Pos.character;
1067     } else {
1068       // Event.Payload points to outer namespace when exiting a scope, so use
1069       // the namespace we've last entered instead.
1070       CurrentNamespace = std::move(Enclosing.back());
1071       Enclosing.pop_back();
1072       assert(Enclosing.back() == Event.Payload);
1073     }
1074 
1075     // Ignore namespaces that are not a prefix of the target.
1076     if (!FullyQualifiedName.startswith(CurrentNamespace))
1077       return;
1078 
1079     // Prefer the namespace that shares the longest prefix with target.
1080     if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) {
1081       ER.EligiblePoints.clear();
1082       ER.EnclosingNamespace = CurrentNamespace;
1083     }
1084     if (CurrentNamespace.size() == ER.EnclosingNamespace.size())
1085       ER.EligiblePoints.emplace_back(std::move(Event.Pos));
1086   });
1087   // If there were no shared namespaces just return EOF.
1088   if (ER.EligiblePoints.empty()) {
1089     assert(ER.EnclosingNamespace.empty());
1090     ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size()));
1091   }
1092   return ER;
1093 }
1094 
isHeaderFile(llvm::StringRef FileName,llvm::Optional<LangOptions> LangOpts)1095 bool isHeaderFile(llvm::StringRef FileName,
1096                   llvm::Optional<LangOptions> LangOpts) {
1097   // Respect the langOpts, for non-file-extension cases, e.g. standard library
1098   // files.
1099   if (LangOpts && LangOpts->IsHeaderFile)
1100     return true;
1101   namespace types = clang::driver::types;
1102   auto Lang = types::lookupTypeForExtension(
1103       llvm::sys::path::extension(FileName).substr(1));
1104   return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang);
1105 }
1106 
isProtoFile(SourceLocation Loc,const SourceManager & SM)1107 bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
1108   auto FileName = SM.getFilename(Loc);
1109   if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
1110     return false;
1111   auto FID = SM.getFileID(Loc);
1112   // All proto generated headers should start with this line.
1113   static const char *PROTO_HEADER_COMMENT =
1114       "// Generated by the protocol buffer compiler.  DO NOT EDIT!";
1115   // Double check that this is an actual protobuf header.
1116   return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT);
1117 }
1118 
1119 } // namespace clangd
1120 } // namespace clang
1121