1 //===--- Rename.cpp - Symbol-rename refactorings -----------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "refactor/Rename.h"
10 #include "AST.h"
11 #include "FindTarget.h"
12 #include "ParsedAST.h"
13 #include "Selection.h"
14 #include "SourceCode.h"
15 #include "index/SymbolCollector.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ASTTypeTraits.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/DeclTemplate.h"
23 #include "clang/AST/ParentMapContext.h"
24 #include "clang/AST/Stmt.h"
25 #include "clang/Basic/CharInfo.h"
26 #include "clang/Basic/LLVM.h"
27 #include "clang/Basic/SourceLocation.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/JSON.h"
36 #include <algorithm>
37 
38 namespace clang {
39 namespace clangd {
40 namespace {
41 
filePath(const SymbolLocation & Loc,llvm::StringRef HintFilePath)42 llvm::Optional<std::string> filePath(const SymbolLocation &Loc,
43                                      llvm::StringRef HintFilePath) {
44   if (!Loc)
45     return None;
46   auto Path = URI::resolve(Loc.FileURI, HintFilePath);
47   if (!Path) {
48     elog("Could not resolve URI {0}: {1}", Loc.FileURI, Path.takeError());
49     return None;
50   }
51 
52   return *Path;
53 }
54 
55 // Returns true if the given location is expanded from any macro body.
isInMacroBody(const SourceManager & SM,SourceLocation Loc)56 bool isInMacroBody(const SourceManager &SM, SourceLocation Loc) {
57   while (Loc.isMacroID()) {
58     if (SM.isMacroBodyExpansion(Loc))
59       return true;
60     Loc = SM.getImmediateMacroCallerLoc(Loc);
61   }
62 
63   return false;
64 }
65 
66 // Canonical declarations help simplify the process of renaming. Examples:
67 // - Template's canonical decl is the templated declaration (i.e.
68 //   ClassTemplateDecl is canonicalized to its child CXXRecordDecl,
69 //   FunctionTemplateDecl - to child FunctionDecl)
70 // - Given a constructor/destructor, canonical declaration is the parent
71 //   CXXRecordDecl because we want to rename both type name and its ctor/dtor.
72 // - All specializations are canonicalized to the primary template. For example:
73 //
74 //    template <typename T, int U>
75 //    bool Foo = true; (1)
76 //
77 //    template <typename T>
78 //    bool Foo<T, 0> = true; (2)
79 //
80 //    template <>
81 //    bool Foo<int, 0> = true; (3)
82 //
83 // Here, both partial (2) and full (3) specializations are canonicalized to (1)
84 // which ensures all three of them are renamed.
canonicalRenameDecl(const NamedDecl * D)85 const NamedDecl *canonicalRenameDecl(const NamedDecl *D) {
86   if (const auto *VarTemplate = dyn_cast<VarTemplateSpecializationDecl>(D))
87     return canonicalRenameDecl(
88         VarTemplate->getSpecializedTemplate()->getTemplatedDecl());
89   if (const auto *Template = dyn_cast<TemplateDecl>(D))
90     if (const NamedDecl *TemplatedDecl = Template->getTemplatedDecl())
91       return canonicalRenameDecl(TemplatedDecl);
92   if (const auto *ClassTemplateSpecialization =
93           dyn_cast<ClassTemplateSpecializationDecl>(D))
94     return canonicalRenameDecl(
95         ClassTemplateSpecialization->getSpecializedTemplate()
96             ->getTemplatedDecl());
97   if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
98     if (Method->getDeclKind() == Decl::Kind::CXXConstructor ||
99         Method->getDeclKind() == Decl::Kind::CXXDestructor)
100       return canonicalRenameDecl(Method->getParent());
101     if (const FunctionDecl *InstantiatedMethod =
102             Method->getInstantiatedFromMemberFunction())
103       Method = cast<CXXMethodDecl>(InstantiatedMethod);
104     // FIXME(kirillbobyrev): For virtual methods with
105     // size_overridden_methods() > 1, this will not rename all functions it
106     // overrides, because this code assumes there is a single canonical
107     // declaration.
108     while (Method->isVirtual() && Method->size_overridden_methods())
109       Method = *Method->overridden_methods().begin();
110     return Method->getCanonicalDecl();
111   }
112   if (const auto *Function = dyn_cast<FunctionDecl>(D))
113     if (const FunctionTemplateDecl *Template = Function->getPrimaryTemplate())
114       return canonicalRenameDecl(Template);
115   if (const auto *Field = dyn_cast<FieldDecl>(D)) {
116     // This is a hacky way to do something like
117     // CXXMethodDecl::getInstantiatedFromMemberFunction for the field because
118     // Clang AST does not store relevant information about the field that is
119     // instantiated.
120     const auto *FieldParent =
121         dyn_cast_or_null<CXXRecordDecl>(Field->getParent());
122     if (!FieldParent)
123       return Field->getCanonicalDecl();
124     FieldParent = FieldParent->getTemplateInstantiationPattern();
125     // Field is not instantiation.
126     if (!FieldParent || Field->getParent() == FieldParent)
127       return Field->getCanonicalDecl();
128     for (const FieldDecl *Candidate : FieldParent->fields())
129       if (Field->getDeclName() == Candidate->getDeclName())
130         return Candidate->getCanonicalDecl();
131     elog("FieldParent should have field with the same name as Field.");
132   }
133   if (const auto *VD = dyn_cast<VarDecl>(D)) {
134     if (const VarDecl *OriginalVD = VD->getInstantiatedFromStaticDataMember())
135       VD = OriginalVD;
136     return VD->getCanonicalDecl();
137   }
138   return dyn_cast<NamedDecl>(D->getCanonicalDecl());
139 }
140 
locateDeclAt(ParsedAST & AST,SourceLocation TokenStartLoc)141 llvm::DenseSet<const NamedDecl *> locateDeclAt(ParsedAST &AST,
142                                                SourceLocation TokenStartLoc) {
143   unsigned Offset =
144       AST.getSourceManager().getDecomposedSpellingLoc(TokenStartLoc).second;
145 
146   SelectionTree Selection = SelectionTree::createRight(
147       AST.getASTContext(), AST.getTokens(), Offset, Offset);
148   const SelectionTree::Node *SelectedNode = Selection.commonAncestor();
149   if (!SelectedNode)
150     return {};
151 
152   llvm::DenseSet<const NamedDecl *> Result;
153   for (const NamedDecl *D :
154        targetDecl(SelectedNode->ASTNode,
155                   DeclRelation::Alias | DeclRelation::TemplatePattern,
156                   AST.getHeuristicResolver())) {
157     Result.insert(canonicalRenameDecl(D));
158   }
159   return Result;
160 }
161 
162 // By default, we exclude C++ standard symbols and protobuf symbols as rename
163 // these symbols would change system/generated files which are unlikely to be
164 // modified.
isExcluded(const NamedDecl & RenameDecl)165 bool isExcluded(const NamedDecl &RenameDecl) {
166   if (isProtoFile(RenameDecl.getLocation(),
167                   RenameDecl.getASTContext().getSourceManager()))
168     return true;
169   static const auto *StdSymbols = new llvm::DenseSet<llvm::StringRef>({
170 #define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name},
171 #include "StdSymbolMap.inc"
172 #undef SYMBOL
173   });
174   return StdSymbols->count(printQualifiedName(RenameDecl));
175 }
176 
177 enum class ReasonToReject {
178   NoSymbolFound,
179   NoIndexProvided,
180   NonIndexable,
181   UnsupportedSymbol,
182   AmbiguousSymbol,
183 
184   // name validation. FIXME: reconcile with InvalidName
185   SameName,
186 };
187 
renameable(const NamedDecl & RenameDecl,StringRef MainFilePath,const SymbolIndex * Index)188 llvm::Optional<ReasonToReject> renameable(const NamedDecl &RenameDecl,
189                                           StringRef MainFilePath,
190                                           const SymbolIndex *Index) {
191   trace::Span Tracer("Renameable");
192   // Filter out symbols that are unsupported in both rename modes.
193   if (llvm::isa<NamespaceDecl>(&RenameDecl))
194     return ReasonToReject::UnsupportedSymbol;
195   if (const auto *FD = llvm::dyn_cast<FunctionDecl>(&RenameDecl)) {
196     if (FD->isOverloadedOperator())
197       return ReasonToReject::UnsupportedSymbol;
198   }
199   // function-local symbols is safe to rename.
200   if (RenameDecl.getParentFunctionOrMethod())
201     return None;
202 
203   if (isExcluded(RenameDecl))
204     return ReasonToReject::UnsupportedSymbol;
205 
206   // Check whether the symbol being rename is indexable.
207   auto &ASTCtx = RenameDecl.getASTContext();
208   bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts());
209   bool DeclaredInMainFile =
210       isInsideMainFile(RenameDecl.getBeginLoc(), ASTCtx.getSourceManager());
211   bool IsMainFileOnly = true;
212   if (MainFileIsHeader)
213     // main file is a header, the symbol can't be main file only.
214     IsMainFileOnly = false;
215   else if (!DeclaredInMainFile)
216     IsMainFileOnly = false;
217   // If the symbol is not indexable, we disallow rename.
218   if (!SymbolCollector::shouldCollectSymbol(
219           RenameDecl, RenameDecl.getASTContext(), SymbolCollector::Options(),
220           IsMainFileOnly))
221     return ReasonToReject::NonIndexable;
222 
223 
224   // FIXME: Renaming virtual methods requires to rename all overridens in
225   // subclasses, our index doesn't have this information.
226   if (const auto *S = llvm::dyn_cast<CXXMethodDecl>(&RenameDecl)) {
227     if (S->isVirtual())
228       return ReasonToReject::UnsupportedSymbol;
229   }
230   return None;
231 }
232 
makeError(ReasonToReject Reason)233 llvm::Error makeError(ReasonToReject Reason) {
234   auto Message = [](ReasonToReject Reason) {
235     switch (Reason) {
236     case ReasonToReject::NoSymbolFound:
237       return "there is no symbol at the given location";
238     case ReasonToReject::NoIndexProvided:
239       return "no index provided";
240     case ReasonToReject::NonIndexable:
241       return "symbol may be used in other files (not eligible for indexing)";
242     case ReasonToReject::UnsupportedSymbol:
243       return "symbol is not a supported kind (e.g. namespace, macro)";
244     case ReasonToReject::AmbiguousSymbol:
245       return "there are multiple symbols at the given location";
246     case ReasonToReject::SameName:
247       return "new name is the same as the old name";
248     }
249     llvm_unreachable("unhandled reason kind");
250   };
251   return error("Cannot rename symbol: {0}", Message(Reason));
252 }
253 
254 // Return all rename occurrences in the main file.
findOccurrencesWithinFile(ParsedAST & AST,const NamedDecl & ND)255 std::vector<SourceLocation> findOccurrencesWithinFile(ParsedAST &AST,
256                                                       const NamedDecl &ND) {
257   trace::Span Tracer("FindOccurrencesWithinFile");
258   assert(canonicalRenameDecl(&ND) == &ND &&
259          "ND should be already canonicalized.");
260 
261   std::vector<SourceLocation> Results;
262   for (Decl *TopLevelDecl : AST.getLocalTopLevelDecls()) {
263     findExplicitReferences(
264         TopLevelDecl,
265         [&](ReferenceLoc Ref) {
266           if (Ref.Targets.empty())
267             return;
268           for (const auto *Target : Ref.Targets) {
269             if (canonicalRenameDecl(Target) == &ND) {
270               Results.push_back(Ref.NameLoc);
271               return;
272             }
273           }
274         },
275         AST.getHeuristicResolver());
276   }
277 
278   return Results;
279 }
280 
281 // Detect name conflict with othter DeclStmts in the same enclosing scope.
lookupSiblingWithinEnclosingScope(ASTContext & Ctx,const NamedDecl & RenamedDecl,StringRef NewName)282 const NamedDecl *lookupSiblingWithinEnclosingScope(ASTContext &Ctx,
283                                                    const NamedDecl &RenamedDecl,
284                                                    StringRef NewName) {
285   // Store Parents list outside of GetSingleParent, so that returned pointer is
286   // not invalidated.
287   DynTypedNodeList Storage(DynTypedNode::create(RenamedDecl));
288   auto GetSingleParent = [&](const DynTypedNode &Node) -> const DynTypedNode * {
289     Storage = Ctx.getParents(Node);
290     return (Storage.size() == 1) ? Storage.begin() : nullptr;
291   };
292 
293   // We need to get to the enclosing scope: NamedDecl's parent is typically
294   // DeclStmt (or FunctionProtoTypeLoc in case of function arguments), so
295   // enclosing scope would be the second order parent.
296   const auto *Parent = GetSingleParent(DynTypedNode::create(RenamedDecl));
297   if (!Parent || !(Parent->get<DeclStmt>() || Parent->get<TypeLoc>()))
298     return nullptr;
299   Parent = GetSingleParent(*Parent);
300 
301   // The following helpers check corresponding AST nodes for variable
302   // declarations with the name collision.
303   auto CheckDeclStmt = [&](const DeclStmt *DS,
304                            StringRef Name) -> const NamedDecl * {
305     if (!DS)
306       return nullptr;
307     for (const auto &Child : DS->getDeclGroup())
308       if (const auto *ND = dyn_cast<NamedDecl>(Child))
309         if (ND != &RenamedDecl && ND->getName() == Name)
310           return ND;
311     return nullptr;
312   };
313   auto CheckCompoundStmt = [&](const Stmt *S,
314                                StringRef Name) -> const NamedDecl * {
315     if (const auto *CS = dyn_cast_or_null<CompoundStmt>(S))
316       for (const auto *Node : CS->children())
317         if (const auto *Result = CheckDeclStmt(dyn_cast<DeclStmt>(Node), Name))
318           return Result;
319     return nullptr;
320   };
321   auto CheckConditionVariable = [&](const auto *Scope,
322                                     StringRef Name) -> const NamedDecl * {
323     if (!Scope)
324       return nullptr;
325     return CheckDeclStmt(Scope->getConditionVariableDeclStmt(), Name);
326   };
327 
328   // CompoundStmt is the most common enclosing scope for function-local symbols
329   // In the simplest case we just iterate through sibling DeclStmts and check
330   // for collisions.
331   if (const auto *EnclosingCS = Parent->get<CompoundStmt>()) {
332     if (const auto *Result = CheckCompoundStmt(EnclosingCS, NewName))
333       return Result;
334     const auto *ScopeParent = GetSingleParent(*Parent);
335     // CompoundStmt may be found within if/while/for. In these cases, rename can
336     // collide with the init-statement variable decalaration, they should be
337     // checked.
338     if (const auto *Result =
339             CheckConditionVariable(ScopeParent->get<IfStmt>(), NewName))
340       return Result;
341     if (const auto *Result =
342             CheckConditionVariable(ScopeParent->get<WhileStmt>(), NewName))
343       return Result;
344     if (const auto *For = ScopeParent->get<ForStmt>())
345       if (const auto *Result = CheckDeclStmt(
346               dyn_cast_or_null<DeclStmt>(For->getInit()), NewName))
347         return Result;
348     // Also check if there is a name collision with function arguments.
349     if (const auto *Function = ScopeParent->get<FunctionDecl>())
350       for (const auto *Parameter : Function->parameters())
351         if (Parameter->getName() == NewName)
352           return Parameter;
353     return nullptr;
354   }
355 
356   // When renaming a variable within init-statement within if/while/for
357   // condition, also check the CompoundStmt in the body.
358   if (const auto *EnclosingIf = Parent->get<IfStmt>()) {
359     if (const auto *Result = CheckCompoundStmt(EnclosingIf->getElse(), NewName))
360       return Result;
361     return CheckCompoundStmt(EnclosingIf->getThen(), NewName);
362   }
363   if (const auto *EnclosingWhile = Parent->get<WhileStmt>())
364     return CheckCompoundStmt(EnclosingWhile->getBody(), NewName);
365   if (const auto *EnclosingFor = Parent->get<ForStmt>()) {
366     // Check for conflicts with other declarations within initialization
367     // statement.
368     if (const auto *Result = CheckDeclStmt(
369             dyn_cast_or_null<DeclStmt>(EnclosingFor->getInit()), NewName))
370       return Result;
371     return CheckCompoundStmt(EnclosingFor->getBody(), NewName);
372   }
373   if (const auto *EnclosingFunction = Parent->get<FunctionDecl>()) {
374     // Check for conflicts with other arguments.
375     for (const auto *Parameter : EnclosingFunction->parameters())
376       if (Parameter != &RenamedDecl && Parameter->getName() == NewName)
377         return Parameter;
378     // FIXME: We don't modify all references to function parameters when
379     // renaming from forward declaration now, so using a name colliding with
380     // something in the definition's body is a valid transformation.
381     if (!EnclosingFunction->doesThisDeclarationHaveABody())
382       return nullptr;
383     return CheckCompoundStmt(EnclosingFunction->getBody(), NewName);
384   }
385 
386   return nullptr;
387 }
388 
389 // Lookup the declarations (if any) with the given Name in the context of
390 // RenameDecl.
lookupSiblingsWithinContext(ASTContext & Ctx,const NamedDecl & RenamedDecl,llvm::StringRef NewName)391 const NamedDecl *lookupSiblingsWithinContext(ASTContext &Ctx,
392                                              const NamedDecl &RenamedDecl,
393                                              llvm::StringRef NewName) {
394   const auto &II = Ctx.Idents.get(NewName);
395   DeclarationName LookupName(&II);
396   DeclContextLookupResult LookupResult;
397   const auto *DC = RenamedDecl.getDeclContext();
398   while (DC && DC->isTransparentContext())
399     DC = DC->getParent();
400   switch (DC->getDeclKind()) {
401   // The enclosing DeclContext may not be the enclosing scope, it might have
402   // false positives and negatives, so we only choose "confident" DeclContexts
403   // that don't have any subscopes that are neither DeclContexts nor
404   // transparent.
405   //
406   // Notably, FunctionDecl is excluded -- because local variables are not scoped
407   // to the function, but rather to the CompoundStmt that is its body. Lookup
408   // will not find function-local variables.
409   case Decl::TranslationUnit:
410   case Decl::Namespace:
411   case Decl::Record:
412   case Decl::Enum:
413   case Decl::CXXRecord:
414     LookupResult = DC->lookup(LookupName);
415     break;
416   default:
417     break;
418   }
419   // Lookup may contain the RenameDecl itself, exclude it.
420   for (const auto *D : LookupResult)
421     if (D->getCanonicalDecl() != RenamedDecl.getCanonicalDecl())
422       return D;
423   return nullptr;
424 }
425 
lookupSiblingWithName(ASTContext & Ctx,const NamedDecl & RenamedDecl,llvm::StringRef NewName)426 const NamedDecl *lookupSiblingWithName(ASTContext &Ctx,
427                                        const NamedDecl &RenamedDecl,
428                                        llvm::StringRef NewName) {
429   trace::Span Tracer("LookupSiblingWithName");
430   if (const auto *Result =
431           lookupSiblingsWithinContext(Ctx, RenamedDecl, NewName))
432     return Result;
433   return lookupSiblingWithinEnclosingScope(Ctx, RenamedDecl, NewName);
434 }
435 
436 struct InvalidName {
437   enum Kind {
438     Keywords,
439     Conflict,
440     BadIdentifier,
441   };
442   Kind K;
443   std::string Details;
444 };
toString(InvalidName::Kind K)445 std::string toString(InvalidName::Kind K) {
446   switch (K) {
447   case InvalidName::Keywords:
448     return "Keywords";
449   case InvalidName::Conflict:
450     return "Conflict";
451   case InvalidName::BadIdentifier:
452     return "BadIdentifier";
453   }
454   llvm_unreachable("unhandled InvalidName kind");
455 }
456 
makeError(InvalidName Reason)457 llvm::Error makeError(InvalidName Reason) {
458   auto Message = [](InvalidName Reason) {
459     switch (Reason.K) {
460     case InvalidName::Keywords:
461       return llvm::formatv("the chosen name \"{0}\" is a keyword",
462                            Reason.Details);
463     case InvalidName::Conflict:
464       return llvm::formatv("conflict with the symbol in {0}", Reason.Details);
465     case InvalidName::BadIdentifier:
466       return llvm::formatv("the chosen name \"{0}\" is not a valid identifier",
467                            Reason.Details);
468     }
469     llvm_unreachable("unhandled InvalidName kind");
470   };
471   return error("invalid name: {0}", Message(Reason));
472 }
473 
mayBeValidIdentifier(llvm::StringRef Ident)474 static bool mayBeValidIdentifier(llvm::StringRef Ident) {
475   assert(llvm::json::isUTF8(Ident));
476   if (Ident.empty())
477     return false;
478   // We don't check all the rules for non-ascii characters (most are allowed).
479   bool AllowDollar = true; // lenient
480   if (llvm::isASCII(Ident.front()) &&
481       !isIdentifierHead(Ident.front(), AllowDollar))
482     return false;
483   for (char C : Ident) {
484     if (llvm::isASCII(C) && !isIdentifierBody(C, AllowDollar))
485       return false;
486   }
487   return true;
488 }
489 
490 // Check if we can rename the given RenameDecl into NewName.
491 // Return details if the rename would produce a conflict.
checkName(const NamedDecl & RenameDecl,llvm::StringRef NewName)492 llvm::Optional<InvalidName> checkName(const NamedDecl &RenameDecl,
493                                       llvm::StringRef NewName) {
494   trace::Span Tracer("CheckName");
495   static constexpr trace::Metric InvalidNameMetric(
496       "rename_name_invalid", trace::Metric::Counter, "invalid_kind");
497   auto &ASTCtx = RenameDecl.getASTContext();
498   llvm::Optional<InvalidName> Result;
499   if (isKeyword(NewName, ASTCtx.getLangOpts()))
500     Result = InvalidName{InvalidName::Keywords, NewName.str()};
501   else if (!mayBeValidIdentifier(NewName))
502     Result = InvalidName{InvalidName::BadIdentifier, NewName.str()};
503   else {
504     // Name conflict detection.
505     // Function conflicts are subtle (overloading), so ignore them.
506     if (RenameDecl.getKind() != Decl::Function) {
507       if (auto *Conflict = lookupSiblingWithName(ASTCtx, RenameDecl, NewName))
508         Result = InvalidName{
509             InvalidName::Conflict,
510             Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
511     }
512   }
513   if (Result)
514     InvalidNameMetric.record(1, toString(Result->K));
515   return Result;
516 }
517 
518 // AST-based rename, it renames all occurrences in the main file.
519 llvm::Expected<tooling::Replacements>
renameWithinFile(ParsedAST & AST,const NamedDecl & RenameDecl,llvm::StringRef NewName)520 renameWithinFile(ParsedAST &AST, const NamedDecl &RenameDecl,
521                  llvm::StringRef NewName) {
522   trace::Span Tracer("RenameWithinFile");
523   const SourceManager &SM = AST.getSourceManager();
524 
525   tooling::Replacements FilteredChanges;
526   for (SourceLocation Loc : findOccurrencesWithinFile(AST, RenameDecl)) {
527     SourceLocation RenameLoc = Loc;
528     // We don't rename in any macro bodies, but we allow rename the symbol
529     // spelled in a top-level macro argument in the main file.
530     if (RenameLoc.isMacroID()) {
531       if (isInMacroBody(SM, RenameLoc))
532         continue;
533       RenameLoc = SM.getSpellingLoc(Loc);
534     }
535     // Filter out locations not from main file.
536     // We traverse only main file decls, but locations could come from an
537     // non-preamble #include file e.g.
538     //   void test() {
539     //     int f^oo;
540     //     #include "use_foo.inc"
541     //   }
542     if (!isInsideMainFile(RenameLoc, SM))
543       continue;
544     if (auto Err = FilteredChanges.add(tooling::Replacement(
545             SM, CharSourceRange::getTokenRange(RenameLoc), NewName)))
546       return std::move(Err);
547   }
548   return FilteredChanges;
549 }
550 
toRange(const SymbolLocation & L)551 Range toRange(const SymbolLocation &L) {
552   Range R;
553   R.start.line = L.Start.line();
554   R.start.character = L.Start.column();
555   R.end.line = L.End.line();
556   R.end.character = L.End.column();
557   return R;
558 }
559 
560 // Return all rename occurrences (using the index) outside of the main file,
561 // grouped by the absolute file path.
562 llvm::Expected<llvm::StringMap<std::vector<Range>>>
findOccurrencesOutsideFile(const NamedDecl & RenameDecl,llvm::StringRef MainFile,const SymbolIndex & Index,size_t MaxLimitFiles)563 findOccurrencesOutsideFile(const NamedDecl &RenameDecl,
564                            llvm::StringRef MainFile, const SymbolIndex &Index,
565                            size_t MaxLimitFiles) {
566   trace::Span Tracer("FindOccurrencesOutsideFile");
567   RefsRequest RQuest;
568   RQuest.IDs.insert(getSymbolID(&RenameDecl));
569 
570   // Absolute file path => rename occurrences in that file.
571   llvm::StringMap<std::vector<Range>> AffectedFiles;
572   bool HasMore = Index.refs(RQuest, [&](const Ref &R) {
573     if (AffectedFiles.size() >= MaxLimitFiles)
574       return;
575     if ((R.Kind & RefKind::Spelled) == RefKind::Unknown)
576       return;
577     if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
578       if (!pathEqual(*RefFilePath, MainFile))
579         AffectedFiles[*RefFilePath].push_back(toRange(R.Location));
580     }
581   });
582 
583   if (AffectedFiles.size() >= MaxLimitFiles)
584     return error("The number of affected files exceeds the max limit {0}",
585                  MaxLimitFiles);
586   if (HasMore)
587     return error("The symbol {0} has too many occurrences",
588                  RenameDecl.getQualifiedNameAsString());
589   // Sort and deduplicate the results, in case that index returns duplications.
590   for (auto &FileAndOccurrences : AffectedFiles) {
591     auto &Ranges = FileAndOccurrences.getValue();
592     llvm::sort(Ranges);
593     Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
594 
595     SPAN_ATTACH(Tracer, FileAndOccurrences.first(),
596                 static_cast<int64_t>(Ranges.size()));
597   }
598   return AffectedFiles;
599 }
600 
601 // Index-based rename, it renames all occurrences outside of the main file.
602 //
603 // The cross-file rename is purely based on the index, as we don't want to
604 // build all ASTs for affected files, which may cause a performance hit.
605 // We choose to trade off some correctness for performance and scalability.
606 //
607 // Clangd builds a dynamic index for all opened files on top of the static
608 // index of the whole codebase. Dynamic index is up-to-date (respects dirty
609 // buffers) as long as clangd finishes processing opened files, while static
610 // index (background index) is relatively stale. We choose the dirty buffers
611 // as the file content we rename on, and fallback to file content on disk if
612 // there is no dirty buffer.
613 llvm::Expected<FileEdits>
renameOutsideFile(const NamedDecl & RenameDecl,llvm::StringRef MainFilePath,llvm::StringRef NewName,const SymbolIndex & Index,size_t MaxLimitFiles,llvm::vfs::FileSystem & FS)614 renameOutsideFile(const NamedDecl &RenameDecl, llvm::StringRef MainFilePath,
615                   llvm::StringRef NewName, const SymbolIndex &Index,
616                   size_t MaxLimitFiles, llvm::vfs::FileSystem &FS) {
617   trace::Span Tracer("RenameOutsideFile");
618   auto AffectedFiles = findOccurrencesOutsideFile(RenameDecl, MainFilePath,
619                                                   Index, MaxLimitFiles);
620   if (!AffectedFiles)
621     return AffectedFiles.takeError();
622   FileEdits Results;
623   for (auto &FileAndOccurrences : *AffectedFiles) {
624     llvm::StringRef FilePath = FileAndOccurrences.first();
625 
626     auto ExpBuffer = FS.getBufferForFile(FilePath);
627     if (!ExpBuffer) {
628       elog("Fail to read file content: Fail to open file {0}: {1}", FilePath,
629            ExpBuffer.getError().message());
630       continue;
631     }
632 
633     auto AffectedFileCode = (*ExpBuffer)->getBuffer();
634     auto RenameRanges =
635         adjustRenameRanges(AffectedFileCode, RenameDecl.getNameAsString(),
636                            std::move(FileAndOccurrences.second),
637                            RenameDecl.getASTContext().getLangOpts());
638     if (!RenameRanges) {
639       // Our heuristics fails to adjust rename ranges to the current state of
640       // the file, it is most likely the index is stale, so we give up the
641       // entire rename.
642       return error("Index results don't match the content of file {0} "
643                    "(the index may be stale)",
644                    FilePath);
645     }
646     auto RenameEdit =
647         buildRenameEdit(FilePath, AffectedFileCode, *RenameRanges, NewName);
648     if (!RenameEdit)
649       return error("failed to rename in file {0}: {1}", FilePath,
650                    RenameEdit.takeError());
651     if (!RenameEdit->Replacements.empty())
652       Results.insert({FilePath, std::move(*RenameEdit)});
653   }
654   return Results;
655 }
656 
657 // A simple edit is either changing line or column, but not both.
impliesSimpleEdit(const Position & LHS,const Position & RHS)658 bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
659   return LHS.line == RHS.line || LHS.character == RHS.character;
660 }
661 
662 // Performs a DFS to enumerate all possible near-miss matches.
663 // It finds the locations where the indexed occurrences are now spelled in
664 // Lexed occurrences, a near miss is defined as:
665 //   - a near miss maps all of the **name** occurrences from the index onto a
666 //     *subset* of lexed occurrences (we allow a single name refers to more
667 //     than one symbol)
668 //   - all indexed occurrences must be mapped, and Result must be distinct and
669 //     preserve order (only support detecting simple edits to ensure a
670 //     robust mapping)
671 //   - each indexed -> lexed occurrences mapping correspondence may change the
672 //     *line* or *column*, but not both (increases chance of a robust mapping)
findNearMiss(std::vector<size_t> & PartialMatch,ArrayRef<Range> IndexedRest,ArrayRef<Range> LexedRest,int LexedIndex,int & Fuel,llvm::function_ref<void (const std::vector<size_t> &)> MatchedCB)673 void findNearMiss(
674     std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
675     ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
676     llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
677   if (--Fuel < 0)
678     return;
679   if (IndexedRest.size() > LexedRest.size())
680     return;
681   if (IndexedRest.empty()) {
682     MatchedCB(PartialMatch);
683     return;
684   }
685   if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
686     PartialMatch.push_back(LexedIndex);
687     findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
688                  LexedIndex + 1, Fuel, MatchedCB);
689     PartialMatch.pop_back();
690   }
691   findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
692                LexedIndex + 1, Fuel, MatchedCB);
693 }
694 
695 } // namespace
696 
rename(const RenameInputs & RInputs)697 llvm::Expected<RenameResult> rename(const RenameInputs &RInputs) {
698   assert(!RInputs.Index == !RInputs.FS &&
699          "Index and FS must either both be specified or both null.");
700   trace::Span Tracer("Rename flow");
701   const auto &Opts = RInputs.Opts;
702   ParsedAST &AST = RInputs.AST;
703   const SourceManager &SM = AST.getSourceManager();
704   llvm::StringRef MainFileCode = SM.getBufferData(SM.getMainFileID());
705   // Try to find the tokens adjacent to the cursor position.
706   auto Loc = sourceLocationInMainFile(SM, RInputs.Pos);
707   if (!Loc)
708     return Loc.takeError();
709   const syntax::Token *IdentifierToken =
710       spelledIdentifierTouching(*Loc, AST.getTokens());
711 
712   // Renames should only triggered on identifiers.
713   if (!IdentifierToken)
714     return makeError(ReasonToReject::NoSymbolFound);
715   Range CurrentIdentifier = halfOpenToRange(
716       SM, CharSourceRange::getCharRange(IdentifierToken->location(),
717                                         IdentifierToken->endLocation()));
718   // FIXME: Renaming macros is not supported yet, the macro-handling code should
719   // be moved to rename tooling library.
720   if (locateMacroAt(*IdentifierToken, AST.getPreprocessor()))
721     return makeError(ReasonToReject::UnsupportedSymbol);
722 
723   auto DeclsUnderCursor = locateDeclAt(AST, IdentifierToken->location());
724   if (DeclsUnderCursor.empty())
725     return makeError(ReasonToReject::NoSymbolFound);
726   if (DeclsUnderCursor.size() > 1)
727     return makeError(ReasonToReject::AmbiguousSymbol);
728   const auto &RenameDecl = **DeclsUnderCursor.begin();
729   const auto *ID = RenameDecl.getIdentifier();
730   if (!ID)
731     return makeError(ReasonToReject::UnsupportedSymbol);
732   if (ID->getName() == RInputs.NewName)
733     return makeError(ReasonToReject::SameName);
734   auto Invalid = checkName(RenameDecl, RInputs.NewName);
735   if (Invalid)
736     return makeError(*Invalid);
737 
738   auto Reject = renameable(RenameDecl, RInputs.MainFilePath, RInputs.Index);
739   if (Reject)
740     return makeError(*Reject);
741 
742   // We have two implementations of the rename:
743   //   - AST-based rename: used for renaming local symbols, e.g. variables
744   //     defined in a function body;
745   //   - index-based rename: used for renaming non-local symbols, and not
746   //     feasible for local symbols (as by design our index don't index these
747   //     symbols by design;
748   // To make cross-file rename work for local symbol, we use a hybrid solution:
749   //   - run AST-based rename on the main file;
750   //   - run index-based rename on other affected files;
751   auto MainFileRenameEdit = renameWithinFile(AST, RenameDecl, RInputs.NewName);
752   if (!MainFileRenameEdit)
753     return MainFileRenameEdit.takeError();
754 
755   // Check the rename-triggering location is actually being renamed.
756   // This is a robustness check to avoid surprising rename results -- if the
757   // the triggering location is not actually the name of the node we identified
758   // (e.g. for broken code), then rename is likely not what users expect, so we
759   // reject this kind of rename.
760   auto StartOffset = positionToOffset(MainFileCode, CurrentIdentifier.start);
761   auto EndOffset = positionToOffset(MainFileCode, CurrentIdentifier.end);
762   if (!StartOffset)
763     return StartOffset.takeError();
764   if (!EndOffset)
765     return EndOffset.takeError();
766   if (llvm::find_if(
767           *MainFileRenameEdit,
768           [&StartOffset, &EndOffset](const clang::tooling::Replacement &R) {
769             return R.getOffset() == *StartOffset &&
770                    R.getLength() == *EndOffset - *StartOffset;
771           }) == MainFileRenameEdit->end()) {
772     return makeError(ReasonToReject::NoSymbolFound);
773   }
774   RenameResult Result;
775   Result.Target = CurrentIdentifier;
776   Edit MainFileEdits = Edit(MainFileCode, std::move(*MainFileRenameEdit));
777   llvm::for_each(MainFileEdits.asTextEdits(), [&Result](const TextEdit &TE) {
778     Result.LocalChanges.push_back(TE.range);
779   });
780 
781   // return the main file edit if this is a within-file rename or the symbol
782   // being renamed is function local.
783   if (RenameDecl.getParentFunctionOrMethod()) {
784     Result.GlobalChanges = FileEdits(
785         {std::make_pair(RInputs.MainFilePath, std::move(MainFileEdits))});
786     return Result;
787   }
788 
789   // If the index is nullptr, we don't know the completeness of the result, so
790   // we don't populate the field GlobalChanges.
791   if (!RInputs.Index) {
792     assert(Result.GlobalChanges.empty());
793     return Result;
794   }
795 
796   auto OtherFilesEdits = renameOutsideFile(
797       RenameDecl, RInputs.MainFilePath, RInputs.NewName, *RInputs.Index,
798       Opts.LimitFiles == 0 ? std::numeric_limits<size_t>::max()
799                            : Opts.LimitFiles,
800       *RInputs.FS);
801   if (!OtherFilesEdits)
802     return OtherFilesEdits.takeError();
803   Result.GlobalChanges = *OtherFilesEdits;
804   // Attach the rename edits for the main file.
805   Result.GlobalChanges.try_emplace(RInputs.MainFilePath,
806                                    std::move(MainFileEdits));
807   return Result;
808 }
809 
buildRenameEdit(llvm::StringRef AbsFilePath,llvm::StringRef InitialCode,std::vector<Range> Occurrences,llvm::StringRef NewName)810 llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
811                                      llvm::StringRef InitialCode,
812                                      std::vector<Range> Occurrences,
813                                      llvm::StringRef NewName) {
814   trace::Span Tracer("BuildRenameEdit");
815   SPAN_ATTACH(Tracer, "file_path", AbsFilePath);
816   SPAN_ATTACH(Tracer, "rename_occurrences",
817               static_cast<int64_t>(Occurrences.size()));
818 
819   assert(std::is_sorted(Occurrences.begin(), Occurrences.end()));
820   assert(std::unique(Occurrences.begin(), Occurrences.end()) ==
821              Occurrences.end() &&
822          "Occurrences must be unique");
823 
824   // These two always correspond to the same position.
825   Position LastPos{0, 0};
826   size_t LastOffset = 0;
827 
828   auto Offset = [&](const Position &P) -> llvm::Expected<size_t> {
829     assert(LastPos <= P && "malformed input");
830     Position Shifted = {
831         P.line - LastPos.line,
832         P.line > LastPos.line ? P.character : P.character - LastPos.character};
833     auto ShiftedOffset =
834         positionToOffset(InitialCode.substr(LastOffset), Shifted);
835     if (!ShiftedOffset)
836       return error("fail to convert the position {0} to offset ({1})", P,
837                    ShiftedOffset.takeError());
838     LastPos = P;
839     LastOffset += *ShiftedOffset;
840     return LastOffset;
841   };
842 
843   std::vector<std::pair</*start*/ size_t, /*end*/ size_t>> OccurrencesOffsets;
844   for (const auto &R : Occurrences) {
845     auto StartOffset = Offset(R.start);
846     if (!StartOffset)
847       return StartOffset.takeError();
848     auto EndOffset = Offset(R.end);
849     if (!EndOffset)
850       return EndOffset.takeError();
851     OccurrencesOffsets.push_back({*StartOffset, *EndOffset});
852   }
853 
854   tooling::Replacements RenameEdit;
855   for (const auto &R : OccurrencesOffsets) {
856     auto ByteLength = R.second - R.first;
857     if (auto Err = RenameEdit.add(
858             tooling::Replacement(AbsFilePath, R.first, ByteLength, NewName)))
859       return std::move(Err);
860   }
861   return Edit(InitialCode, std::move(RenameEdit));
862 }
863 
864 // Details:
865 //  - lex the draft code to get all rename candidates, this yields a superset
866 //    of candidates.
867 //  - apply range patching heuristics to generate "authoritative" occurrences,
868 //    cases we consider:
869 //      (a) index returns a subset of candidates, we use the indexed results.
870 //        - fully equal, we are sure the index is up-to-date
871 //        - proper subset, index is correct in most cases? there may be false
872 //          positives (e.g. candidates got appended), but rename is still safe
873 //      (b) index returns non-candidate results, we attempt to map the indexed
874 //          ranges onto candidates in a plausible way (e.g. guess that lines
875 //          were inserted). If such a "near miss" is found, the rename is still
876 //          possible
877 llvm::Optional<std::vector<Range>>
adjustRenameRanges(llvm::StringRef DraftCode,llvm::StringRef Identifier,std::vector<Range> Indexed,const LangOptions & LangOpts)878 adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
879                    std::vector<Range> Indexed, const LangOptions &LangOpts) {
880   trace::Span Tracer("AdjustRenameRanges");
881   assert(!Indexed.empty());
882   assert(std::is_sorted(Indexed.begin(), Indexed.end()));
883   std::vector<Range> Lexed =
884       collectIdentifierRanges(Identifier, DraftCode, LangOpts);
885   llvm::sort(Lexed);
886   return getMappedRanges(Indexed, Lexed);
887 }
888 
getMappedRanges(ArrayRef<Range> Indexed,ArrayRef<Range> Lexed)889 llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
890                                                    ArrayRef<Range> Lexed) {
891   trace::Span Tracer("GetMappedRanges");
892   assert(!Indexed.empty());
893   assert(std::is_sorted(Indexed.begin(), Indexed.end()));
894   assert(std::is_sorted(Lexed.begin(), Lexed.end()));
895 
896   if (Indexed.size() > Lexed.size()) {
897     vlog("The number of lexed occurrences is less than indexed occurrences");
898     SPAN_ATTACH(
899         Tracer, "error",
900         "The number of lexed occurrences is less than indexed occurrences");
901     return llvm::None;
902   }
903   // Fast check for the special subset case.
904   if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end()))
905     return Indexed.vec();
906 
907   std::vector<size_t> Best;
908   size_t BestCost = std::numeric_limits<size_t>::max();
909   bool HasMultiple = 0;
910   std::vector<size_t> ResultStorage;
911   int Fuel = 10000;
912   findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
913                [&](const std::vector<size_t> &Matched) {
914                  size_t MCost =
915                      renameRangeAdjustmentCost(Indexed, Lexed, Matched);
916                  if (MCost < BestCost) {
917                    BestCost = MCost;
918                    Best = std::move(Matched);
919                    HasMultiple = false; // reset
920                    return;
921                  }
922                  if (MCost == BestCost)
923                    HasMultiple = true;
924                });
925   if (HasMultiple) {
926     vlog("The best near miss is not unique.");
927     SPAN_ATTACH(Tracer, "error", "The best near miss is not unique");
928     return llvm::None;
929   }
930   if (Best.empty()) {
931     vlog("Didn't find a near miss.");
932     SPAN_ATTACH(Tracer, "error", "Didn't find a near miss");
933     return llvm::None;
934   }
935   std::vector<Range> Mapped;
936   for (auto I : Best)
937     Mapped.push_back(Lexed[I]);
938   SPAN_ATTACH(Tracer, "mapped_ranges", static_cast<int64_t>(Mapped.size()));
939   return Mapped;
940 }
941 
942 // The cost is the sum of the implied edit sizes between successive diffs, only
943 // simple edits are considered:
944 //   - insert/remove a line (change line offset)
945 //   - insert/remove a character on an existing line (change column offset)
946 //
947 // Example I, total result is 1 + 1 = 2.
948 //   diff[0]: line + 1 <- insert a line before edit 0.
949 //   diff[1]: line + 1
950 //   diff[2]: line + 1
951 //   diff[3]: line + 2 <- insert a line before edits 2 and 3.
952 //
953 // Example II, total result is 1 + 1 + 1 = 3.
954 //   diff[0]: line + 1  <- insert a line before edit 0.
955 //   diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a
956 //   character on edit 1.
renameRangeAdjustmentCost(ArrayRef<Range> Indexed,ArrayRef<Range> Lexed,ArrayRef<size_t> MappedIndex)957 size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
958                                  ArrayRef<size_t> MappedIndex) {
959   assert(Indexed.size() == MappedIndex.size());
960   assert(std::is_sorted(Indexed.begin(), Indexed.end()));
961   assert(std::is_sorted(Lexed.begin(), Lexed.end()));
962 
963   int LastLine = -1;
964   int LastDLine = 0, LastDColumn = 0;
965   int Cost = 0;
966   for (size_t I = 0; I < Indexed.size(); ++I) {
967     int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line;
968     int DColumn =
969         Indexed[I].start.character - Lexed[MappedIndex[I]].start.character;
970     int Line = Indexed[I].start.line;
971     if (Line != LastLine)
972       LastDColumn = 0; // column offsets don't carry cross lines.
973     Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn);
974     std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn);
975   }
976   return Cost;
977 }
978 
979 } // namespace clangd
980 } // namespace clang
981