1 //===--- Rename.cpp - Symbol-rename refactorings -----------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "refactor/Rename.h"
10 #include "AST.h"
11 #include "FindTarget.h"
12 #include "ParsedAST.h"
13 #include "Selection.h"
14 #include "SourceCode.h"
15 #include "index/SymbolCollector.h"
16 #include "support/Logger.h"
17 #include "support/Trace.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/AST/ASTTypeTraits.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/DeclTemplate.h"
23 #include "clang/AST/ParentMapContext.h"
24 #include "clang/AST/Stmt.h"
25 #include "clang/Basic/CharInfo.h"
26 #include "clang/Basic/LLVM.h"
27 #include "clang/Basic/SourceLocation.h"
28 #include "clang/Tooling/Syntax/Tokens.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Error.h"
34 #include "llvm/Support/FormatVariadic.h"
35 #include "llvm/Support/JSON.h"
36 #include <algorithm>
37
38 namespace clang {
39 namespace clangd {
40 namespace {
41
filePath(const SymbolLocation & Loc,llvm::StringRef HintFilePath)42 llvm::Optional<std::string> filePath(const SymbolLocation &Loc,
43 llvm::StringRef HintFilePath) {
44 if (!Loc)
45 return None;
46 auto Path = URI::resolve(Loc.FileURI, HintFilePath);
47 if (!Path) {
48 elog("Could not resolve URI {0}: {1}", Loc.FileURI, Path.takeError());
49 return None;
50 }
51
52 return *Path;
53 }
54
55 // Returns true if the given location is expanded from any macro body.
isInMacroBody(const SourceManager & SM,SourceLocation Loc)56 bool isInMacroBody(const SourceManager &SM, SourceLocation Loc) {
57 while (Loc.isMacroID()) {
58 if (SM.isMacroBodyExpansion(Loc))
59 return true;
60 Loc = SM.getImmediateMacroCallerLoc(Loc);
61 }
62
63 return false;
64 }
65
66 // Canonical declarations help simplify the process of renaming. Examples:
67 // - Template's canonical decl is the templated declaration (i.e.
68 // ClassTemplateDecl is canonicalized to its child CXXRecordDecl,
69 // FunctionTemplateDecl - to child FunctionDecl)
70 // - Given a constructor/destructor, canonical declaration is the parent
71 // CXXRecordDecl because we want to rename both type name and its ctor/dtor.
72 // - All specializations are canonicalized to the primary template. For example:
73 //
74 // template <typename T, int U>
75 // bool Foo = true; (1)
76 //
77 // template <typename T>
78 // bool Foo<T, 0> = true; (2)
79 //
80 // template <>
81 // bool Foo<int, 0> = true; (3)
82 //
83 // Here, both partial (2) and full (3) specializations are canonicalized to (1)
84 // which ensures all three of them are renamed.
canonicalRenameDecl(const NamedDecl * D)85 const NamedDecl *canonicalRenameDecl(const NamedDecl *D) {
86 if (const auto *VarTemplate = dyn_cast<VarTemplateSpecializationDecl>(D))
87 return canonicalRenameDecl(
88 VarTemplate->getSpecializedTemplate()->getTemplatedDecl());
89 if (const auto *Template = dyn_cast<TemplateDecl>(D))
90 if (const NamedDecl *TemplatedDecl = Template->getTemplatedDecl())
91 return canonicalRenameDecl(TemplatedDecl);
92 if (const auto *ClassTemplateSpecialization =
93 dyn_cast<ClassTemplateSpecializationDecl>(D))
94 return canonicalRenameDecl(
95 ClassTemplateSpecialization->getSpecializedTemplate()
96 ->getTemplatedDecl());
97 if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
98 if (Method->getDeclKind() == Decl::Kind::CXXConstructor ||
99 Method->getDeclKind() == Decl::Kind::CXXDestructor)
100 return canonicalRenameDecl(Method->getParent());
101 if (const FunctionDecl *InstantiatedMethod =
102 Method->getInstantiatedFromMemberFunction())
103 Method = cast<CXXMethodDecl>(InstantiatedMethod);
104 // FIXME(kirillbobyrev): For virtual methods with
105 // size_overridden_methods() > 1, this will not rename all functions it
106 // overrides, because this code assumes there is a single canonical
107 // declaration.
108 while (Method->isVirtual() && Method->size_overridden_methods())
109 Method = *Method->overridden_methods().begin();
110 return Method->getCanonicalDecl();
111 }
112 if (const auto *Function = dyn_cast<FunctionDecl>(D))
113 if (const FunctionTemplateDecl *Template = Function->getPrimaryTemplate())
114 return canonicalRenameDecl(Template);
115 if (const auto *Field = dyn_cast<FieldDecl>(D)) {
116 // This is a hacky way to do something like
117 // CXXMethodDecl::getInstantiatedFromMemberFunction for the field because
118 // Clang AST does not store relevant information about the field that is
119 // instantiated.
120 const auto *FieldParent =
121 dyn_cast_or_null<CXXRecordDecl>(Field->getParent());
122 if (!FieldParent)
123 return Field->getCanonicalDecl();
124 FieldParent = FieldParent->getTemplateInstantiationPattern();
125 // Field is not instantiation.
126 if (!FieldParent || Field->getParent() == FieldParent)
127 return Field->getCanonicalDecl();
128 for (const FieldDecl *Candidate : FieldParent->fields())
129 if (Field->getDeclName() == Candidate->getDeclName())
130 return Candidate->getCanonicalDecl();
131 elog("FieldParent should have field with the same name as Field.");
132 }
133 if (const auto *VD = dyn_cast<VarDecl>(D)) {
134 if (const VarDecl *OriginalVD = VD->getInstantiatedFromStaticDataMember())
135 VD = OriginalVD;
136 return VD->getCanonicalDecl();
137 }
138 return dyn_cast<NamedDecl>(D->getCanonicalDecl());
139 }
140
locateDeclAt(ParsedAST & AST,SourceLocation TokenStartLoc)141 llvm::DenseSet<const NamedDecl *> locateDeclAt(ParsedAST &AST,
142 SourceLocation TokenStartLoc) {
143 unsigned Offset =
144 AST.getSourceManager().getDecomposedSpellingLoc(TokenStartLoc).second;
145
146 SelectionTree Selection = SelectionTree::createRight(
147 AST.getASTContext(), AST.getTokens(), Offset, Offset);
148 const SelectionTree::Node *SelectedNode = Selection.commonAncestor();
149 if (!SelectedNode)
150 return {};
151
152 llvm::DenseSet<const NamedDecl *> Result;
153 for (const NamedDecl *D :
154 targetDecl(SelectedNode->ASTNode,
155 DeclRelation::Alias | DeclRelation::TemplatePattern,
156 AST.getHeuristicResolver())) {
157 Result.insert(canonicalRenameDecl(D));
158 }
159 return Result;
160 }
161
162 // By default, we exclude C++ standard symbols and protobuf symbols as rename
163 // these symbols would change system/generated files which are unlikely to be
164 // modified.
isExcluded(const NamedDecl & RenameDecl)165 bool isExcluded(const NamedDecl &RenameDecl) {
166 if (isProtoFile(RenameDecl.getLocation(),
167 RenameDecl.getASTContext().getSourceManager()))
168 return true;
169 static const auto *StdSymbols = new llvm::DenseSet<llvm::StringRef>({
170 #define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name},
171 #include "StdSymbolMap.inc"
172 #undef SYMBOL
173 });
174 return StdSymbols->count(printQualifiedName(RenameDecl));
175 }
176
177 enum class ReasonToReject {
178 NoSymbolFound,
179 NoIndexProvided,
180 NonIndexable,
181 UnsupportedSymbol,
182 AmbiguousSymbol,
183
184 // name validation. FIXME: reconcile with InvalidName
185 SameName,
186 };
187
renameable(const NamedDecl & RenameDecl,StringRef MainFilePath,const SymbolIndex * Index)188 llvm::Optional<ReasonToReject> renameable(const NamedDecl &RenameDecl,
189 StringRef MainFilePath,
190 const SymbolIndex *Index) {
191 trace::Span Tracer("Renameable");
192 // Filter out symbols that are unsupported in both rename modes.
193 if (llvm::isa<NamespaceDecl>(&RenameDecl))
194 return ReasonToReject::UnsupportedSymbol;
195 if (const auto *FD = llvm::dyn_cast<FunctionDecl>(&RenameDecl)) {
196 if (FD->isOverloadedOperator())
197 return ReasonToReject::UnsupportedSymbol;
198 }
199 // function-local symbols is safe to rename.
200 if (RenameDecl.getParentFunctionOrMethod())
201 return None;
202
203 if (isExcluded(RenameDecl))
204 return ReasonToReject::UnsupportedSymbol;
205
206 // Check whether the symbol being rename is indexable.
207 auto &ASTCtx = RenameDecl.getASTContext();
208 bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts());
209 bool DeclaredInMainFile =
210 isInsideMainFile(RenameDecl.getBeginLoc(), ASTCtx.getSourceManager());
211 bool IsMainFileOnly = true;
212 if (MainFileIsHeader)
213 // main file is a header, the symbol can't be main file only.
214 IsMainFileOnly = false;
215 else if (!DeclaredInMainFile)
216 IsMainFileOnly = false;
217 // If the symbol is not indexable, we disallow rename.
218 if (!SymbolCollector::shouldCollectSymbol(
219 RenameDecl, RenameDecl.getASTContext(), SymbolCollector::Options(),
220 IsMainFileOnly))
221 return ReasonToReject::NonIndexable;
222
223
224 // FIXME: Renaming virtual methods requires to rename all overridens in
225 // subclasses, our index doesn't have this information.
226 if (const auto *S = llvm::dyn_cast<CXXMethodDecl>(&RenameDecl)) {
227 if (S->isVirtual())
228 return ReasonToReject::UnsupportedSymbol;
229 }
230 return None;
231 }
232
makeError(ReasonToReject Reason)233 llvm::Error makeError(ReasonToReject Reason) {
234 auto Message = [](ReasonToReject Reason) {
235 switch (Reason) {
236 case ReasonToReject::NoSymbolFound:
237 return "there is no symbol at the given location";
238 case ReasonToReject::NoIndexProvided:
239 return "no index provided";
240 case ReasonToReject::NonIndexable:
241 return "symbol may be used in other files (not eligible for indexing)";
242 case ReasonToReject::UnsupportedSymbol:
243 return "symbol is not a supported kind (e.g. namespace, macro)";
244 case ReasonToReject::AmbiguousSymbol:
245 return "there are multiple symbols at the given location";
246 case ReasonToReject::SameName:
247 return "new name is the same as the old name";
248 }
249 llvm_unreachable("unhandled reason kind");
250 };
251 return error("Cannot rename symbol: {0}", Message(Reason));
252 }
253
254 // Return all rename occurrences in the main file.
findOccurrencesWithinFile(ParsedAST & AST,const NamedDecl & ND)255 std::vector<SourceLocation> findOccurrencesWithinFile(ParsedAST &AST,
256 const NamedDecl &ND) {
257 trace::Span Tracer("FindOccurrencesWithinFile");
258 assert(canonicalRenameDecl(&ND) == &ND &&
259 "ND should be already canonicalized.");
260
261 std::vector<SourceLocation> Results;
262 for (Decl *TopLevelDecl : AST.getLocalTopLevelDecls()) {
263 findExplicitReferences(
264 TopLevelDecl,
265 [&](ReferenceLoc Ref) {
266 if (Ref.Targets.empty())
267 return;
268 for (const auto *Target : Ref.Targets) {
269 if (canonicalRenameDecl(Target) == &ND) {
270 Results.push_back(Ref.NameLoc);
271 return;
272 }
273 }
274 },
275 AST.getHeuristicResolver());
276 }
277
278 return Results;
279 }
280
281 // Detect name conflict with othter DeclStmts in the same enclosing scope.
lookupSiblingWithinEnclosingScope(ASTContext & Ctx,const NamedDecl & RenamedDecl,StringRef NewName)282 const NamedDecl *lookupSiblingWithinEnclosingScope(ASTContext &Ctx,
283 const NamedDecl &RenamedDecl,
284 StringRef NewName) {
285 // Store Parents list outside of GetSingleParent, so that returned pointer is
286 // not invalidated.
287 DynTypedNodeList Storage(DynTypedNode::create(RenamedDecl));
288 auto GetSingleParent = [&](const DynTypedNode &Node) -> const DynTypedNode * {
289 Storage = Ctx.getParents(Node);
290 return (Storage.size() == 1) ? Storage.begin() : nullptr;
291 };
292
293 // We need to get to the enclosing scope: NamedDecl's parent is typically
294 // DeclStmt (or FunctionProtoTypeLoc in case of function arguments), so
295 // enclosing scope would be the second order parent.
296 const auto *Parent = GetSingleParent(DynTypedNode::create(RenamedDecl));
297 if (!Parent || !(Parent->get<DeclStmt>() || Parent->get<TypeLoc>()))
298 return nullptr;
299 Parent = GetSingleParent(*Parent);
300
301 // The following helpers check corresponding AST nodes for variable
302 // declarations with the name collision.
303 auto CheckDeclStmt = [&](const DeclStmt *DS,
304 StringRef Name) -> const NamedDecl * {
305 if (!DS)
306 return nullptr;
307 for (const auto &Child : DS->getDeclGroup())
308 if (const auto *ND = dyn_cast<NamedDecl>(Child))
309 if (ND != &RenamedDecl && ND->getName() == Name)
310 return ND;
311 return nullptr;
312 };
313 auto CheckCompoundStmt = [&](const Stmt *S,
314 StringRef Name) -> const NamedDecl * {
315 if (const auto *CS = dyn_cast_or_null<CompoundStmt>(S))
316 for (const auto *Node : CS->children())
317 if (const auto *Result = CheckDeclStmt(dyn_cast<DeclStmt>(Node), Name))
318 return Result;
319 return nullptr;
320 };
321 auto CheckConditionVariable = [&](const auto *Scope,
322 StringRef Name) -> const NamedDecl * {
323 if (!Scope)
324 return nullptr;
325 return CheckDeclStmt(Scope->getConditionVariableDeclStmt(), Name);
326 };
327
328 // CompoundStmt is the most common enclosing scope for function-local symbols
329 // In the simplest case we just iterate through sibling DeclStmts and check
330 // for collisions.
331 if (const auto *EnclosingCS = Parent->get<CompoundStmt>()) {
332 if (const auto *Result = CheckCompoundStmt(EnclosingCS, NewName))
333 return Result;
334 const auto *ScopeParent = GetSingleParent(*Parent);
335 // CompoundStmt may be found within if/while/for. In these cases, rename can
336 // collide with the init-statement variable decalaration, they should be
337 // checked.
338 if (const auto *Result =
339 CheckConditionVariable(ScopeParent->get<IfStmt>(), NewName))
340 return Result;
341 if (const auto *Result =
342 CheckConditionVariable(ScopeParent->get<WhileStmt>(), NewName))
343 return Result;
344 if (const auto *For = ScopeParent->get<ForStmt>())
345 if (const auto *Result = CheckDeclStmt(
346 dyn_cast_or_null<DeclStmt>(For->getInit()), NewName))
347 return Result;
348 // Also check if there is a name collision with function arguments.
349 if (const auto *Function = ScopeParent->get<FunctionDecl>())
350 for (const auto *Parameter : Function->parameters())
351 if (Parameter->getName() == NewName)
352 return Parameter;
353 return nullptr;
354 }
355
356 // When renaming a variable within init-statement within if/while/for
357 // condition, also check the CompoundStmt in the body.
358 if (const auto *EnclosingIf = Parent->get<IfStmt>()) {
359 if (const auto *Result = CheckCompoundStmt(EnclosingIf->getElse(), NewName))
360 return Result;
361 return CheckCompoundStmt(EnclosingIf->getThen(), NewName);
362 }
363 if (const auto *EnclosingWhile = Parent->get<WhileStmt>())
364 return CheckCompoundStmt(EnclosingWhile->getBody(), NewName);
365 if (const auto *EnclosingFor = Parent->get<ForStmt>()) {
366 // Check for conflicts with other declarations within initialization
367 // statement.
368 if (const auto *Result = CheckDeclStmt(
369 dyn_cast_or_null<DeclStmt>(EnclosingFor->getInit()), NewName))
370 return Result;
371 return CheckCompoundStmt(EnclosingFor->getBody(), NewName);
372 }
373 if (const auto *EnclosingFunction = Parent->get<FunctionDecl>()) {
374 // Check for conflicts with other arguments.
375 for (const auto *Parameter : EnclosingFunction->parameters())
376 if (Parameter != &RenamedDecl && Parameter->getName() == NewName)
377 return Parameter;
378 // FIXME: We don't modify all references to function parameters when
379 // renaming from forward declaration now, so using a name colliding with
380 // something in the definition's body is a valid transformation.
381 if (!EnclosingFunction->doesThisDeclarationHaveABody())
382 return nullptr;
383 return CheckCompoundStmt(EnclosingFunction->getBody(), NewName);
384 }
385
386 return nullptr;
387 }
388
389 // Lookup the declarations (if any) with the given Name in the context of
390 // RenameDecl.
lookupSiblingsWithinContext(ASTContext & Ctx,const NamedDecl & RenamedDecl,llvm::StringRef NewName)391 const NamedDecl *lookupSiblingsWithinContext(ASTContext &Ctx,
392 const NamedDecl &RenamedDecl,
393 llvm::StringRef NewName) {
394 const auto &II = Ctx.Idents.get(NewName);
395 DeclarationName LookupName(&II);
396 DeclContextLookupResult LookupResult;
397 const auto *DC = RenamedDecl.getDeclContext();
398 while (DC && DC->isTransparentContext())
399 DC = DC->getParent();
400 switch (DC->getDeclKind()) {
401 // The enclosing DeclContext may not be the enclosing scope, it might have
402 // false positives and negatives, so we only choose "confident" DeclContexts
403 // that don't have any subscopes that are neither DeclContexts nor
404 // transparent.
405 //
406 // Notably, FunctionDecl is excluded -- because local variables are not scoped
407 // to the function, but rather to the CompoundStmt that is its body. Lookup
408 // will not find function-local variables.
409 case Decl::TranslationUnit:
410 case Decl::Namespace:
411 case Decl::Record:
412 case Decl::Enum:
413 case Decl::CXXRecord:
414 LookupResult = DC->lookup(LookupName);
415 break;
416 default:
417 break;
418 }
419 // Lookup may contain the RenameDecl itself, exclude it.
420 for (const auto *D : LookupResult)
421 if (D->getCanonicalDecl() != RenamedDecl.getCanonicalDecl())
422 return D;
423 return nullptr;
424 }
425
lookupSiblingWithName(ASTContext & Ctx,const NamedDecl & RenamedDecl,llvm::StringRef NewName)426 const NamedDecl *lookupSiblingWithName(ASTContext &Ctx,
427 const NamedDecl &RenamedDecl,
428 llvm::StringRef NewName) {
429 trace::Span Tracer("LookupSiblingWithName");
430 if (const auto *Result =
431 lookupSiblingsWithinContext(Ctx, RenamedDecl, NewName))
432 return Result;
433 return lookupSiblingWithinEnclosingScope(Ctx, RenamedDecl, NewName);
434 }
435
436 struct InvalidName {
437 enum Kind {
438 Keywords,
439 Conflict,
440 BadIdentifier,
441 };
442 Kind K;
443 std::string Details;
444 };
toString(InvalidName::Kind K)445 std::string toString(InvalidName::Kind K) {
446 switch (K) {
447 case InvalidName::Keywords:
448 return "Keywords";
449 case InvalidName::Conflict:
450 return "Conflict";
451 case InvalidName::BadIdentifier:
452 return "BadIdentifier";
453 }
454 llvm_unreachable("unhandled InvalidName kind");
455 }
456
makeError(InvalidName Reason)457 llvm::Error makeError(InvalidName Reason) {
458 auto Message = [](InvalidName Reason) {
459 switch (Reason.K) {
460 case InvalidName::Keywords:
461 return llvm::formatv("the chosen name \"{0}\" is a keyword",
462 Reason.Details);
463 case InvalidName::Conflict:
464 return llvm::formatv("conflict with the symbol in {0}", Reason.Details);
465 case InvalidName::BadIdentifier:
466 return llvm::formatv("the chosen name \"{0}\" is not a valid identifier",
467 Reason.Details);
468 }
469 llvm_unreachable("unhandled InvalidName kind");
470 };
471 return error("invalid name: {0}", Message(Reason));
472 }
473
mayBeValidIdentifier(llvm::StringRef Ident)474 static bool mayBeValidIdentifier(llvm::StringRef Ident) {
475 assert(llvm::json::isUTF8(Ident));
476 if (Ident.empty())
477 return false;
478 // We don't check all the rules for non-ascii characters (most are allowed).
479 bool AllowDollar = true; // lenient
480 if (llvm::isASCII(Ident.front()) &&
481 !isIdentifierHead(Ident.front(), AllowDollar))
482 return false;
483 for (char C : Ident) {
484 if (llvm::isASCII(C) && !isIdentifierBody(C, AllowDollar))
485 return false;
486 }
487 return true;
488 }
489
490 // Check if we can rename the given RenameDecl into NewName.
491 // Return details if the rename would produce a conflict.
checkName(const NamedDecl & RenameDecl,llvm::StringRef NewName)492 llvm::Optional<InvalidName> checkName(const NamedDecl &RenameDecl,
493 llvm::StringRef NewName) {
494 trace::Span Tracer("CheckName");
495 static constexpr trace::Metric InvalidNameMetric(
496 "rename_name_invalid", trace::Metric::Counter, "invalid_kind");
497 auto &ASTCtx = RenameDecl.getASTContext();
498 llvm::Optional<InvalidName> Result;
499 if (isKeyword(NewName, ASTCtx.getLangOpts()))
500 Result = InvalidName{InvalidName::Keywords, NewName.str()};
501 else if (!mayBeValidIdentifier(NewName))
502 Result = InvalidName{InvalidName::BadIdentifier, NewName.str()};
503 else {
504 // Name conflict detection.
505 // Function conflicts are subtle (overloading), so ignore them.
506 if (RenameDecl.getKind() != Decl::Function) {
507 if (auto *Conflict = lookupSiblingWithName(ASTCtx, RenameDecl, NewName))
508 Result = InvalidName{
509 InvalidName::Conflict,
510 Conflict->getLocation().printToString(ASTCtx.getSourceManager())};
511 }
512 }
513 if (Result)
514 InvalidNameMetric.record(1, toString(Result->K));
515 return Result;
516 }
517
518 // AST-based rename, it renames all occurrences in the main file.
519 llvm::Expected<tooling::Replacements>
renameWithinFile(ParsedAST & AST,const NamedDecl & RenameDecl,llvm::StringRef NewName)520 renameWithinFile(ParsedAST &AST, const NamedDecl &RenameDecl,
521 llvm::StringRef NewName) {
522 trace::Span Tracer("RenameWithinFile");
523 const SourceManager &SM = AST.getSourceManager();
524
525 tooling::Replacements FilteredChanges;
526 for (SourceLocation Loc : findOccurrencesWithinFile(AST, RenameDecl)) {
527 SourceLocation RenameLoc = Loc;
528 // We don't rename in any macro bodies, but we allow rename the symbol
529 // spelled in a top-level macro argument in the main file.
530 if (RenameLoc.isMacroID()) {
531 if (isInMacroBody(SM, RenameLoc))
532 continue;
533 RenameLoc = SM.getSpellingLoc(Loc);
534 }
535 // Filter out locations not from main file.
536 // We traverse only main file decls, but locations could come from an
537 // non-preamble #include file e.g.
538 // void test() {
539 // int f^oo;
540 // #include "use_foo.inc"
541 // }
542 if (!isInsideMainFile(RenameLoc, SM))
543 continue;
544 if (auto Err = FilteredChanges.add(tooling::Replacement(
545 SM, CharSourceRange::getTokenRange(RenameLoc), NewName)))
546 return std::move(Err);
547 }
548 return FilteredChanges;
549 }
550
toRange(const SymbolLocation & L)551 Range toRange(const SymbolLocation &L) {
552 Range R;
553 R.start.line = L.Start.line();
554 R.start.character = L.Start.column();
555 R.end.line = L.End.line();
556 R.end.character = L.End.column();
557 return R;
558 }
559
560 // Return all rename occurrences (using the index) outside of the main file,
561 // grouped by the absolute file path.
562 llvm::Expected<llvm::StringMap<std::vector<Range>>>
findOccurrencesOutsideFile(const NamedDecl & RenameDecl,llvm::StringRef MainFile,const SymbolIndex & Index,size_t MaxLimitFiles)563 findOccurrencesOutsideFile(const NamedDecl &RenameDecl,
564 llvm::StringRef MainFile, const SymbolIndex &Index,
565 size_t MaxLimitFiles) {
566 trace::Span Tracer("FindOccurrencesOutsideFile");
567 RefsRequest RQuest;
568 RQuest.IDs.insert(getSymbolID(&RenameDecl));
569
570 // Absolute file path => rename occurrences in that file.
571 llvm::StringMap<std::vector<Range>> AffectedFiles;
572 bool HasMore = Index.refs(RQuest, [&](const Ref &R) {
573 if (AffectedFiles.size() >= MaxLimitFiles)
574 return;
575 if ((R.Kind & RefKind::Spelled) == RefKind::Unknown)
576 return;
577 if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) {
578 if (!pathEqual(*RefFilePath, MainFile))
579 AffectedFiles[*RefFilePath].push_back(toRange(R.Location));
580 }
581 });
582
583 if (AffectedFiles.size() >= MaxLimitFiles)
584 return error("The number of affected files exceeds the max limit {0}",
585 MaxLimitFiles);
586 if (HasMore)
587 return error("The symbol {0} has too many occurrences",
588 RenameDecl.getQualifiedNameAsString());
589 // Sort and deduplicate the results, in case that index returns duplications.
590 for (auto &FileAndOccurrences : AffectedFiles) {
591 auto &Ranges = FileAndOccurrences.getValue();
592 llvm::sort(Ranges);
593 Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
594
595 SPAN_ATTACH(Tracer, FileAndOccurrences.first(),
596 static_cast<int64_t>(Ranges.size()));
597 }
598 return AffectedFiles;
599 }
600
601 // Index-based rename, it renames all occurrences outside of the main file.
602 //
603 // The cross-file rename is purely based on the index, as we don't want to
604 // build all ASTs for affected files, which may cause a performance hit.
605 // We choose to trade off some correctness for performance and scalability.
606 //
607 // Clangd builds a dynamic index for all opened files on top of the static
608 // index of the whole codebase. Dynamic index is up-to-date (respects dirty
609 // buffers) as long as clangd finishes processing opened files, while static
610 // index (background index) is relatively stale. We choose the dirty buffers
611 // as the file content we rename on, and fallback to file content on disk if
612 // there is no dirty buffer.
613 llvm::Expected<FileEdits>
renameOutsideFile(const NamedDecl & RenameDecl,llvm::StringRef MainFilePath,llvm::StringRef NewName,const SymbolIndex & Index,size_t MaxLimitFiles,llvm::vfs::FileSystem & FS)614 renameOutsideFile(const NamedDecl &RenameDecl, llvm::StringRef MainFilePath,
615 llvm::StringRef NewName, const SymbolIndex &Index,
616 size_t MaxLimitFiles, llvm::vfs::FileSystem &FS) {
617 trace::Span Tracer("RenameOutsideFile");
618 auto AffectedFiles = findOccurrencesOutsideFile(RenameDecl, MainFilePath,
619 Index, MaxLimitFiles);
620 if (!AffectedFiles)
621 return AffectedFiles.takeError();
622 FileEdits Results;
623 for (auto &FileAndOccurrences : *AffectedFiles) {
624 llvm::StringRef FilePath = FileAndOccurrences.first();
625
626 auto ExpBuffer = FS.getBufferForFile(FilePath);
627 if (!ExpBuffer) {
628 elog("Fail to read file content: Fail to open file {0}: {1}", FilePath,
629 ExpBuffer.getError().message());
630 continue;
631 }
632
633 auto AffectedFileCode = (*ExpBuffer)->getBuffer();
634 auto RenameRanges =
635 adjustRenameRanges(AffectedFileCode, RenameDecl.getNameAsString(),
636 std::move(FileAndOccurrences.second),
637 RenameDecl.getASTContext().getLangOpts());
638 if (!RenameRanges) {
639 // Our heuristics fails to adjust rename ranges to the current state of
640 // the file, it is most likely the index is stale, so we give up the
641 // entire rename.
642 return error("Index results don't match the content of file {0} "
643 "(the index may be stale)",
644 FilePath);
645 }
646 auto RenameEdit =
647 buildRenameEdit(FilePath, AffectedFileCode, *RenameRanges, NewName);
648 if (!RenameEdit)
649 return error("failed to rename in file {0}: {1}", FilePath,
650 RenameEdit.takeError());
651 if (!RenameEdit->Replacements.empty())
652 Results.insert({FilePath, std::move(*RenameEdit)});
653 }
654 return Results;
655 }
656
657 // A simple edit is either changing line or column, but not both.
impliesSimpleEdit(const Position & LHS,const Position & RHS)658 bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
659 return LHS.line == RHS.line || LHS.character == RHS.character;
660 }
661
662 // Performs a DFS to enumerate all possible near-miss matches.
663 // It finds the locations where the indexed occurrences are now spelled in
664 // Lexed occurrences, a near miss is defined as:
665 // - a near miss maps all of the **name** occurrences from the index onto a
666 // *subset* of lexed occurrences (we allow a single name refers to more
667 // than one symbol)
668 // - all indexed occurrences must be mapped, and Result must be distinct and
669 // preserve order (only support detecting simple edits to ensure a
670 // robust mapping)
671 // - each indexed -> lexed occurrences mapping correspondence may change the
672 // *line* or *column*, but not both (increases chance of a robust mapping)
findNearMiss(std::vector<size_t> & PartialMatch,ArrayRef<Range> IndexedRest,ArrayRef<Range> LexedRest,int LexedIndex,int & Fuel,llvm::function_ref<void (const std::vector<size_t> &)> MatchedCB)673 void findNearMiss(
674 std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
675 ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
676 llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
677 if (--Fuel < 0)
678 return;
679 if (IndexedRest.size() > LexedRest.size())
680 return;
681 if (IndexedRest.empty()) {
682 MatchedCB(PartialMatch);
683 return;
684 }
685 if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
686 PartialMatch.push_back(LexedIndex);
687 findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
688 LexedIndex + 1, Fuel, MatchedCB);
689 PartialMatch.pop_back();
690 }
691 findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
692 LexedIndex + 1, Fuel, MatchedCB);
693 }
694
695 } // namespace
696
rename(const RenameInputs & RInputs)697 llvm::Expected<RenameResult> rename(const RenameInputs &RInputs) {
698 assert(!RInputs.Index == !RInputs.FS &&
699 "Index and FS must either both be specified or both null.");
700 trace::Span Tracer("Rename flow");
701 const auto &Opts = RInputs.Opts;
702 ParsedAST &AST = RInputs.AST;
703 const SourceManager &SM = AST.getSourceManager();
704 llvm::StringRef MainFileCode = SM.getBufferData(SM.getMainFileID());
705 // Try to find the tokens adjacent to the cursor position.
706 auto Loc = sourceLocationInMainFile(SM, RInputs.Pos);
707 if (!Loc)
708 return Loc.takeError();
709 const syntax::Token *IdentifierToken =
710 spelledIdentifierTouching(*Loc, AST.getTokens());
711
712 // Renames should only triggered on identifiers.
713 if (!IdentifierToken)
714 return makeError(ReasonToReject::NoSymbolFound);
715 Range CurrentIdentifier = halfOpenToRange(
716 SM, CharSourceRange::getCharRange(IdentifierToken->location(),
717 IdentifierToken->endLocation()));
718 // FIXME: Renaming macros is not supported yet, the macro-handling code should
719 // be moved to rename tooling library.
720 if (locateMacroAt(*IdentifierToken, AST.getPreprocessor()))
721 return makeError(ReasonToReject::UnsupportedSymbol);
722
723 auto DeclsUnderCursor = locateDeclAt(AST, IdentifierToken->location());
724 if (DeclsUnderCursor.empty())
725 return makeError(ReasonToReject::NoSymbolFound);
726 if (DeclsUnderCursor.size() > 1)
727 return makeError(ReasonToReject::AmbiguousSymbol);
728 const auto &RenameDecl = **DeclsUnderCursor.begin();
729 const auto *ID = RenameDecl.getIdentifier();
730 if (!ID)
731 return makeError(ReasonToReject::UnsupportedSymbol);
732 if (ID->getName() == RInputs.NewName)
733 return makeError(ReasonToReject::SameName);
734 auto Invalid = checkName(RenameDecl, RInputs.NewName);
735 if (Invalid)
736 return makeError(*Invalid);
737
738 auto Reject = renameable(RenameDecl, RInputs.MainFilePath, RInputs.Index);
739 if (Reject)
740 return makeError(*Reject);
741
742 // We have two implementations of the rename:
743 // - AST-based rename: used for renaming local symbols, e.g. variables
744 // defined in a function body;
745 // - index-based rename: used for renaming non-local symbols, and not
746 // feasible for local symbols (as by design our index don't index these
747 // symbols by design;
748 // To make cross-file rename work for local symbol, we use a hybrid solution:
749 // - run AST-based rename on the main file;
750 // - run index-based rename on other affected files;
751 auto MainFileRenameEdit = renameWithinFile(AST, RenameDecl, RInputs.NewName);
752 if (!MainFileRenameEdit)
753 return MainFileRenameEdit.takeError();
754
755 // Check the rename-triggering location is actually being renamed.
756 // This is a robustness check to avoid surprising rename results -- if the
757 // the triggering location is not actually the name of the node we identified
758 // (e.g. for broken code), then rename is likely not what users expect, so we
759 // reject this kind of rename.
760 auto StartOffset = positionToOffset(MainFileCode, CurrentIdentifier.start);
761 auto EndOffset = positionToOffset(MainFileCode, CurrentIdentifier.end);
762 if (!StartOffset)
763 return StartOffset.takeError();
764 if (!EndOffset)
765 return EndOffset.takeError();
766 if (llvm::find_if(
767 *MainFileRenameEdit,
768 [&StartOffset, &EndOffset](const clang::tooling::Replacement &R) {
769 return R.getOffset() == *StartOffset &&
770 R.getLength() == *EndOffset - *StartOffset;
771 }) == MainFileRenameEdit->end()) {
772 return makeError(ReasonToReject::NoSymbolFound);
773 }
774 RenameResult Result;
775 Result.Target = CurrentIdentifier;
776 Edit MainFileEdits = Edit(MainFileCode, std::move(*MainFileRenameEdit));
777 llvm::for_each(MainFileEdits.asTextEdits(), [&Result](const TextEdit &TE) {
778 Result.LocalChanges.push_back(TE.range);
779 });
780
781 // return the main file edit if this is a within-file rename or the symbol
782 // being renamed is function local.
783 if (RenameDecl.getParentFunctionOrMethod()) {
784 Result.GlobalChanges = FileEdits(
785 {std::make_pair(RInputs.MainFilePath, std::move(MainFileEdits))});
786 return Result;
787 }
788
789 // If the index is nullptr, we don't know the completeness of the result, so
790 // we don't populate the field GlobalChanges.
791 if (!RInputs.Index) {
792 assert(Result.GlobalChanges.empty());
793 return Result;
794 }
795
796 auto OtherFilesEdits = renameOutsideFile(
797 RenameDecl, RInputs.MainFilePath, RInputs.NewName, *RInputs.Index,
798 Opts.LimitFiles == 0 ? std::numeric_limits<size_t>::max()
799 : Opts.LimitFiles,
800 *RInputs.FS);
801 if (!OtherFilesEdits)
802 return OtherFilesEdits.takeError();
803 Result.GlobalChanges = *OtherFilesEdits;
804 // Attach the rename edits for the main file.
805 Result.GlobalChanges.try_emplace(RInputs.MainFilePath,
806 std::move(MainFileEdits));
807 return Result;
808 }
809
buildRenameEdit(llvm::StringRef AbsFilePath,llvm::StringRef InitialCode,std::vector<Range> Occurrences,llvm::StringRef NewName)810 llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
811 llvm::StringRef InitialCode,
812 std::vector<Range> Occurrences,
813 llvm::StringRef NewName) {
814 trace::Span Tracer("BuildRenameEdit");
815 SPAN_ATTACH(Tracer, "file_path", AbsFilePath);
816 SPAN_ATTACH(Tracer, "rename_occurrences",
817 static_cast<int64_t>(Occurrences.size()));
818
819 assert(std::is_sorted(Occurrences.begin(), Occurrences.end()));
820 assert(std::unique(Occurrences.begin(), Occurrences.end()) ==
821 Occurrences.end() &&
822 "Occurrences must be unique");
823
824 // These two always correspond to the same position.
825 Position LastPos{0, 0};
826 size_t LastOffset = 0;
827
828 auto Offset = [&](const Position &P) -> llvm::Expected<size_t> {
829 assert(LastPos <= P && "malformed input");
830 Position Shifted = {
831 P.line - LastPos.line,
832 P.line > LastPos.line ? P.character : P.character - LastPos.character};
833 auto ShiftedOffset =
834 positionToOffset(InitialCode.substr(LastOffset), Shifted);
835 if (!ShiftedOffset)
836 return error("fail to convert the position {0} to offset ({1})", P,
837 ShiftedOffset.takeError());
838 LastPos = P;
839 LastOffset += *ShiftedOffset;
840 return LastOffset;
841 };
842
843 std::vector<std::pair</*start*/ size_t, /*end*/ size_t>> OccurrencesOffsets;
844 for (const auto &R : Occurrences) {
845 auto StartOffset = Offset(R.start);
846 if (!StartOffset)
847 return StartOffset.takeError();
848 auto EndOffset = Offset(R.end);
849 if (!EndOffset)
850 return EndOffset.takeError();
851 OccurrencesOffsets.push_back({*StartOffset, *EndOffset});
852 }
853
854 tooling::Replacements RenameEdit;
855 for (const auto &R : OccurrencesOffsets) {
856 auto ByteLength = R.second - R.first;
857 if (auto Err = RenameEdit.add(
858 tooling::Replacement(AbsFilePath, R.first, ByteLength, NewName)))
859 return std::move(Err);
860 }
861 return Edit(InitialCode, std::move(RenameEdit));
862 }
863
864 // Details:
865 // - lex the draft code to get all rename candidates, this yields a superset
866 // of candidates.
867 // - apply range patching heuristics to generate "authoritative" occurrences,
868 // cases we consider:
869 // (a) index returns a subset of candidates, we use the indexed results.
870 // - fully equal, we are sure the index is up-to-date
871 // - proper subset, index is correct in most cases? there may be false
872 // positives (e.g. candidates got appended), but rename is still safe
873 // (b) index returns non-candidate results, we attempt to map the indexed
874 // ranges onto candidates in a plausible way (e.g. guess that lines
875 // were inserted). If such a "near miss" is found, the rename is still
876 // possible
877 llvm::Optional<std::vector<Range>>
adjustRenameRanges(llvm::StringRef DraftCode,llvm::StringRef Identifier,std::vector<Range> Indexed,const LangOptions & LangOpts)878 adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
879 std::vector<Range> Indexed, const LangOptions &LangOpts) {
880 trace::Span Tracer("AdjustRenameRanges");
881 assert(!Indexed.empty());
882 assert(std::is_sorted(Indexed.begin(), Indexed.end()));
883 std::vector<Range> Lexed =
884 collectIdentifierRanges(Identifier, DraftCode, LangOpts);
885 llvm::sort(Lexed);
886 return getMappedRanges(Indexed, Lexed);
887 }
888
getMappedRanges(ArrayRef<Range> Indexed,ArrayRef<Range> Lexed)889 llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
890 ArrayRef<Range> Lexed) {
891 trace::Span Tracer("GetMappedRanges");
892 assert(!Indexed.empty());
893 assert(std::is_sorted(Indexed.begin(), Indexed.end()));
894 assert(std::is_sorted(Lexed.begin(), Lexed.end()));
895
896 if (Indexed.size() > Lexed.size()) {
897 vlog("The number of lexed occurrences is less than indexed occurrences");
898 SPAN_ATTACH(
899 Tracer, "error",
900 "The number of lexed occurrences is less than indexed occurrences");
901 return llvm::None;
902 }
903 // Fast check for the special subset case.
904 if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end()))
905 return Indexed.vec();
906
907 std::vector<size_t> Best;
908 size_t BestCost = std::numeric_limits<size_t>::max();
909 bool HasMultiple = 0;
910 std::vector<size_t> ResultStorage;
911 int Fuel = 10000;
912 findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
913 [&](const std::vector<size_t> &Matched) {
914 size_t MCost =
915 renameRangeAdjustmentCost(Indexed, Lexed, Matched);
916 if (MCost < BestCost) {
917 BestCost = MCost;
918 Best = std::move(Matched);
919 HasMultiple = false; // reset
920 return;
921 }
922 if (MCost == BestCost)
923 HasMultiple = true;
924 });
925 if (HasMultiple) {
926 vlog("The best near miss is not unique.");
927 SPAN_ATTACH(Tracer, "error", "The best near miss is not unique");
928 return llvm::None;
929 }
930 if (Best.empty()) {
931 vlog("Didn't find a near miss.");
932 SPAN_ATTACH(Tracer, "error", "Didn't find a near miss");
933 return llvm::None;
934 }
935 std::vector<Range> Mapped;
936 for (auto I : Best)
937 Mapped.push_back(Lexed[I]);
938 SPAN_ATTACH(Tracer, "mapped_ranges", static_cast<int64_t>(Mapped.size()));
939 return Mapped;
940 }
941
942 // The cost is the sum of the implied edit sizes between successive diffs, only
943 // simple edits are considered:
944 // - insert/remove a line (change line offset)
945 // - insert/remove a character on an existing line (change column offset)
946 //
947 // Example I, total result is 1 + 1 = 2.
948 // diff[0]: line + 1 <- insert a line before edit 0.
949 // diff[1]: line + 1
950 // diff[2]: line + 1
951 // diff[3]: line + 2 <- insert a line before edits 2 and 3.
952 //
953 // Example II, total result is 1 + 1 + 1 = 3.
954 // diff[0]: line + 1 <- insert a line before edit 0.
955 // diff[1]: column + 1 <- remove a line between edits 0 and 1, and insert a
956 // character on edit 1.
renameRangeAdjustmentCost(ArrayRef<Range> Indexed,ArrayRef<Range> Lexed,ArrayRef<size_t> MappedIndex)957 size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
958 ArrayRef<size_t> MappedIndex) {
959 assert(Indexed.size() == MappedIndex.size());
960 assert(std::is_sorted(Indexed.begin(), Indexed.end()));
961 assert(std::is_sorted(Lexed.begin(), Lexed.end()));
962
963 int LastLine = -1;
964 int LastDLine = 0, LastDColumn = 0;
965 int Cost = 0;
966 for (size_t I = 0; I < Indexed.size(); ++I) {
967 int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line;
968 int DColumn =
969 Indexed[I].start.character - Lexed[MappedIndex[I]].start.character;
970 int Line = Indexed[I].start.line;
971 if (Line != LastLine)
972 LastDColumn = 0; // column offsets don't carry cross lines.
973 Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn);
974 std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn);
975 }
976 return Cost;
977 }
978
979 } // namespace clangd
980 } // namespace clang
981