1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ParsedAST.h"
9 #include "Protocol.h"
10 #include "Selection.h"
11 #include "SourceCode.h"
12 #include "refactor/Tweak.h"
13 #include "support/Logger.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/OperationKinds.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtCXX.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "llvm/ADT/None.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/Casting.h"
29 #include "llvm/Support/Error.h"
30 #include "llvm/Support/raw_ostream.h"
31
32 namespace clang {
33 namespace clangd {
34 namespace {
35 // information regarding the Expr that is being extracted
36 class ExtractionContext {
37 public:
38 ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM,
39 const ASTContext &Ctx);
getExpr() const40 const clang::Expr *getExpr() const { return Expr; }
getExprNode() const41 const SelectionTree::Node *getExprNode() const { return ExprNode; }
isExtractable() const42 bool isExtractable() const { return Extractable; }
43 // The half-open range for the expression to be extracted.
44 SourceRange getExtractionChars() const;
45 // Generate Replacement for replacing selected expression with given VarName
46 tooling::Replacement replaceWithVar(SourceRange Chars,
47 llvm::StringRef VarName) const;
48 // Generate Replacement for declaring the selected Expr as a new variable
49 tooling::Replacement insertDeclaration(llvm::StringRef VarName,
50 SourceRange InitChars) const;
51
52 private:
53 bool Extractable = false;
54 const clang::Expr *Expr;
55 const SelectionTree::Node *ExprNode;
56 // Stmt before which we will extract
57 const clang::Stmt *InsertionPoint = nullptr;
58 const SourceManager &SM;
59 const ASTContext &Ctx;
60 // Decls referenced in the Expr
61 std::vector<clang::Decl *> ReferencedDecls;
62 // returns true if the Expr doesn't reference any variable declared in scope
63 bool exprIsValidOutside(const clang::Stmt *Scope) const;
64 // computes the Stmt before which we will extract out Expr
65 const clang::Stmt *computeInsertionPoint() const;
66 };
67
68 // Returns all the Decls referenced inside the given Expr
69 static std::vector<clang::Decl *>
computeReferencedDecls(const clang::Expr * Expr)70 computeReferencedDecls(const clang::Expr *Expr) {
71 // RAV subclass to find all DeclRefs in a given Stmt
72 class FindDeclRefsVisitor
73 : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> {
74 public:
75 std::vector<Decl *> ReferencedDecls;
76 bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT
77 ReferencedDecls.push_back(DeclRef->getDecl());
78 return true;
79 }
80 };
81 FindDeclRefsVisitor Visitor;
82 Visitor.TraverseStmt(const_cast<Stmt *>(dyn_cast<Stmt>(Expr)));
83 return Visitor.ReferencedDecls;
84 }
85
ExtractionContext(const SelectionTree::Node * Node,const SourceManager & SM,const ASTContext & Ctx)86 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node,
87 const SourceManager &SM,
88 const ASTContext &Ctx)
89 : ExprNode(Node), SM(SM), Ctx(Ctx) {
90 Expr = Node->ASTNode.get<clang::Expr>();
91 ReferencedDecls = computeReferencedDecls(Expr);
92 InsertionPoint = computeInsertionPoint();
93 if (InsertionPoint)
94 Extractable = true;
95 }
96
97 // checks whether extracting before InsertionPoint will take a
98 // variable reference out of scope
exprIsValidOutside(const clang::Stmt * Scope) const99 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const {
100 SourceLocation ScopeBegin = Scope->getBeginLoc();
101 SourceLocation ScopeEnd = Scope->getEndLoc();
102 for (const Decl *ReferencedDecl : ReferencedDecls) {
103 if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) &&
104 SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd))
105 return false;
106 }
107 return true;
108 }
109
110 // Return the Stmt before which we need to insert the extraction.
111 // To find the Stmt, we go up the AST Tree and if the Parent of the current
112 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before
113 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a
114 // CompoundStmt
115 //
116 // FIXME: Extraction from label, switch and case statements
117 // FIXME: Doens't work for FoldExpr
118 // FIXME: Ensure extraction from loops doesn't change semantics.
computeInsertionPoint() const119 const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
120 // returns true if we can extract before InsertionPoint
121 auto CanExtractOutside =
122 [](const SelectionTree::Node *InsertionPoint) -> bool {
123 if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) {
124 // Allow all expressions except LambdaExpr since we don't want to extract
125 // from the captures/default arguments of a lambda
126 if (isa<clang::Expr>(Stmt))
127 return !isa<LambdaExpr>(Stmt);
128 // We don't yet allow extraction from switch/case stmt as we would need to
129 // jump over the switch stmt even if there is a CompoundStmt inside the
130 // switch. And there are other Stmts which we don't care about (e.g.
131 // continue and break) as there can never be anything to extract from
132 // them.
133 return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) ||
134 isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) ||
135 isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) ||
136 isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt);
137 }
138 if (InsertionPoint->ASTNode.get<VarDecl>())
139 return true;
140 return false;
141 };
142 for (const SelectionTree::Node *CurNode = getExprNode();
143 CurNode->Parent && CanExtractOutside(CurNode);
144 CurNode = CurNode->Parent) {
145 const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>();
146 // give up if extraction will take a variable out of scope
147 if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint))
148 break;
149 if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) {
150 if (isa<CompoundStmt>(CurParent)) {
151 // Ensure we don't write inside a macro.
152 if (CurParent->getBeginLoc().isMacroID())
153 continue;
154 return CurInsertionPoint;
155 }
156 }
157 }
158 return nullptr;
159 }
160
161 // returns the replacement for substituting the extraction with VarName
162 tooling::Replacement
replaceWithVar(SourceRange Chars,llvm::StringRef VarName) const163 ExtractionContext::replaceWithVar(SourceRange Chars,
164 llvm::StringRef VarName) const {
165 unsigned ExtractionLength =
166 SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
167 return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
168 }
169 // returns the Replacement for declaring a new variable storing the extraction
170 tooling::Replacement
insertDeclaration(llvm::StringRef VarName,SourceRange InitializerChars) const171 ExtractionContext::insertDeclaration(llvm::StringRef VarName,
172 SourceRange InitializerChars) const {
173 llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
174 const SourceLocation InsertionLoc =
175 toHalfOpenFileRange(SM, Ctx.getLangOpts(),
176 InsertionPoint->getSourceRange())
177 ->getBegin();
178 // FIXME: Replace auto with explicit type and add &/&& as necessary
179 std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " +
180 ExtractionCode.str() + "; ";
181 return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
182 }
183
184 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
185 //
186 // These are special, because the formal AST doesn't match what users expect:
187 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
188 // - but extracting `b + c` is reasonable, as + is (mathematically) associative.
189 //
190 // So we try to support these cases with some restrictions:
191 // - the operator must be associative
192 // - no mixing of operators is allowed
193 // - we don't look inside macro expansions in the subexpressions
194 // - we only adjust the extracted range, so references in the unselected parts
195 // of the AST expression (e.g. `a`) are still considered referenced for
196 // the purposes of calculating the insertion point.
197 // FIXME: it would be nice to exclude these references, by micromanaging
198 // the computeReferencedDecls() calls around the binary operator tree.
199
200 // Information extracted about a binary operator encounted in a SelectionTree.
201 // It can represent either an overloaded or built-in operator.
202 struct ParsedBinaryOperator {
203 BinaryOperatorKind Kind;
204 SourceLocation ExprLoc;
205 llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands;
206
207 // If N is a binary operator, populate this and return true.
parseclang::clangd::__anon371e062d0111::ParsedBinaryOperator208 bool parse(const SelectionTree::Node &N) {
209 SelectedOperands.clear();
210
211 if (const BinaryOperator *Op =
212 llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
213 Kind = Op->getOpcode();
214 ExprLoc = Op->getExprLoc();
215 SelectedOperands = N.Children;
216 return true;
217 }
218 if (const CXXOperatorCallExpr *Op =
219 llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
220 N.ASTNode.get<Expr>())) {
221 if (!Op->isInfixBinaryOp())
222 return false;
223
224 Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
225 ExprLoc = Op->getExprLoc();
226 // Not all children are args, there's also the callee (operator).
227 for (const auto* Child : N.Children) {
228 const Expr *E = Child->ASTNode.get<Expr>();
229 assert(E && "callee and args should be Exprs!");
230 if (E == Op->getArg(0) || E == Op->getArg(1))
231 SelectedOperands.push_back(Child);
232 }
233 return true;
234 }
235 return false;
236 }
237
associativeclang::clangd::__anon371e062d0111::ParsedBinaryOperator238 bool associative() const {
239 // Must also be left-associative, or update getBinaryOperatorRange()!
240 switch (Kind) {
241 case BO_Add:
242 case BO_Mul:
243 case BO_And:
244 case BO_Or:
245 case BO_Xor:
246 case BO_LAnd:
247 case BO_LOr:
248 return true;
249 default:
250 return false;
251 }
252 }
253
crossesMacroBoundaryclang::clangd::__anon371e062d0111::ParsedBinaryOperator254 bool crossesMacroBoundary(const SourceManager &SM) {
255 FileID F = SM.getFileID(ExprLoc);
256 for (const SelectionTree::Node *Child : SelectedOperands)
257 if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
258 return true;
259 return false;
260 }
261 };
262
263 // If have an associative operator at the top level, then we must find
264 // the start point (rightmost in LHS) and end point (leftmost in RHS).
265 // We can only descend into subtrees where the operator matches.
266 //
267 // e.g. for a + [[b + c]] + d
268 // +
269 // / \
270 // N-> + d
271 // / \
272 // + c <- End
273 // / \
274 // a b <- Start
getBinaryOperatorRange(const SelectionTree::Node & N,const SourceManager & SM,const LangOptions & LangOpts)275 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
276 const SourceManager &SM,
277 const LangOptions &LangOpts) {
278 // If N is not a suitable binary operator, bail out.
279 ParsedBinaryOperator Op;
280 if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
281 Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
282 return SourceRange();
283 BinaryOperatorKind OuterOp = Op.Kind;
284
285 // Because the tree we're interested in contains only one operator type, and
286 // all eligible operators are left-associative, the shape of the tree is
287 // very restricted: it's a linked list along the left edges.
288 // This simplifies our implementation.
289 const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
290 const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS
291 // End is already correct: it can't be an OuterOp (as it's left-associative).
292 // Start needs to be pushed down int the subtree to the right spot.
293 while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
294 !Op.crossesMacroBoundary(SM)) {
295 assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
296 if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
297 Start = Op.SelectedOperands.back();
298 break;
299 }
300 // Op.LHS is (at least partially) selected, so descend into it.
301 Start = Op.SelectedOperands.front();
302 }
303
304 return SourceRange(
305 toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
306 ->getBegin(),
307 toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
308 ->getEnd());
309 }
310
getExtractionChars() const311 SourceRange ExtractionContext::getExtractionChars() const {
312 // Special case: we're extracting an associative binary subexpression.
313 SourceRange BinaryOperatorRange =
314 getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
315 if (BinaryOperatorRange.isValid())
316 return BinaryOperatorRange;
317
318 // Usual case: we're extracting the whole expression.
319 return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
320 }
321
322 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef
getCallExpr(const SelectionTree::Node * DeclRef)323 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) {
324 const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit();
325 const SelectionTree::Node *MaybeCall = MaybeCallee.Parent;
326 if (!MaybeCall)
327 return nullptr;
328 const CallExpr *CE =
329 llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>());
330 if (!CE)
331 return nullptr;
332 if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>())
333 return nullptr;
334 return MaybeCall;
335 }
336
337 // Returns true if Inner (which is a direct child of Outer) is appearing as
338 // a statement rather than an expression whose value can be used.
childExprIsStmt(const Stmt * Outer,const Expr * Inner)339 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) {
340 if (!Outer || !Inner)
341 return false;
342 // Exclude the most common places where an expr can appear but be unused.
343 if (llvm::isa<CompoundStmt>(Outer))
344 return true;
345 if (llvm::isa<SwitchCase>(Outer))
346 return true;
347 // Control flow statements use condition etc, but not the body.
348 if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer))
349 return Inner == WS->getBody();
350 if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer))
351 return Inner == DS->getBody();
352 if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer))
353 return Inner == FS->getBody();
354 if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer))
355 return Inner == FS->getBody();
356 if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer))
357 return Inner == IS->getThen() || Inner == IS->getElse();
358 // Assume all other cases may be actual expressions.
359 // This includes the important case of subexpressions (where Outer is Expr).
360 return false;
361 }
362
363 // check if N can and should be extracted (e.g. is not void-typed).
eligibleForExtraction(const SelectionTree::Node * N)364 bool eligibleForExtraction(const SelectionTree::Node *N) {
365 const Expr *E = N->ASTNode.get<Expr>();
366 if (!E)
367 return false;
368
369 // Void expressions can't be assigned to variables.
370 if (const Type *ExprType = E->getType().getTypePtrOrNull())
371 if (ExprType->isVoidType())
372 return false;
373
374 // A plain reference to a name (e.g. variable) isn't worth extracting.
375 // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`?
376 if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E))
377 return false;
378
379 // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful.
380 // FIXME: we could still hoist the assignment, and leave the variable there?
381 ParsedBinaryOperator BinOp;
382 if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind))
383 return false;
384
385 // We don't want to extract expressions used as statements, that would leave
386 // a `dummy;` around that has no effect.
387 // Unfortunately because the AST doesn't have ExprStmt, we have to check in
388 // this roundabout way.
389 const SelectionTree::Node &OuterImplicit = N->outerImplicit();
390 if (!OuterImplicit.Parent ||
391 childExprIsStmt(OuterImplicit.Parent->ASTNode.get<Stmt>(),
392 OuterImplicit.ASTNode.get<Expr>()))
393 return false;
394
395 // FIXME: ban extracting the RHS of an assignment: `a = [[foo()]]`
396 return true;
397 }
398
399 // Find the Expr node that we're going to extract.
400 // We don't want to trigger for assignment expressions and variable/field
401 // DeclRefs. For function/member function, we want to extract the entire
402 // function call.
computeExtractedExpr(const SelectionTree::Node * N)403 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) {
404 if (!N)
405 return nullptr;
406 const SelectionTree::Node *TargetNode = N;
407 const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>();
408 if (!SelectedExpr)
409 return nullptr;
410 // For function and member function DeclRefs, extract the whole call.
411 if (llvm::isa<DeclRefExpr>(SelectedExpr) ||
412 llvm::isa<MemberExpr>(SelectedExpr))
413 if (const SelectionTree::Node *Call = getCallExpr(N))
414 TargetNode = Call;
415 // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful.
416 if (const BinaryOperator *BinOpExpr =
417 dyn_cast_or_null<BinaryOperator>(SelectedExpr)) {
418 if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign)
419 return nullptr;
420 }
421 if (!TargetNode || !eligibleForExtraction(TargetNode))
422 return nullptr;
423 return TargetNode;
424 }
425
426 /// Extracts an expression to the variable dummy
427 /// Before:
428 /// int x = 5 + 4 * 3;
429 /// ^^^^^
430 /// After:
431 /// auto dummy = 5 + 4;
432 /// int x = dummy * 3;
433 class ExtractVariable : public Tweak {
434 public:
435 const char *id() const override final;
436 bool prepare(const Selection &Inputs) override;
437 Expected<Effect> apply(const Selection &Inputs) override;
title() const438 std::string title() const override {
439 return "Extract subexpression to variable";
440 }
intent() const441 Intent intent() const override { return Refactor; }
442
443 private:
444 // the expression to extract
445 std::unique_ptr<ExtractionContext> Target;
446 };
REGISTER_TWEAK(ExtractVariable)447 REGISTER_TWEAK(ExtractVariable)
448 bool ExtractVariable::prepare(const Selection &Inputs) {
449 // we don't trigger on empty selections for now
450 if (Inputs.SelectionBegin == Inputs.SelectionEnd)
451 return false;
452 const ASTContext &Ctx = Inputs.AST->getASTContext();
453 // FIXME: Enable non-C++ cases once we start spelling types explicitly instead
454 // of making use of auto.
455 if (!Ctx.getLangOpts().CPlusPlus)
456 return false;
457 const SourceManager &SM = Inputs.AST->getSourceManager();
458 if (const SelectionTree::Node *N =
459 computeExtractedExpr(Inputs.ASTSelection.commonAncestor()))
460 Target = std::make_unique<ExtractionContext>(N, SM, Ctx);
461 return Target && Target->isExtractable();
462 }
463
apply(const Selection & Inputs)464 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
465 tooling::Replacements Result;
466 // FIXME: get variable name from user or suggest based on type
467 std::string VarName = "dummy";
468 SourceRange Range = Target->getExtractionChars();
469 // insert new variable declaration
470 if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
471 return std::move(Err);
472 // replace expression with variable name
473 if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
474 return std::move(Err);
475 return Effect::mainFileEdit(Inputs.AST->getSourceManager(),
476 std::move(Result));
477 }
478
479 } // namespace
480 } // namespace clangd
481 } // namespace clang
482