1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/RangeSelector.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/SourceLocation.h"
13 #include "clang/Lex/Lexer.h"
14 #include "clang/Tooling/Transformer/SourceCode.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/Errc.h"
17 #include "llvm/Support/Error.h"
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 using namespace clang;
23 using namespace transformer;
24 
25 using ast_matchers::MatchFinder;
26 using llvm::Error;
27 using llvm::StringError;
28 
29 using MatchResult = MatchFinder::MatchResult;
30 
31 static Error invalidArgumentError(Twine Message) {
32   return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
33 }
34 
35 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36   return invalidArgumentError("mismatched type (node id=" + ID +
37                               " kind=" + Kind.asStringRef() + ")");
38 }
39 
40 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41                        Twine ExpectedType) {
42   return invalidArgumentError("mismatched type: expected one of " +
43                               ExpectedType + " (node id=" + ID +
44                               " kind=" + Kind.asStringRef() + ")");
45 }
46 
47 static Error missingPropertyError(StringRef ID, Twine Description,
48                                   StringRef Property) {
49   return invalidArgumentError(Description + " requires property '" + Property +
50                               "' (node id=" + ID + ")");
51 }
52 
53 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
54                                       StringRef ID) {
55   auto &NodesMap = Nodes.getMap();
56   auto It = NodesMap.find(ID);
57   if (It == NodesMap.end())
58     return invalidArgumentError("ID not bound: " + ID);
59   return It->second;
60 }
61 
62 // FIXME: handling of macros should be configurable.
63 static SourceLocation findPreviousTokenStart(SourceLocation Start,
64                                              const SourceManager &SM,
65                                              const LangOptions &LangOpts) {
66   if (Start.isInvalid() || Start.isMacroID())
67     return SourceLocation();
68 
69   SourceLocation BeforeStart = Start.getLocWithOffset(-1);
70   if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71     return SourceLocation();
72 
73   return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
74 }
75 
76 // Finds the start location of the previous token of kind \p TK.
77 // FIXME: handling of macros should be configurable.
78 static SourceLocation findPreviousTokenKind(SourceLocation Start,
79                                             const SourceManager &SM,
80                                             const LangOptions &LangOpts,
81                                             tok::TokenKind TK) {
82   while (true) {
83     SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84     if (L.isInvalid() || L.isMacroID())
85       return SourceLocation();
86 
87     Token T;
88     if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89       return SourceLocation();
90 
91     if (T.is(TK))
92       return T.getLocation();
93 
94     Start = L;
95   }
96 }
97 
98 static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
99                                     const LangOptions &LangOpts) {
100   SourceLocation EndLoc =
101       E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
102   return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
103 }
104 
105 RangeSelector transformer::before(RangeSelector Selector) {
106   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
107     Expected<CharSourceRange> SelectedRange = Selector(Result);
108     if (!SelectedRange)
109       return SelectedRange.takeError();
110     return CharSourceRange::getCharRange(SelectedRange->getBegin());
111   };
112 }
113 
114 RangeSelector transformer::after(RangeSelector Selector) {
115   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
116     Expected<CharSourceRange> SelectedRange = Selector(Result);
117     if (!SelectedRange)
118       return SelectedRange.takeError();
119     SourceLocation End = SelectedRange->getEnd();
120     if (SelectedRange->isTokenRange()) {
121       // We need to find the actual (exclusive) end location from which to
122       // create a new source range. However, that's not guaranteed to be valid,
123       // even if the token location itself is valid. So, we create a token range
124       // consisting only of the last token, then map that range back to the
125       // source file. If that succeeds, we have a valid location for the end of
126       // the generated range.
127       CharSourceRange Range = Lexer::makeFileCharRange(
128           CharSourceRange::getTokenRange(SelectedRange->getEnd()),
129           *Result.SourceManager, Result.Context->getLangOpts());
130       if (Range.isInvalid())
131         return invalidArgumentError(
132             "after: can't resolve sub-range to valid source range");
133       End = Range.getEnd();
134     }
135 
136     return CharSourceRange::getCharRange(End);
137   };
138 }
139 
140 RangeSelector transformer::node(std::string ID) {
141   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
142     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
143     if (!Node)
144       return Node.takeError();
145     return (Node->get<Decl>() != nullptr ||
146             (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
147                ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
148                                            *Result.Context)
149                : CharSourceRange::getTokenRange(Node->getSourceRange());
150   };
151 }
152 
153 RangeSelector transformer::statement(std::string ID) {
154   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
155     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
156     if (!Node)
157       return Node.takeError();
158     return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
159                                      *Result.Context);
160   };
161 }
162 
163 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
164   return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
165     Expected<CharSourceRange> BeginRange = Begin(Result);
166     if (!BeginRange)
167       return BeginRange.takeError();
168     Expected<CharSourceRange> EndRange = End(Result);
169     if (!EndRange)
170       return EndRange.takeError();
171     SourceLocation B = BeginRange->getBegin();
172     SourceLocation E = EndRange->getEnd();
173     // Note: we are precluding the possibility of sub-token ranges in the case
174     // that EndRange is a token range.
175     if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
176       return invalidArgumentError("Bad range: out of order");
177     }
178     return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
179   };
180 }
181 
182 RangeSelector transformer::encloseNodes(std::string BeginID,
183                                         std::string EndID) {
184   return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
185 }
186 
187 RangeSelector transformer::member(std::string ID) {
188   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
189     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
190     if (!Node)
191       return Node.takeError();
192     if (auto *M = Node->get<clang::MemberExpr>())
193       return CharSourceRange::getTokenRange(
194           M->getMemberNameInfo().getSourceRange());
195     return typeError(ID, Node->getNodeKind(), "MemberExpr");
196   };
197 }
198 
199 RangeSelector transformer::name(std::string ID) {
200   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
201     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
202     if (!N)
203       return N.takeError();
204     auto &Node = *N;
205     if (const auto *D = Node.get<NamedDecl>()) {
206       if (!D->getDeclName().isIdentifier())
207         return missingPropertyError(ID, "name", "identifier");
208       SourceLocation L = D->getLocation();
209       auto R = CharSourceRange::getTokenRange(L, L);
210       // Verify that the range covers exactly the name.
211       // FIXME: extend this code to support cases like `operator +` or
212       // `foo<int>` for which this range will be too short.  Doing so will
213       // require subcasing `NamedDecl`, because it doesn't provide virtual
214       // access to the \c DeclarationNameInfo.
215       if (tooling::getText(R, *Result.Context) != D->getName())
216         return CharSourceRange();
217       return R;
218     }
219     if (const auto *E = Node.get<DeclRefExpr>()) {
220       if (!E->getNameInfo().getName().isIdentifier())
221         return missingPropertyError(ID, "name", "identifier");
222       SourceLocation L = E->getLocation();
223       return CharSourceRange::getTokenRange(L, L);
224     }
225     if (const auto *I = Node.get<CXXCtorInitializer>()) {
226       if (!I->isMemberInitializer() && I->isWritten())
227         return missingPropertyError(ID, "name", "explicit member initializer");
228       SourceLocation L = I->getMemberLocation();
229       return CharSourceRange::getTokenRange(L, L);
230     }
231     return typeError(ID, Node.getNodeKind(),
232                      "DeclRefExpr, NamedDecl, CXXCtorInitializer");
233   };
234 }
235 
236 namespace {
237 // FIXME: make this available in the public API for users to easily create their
238 // own selectors.
239 
240 // Creates a selector from a range-selection function \p Func, which selects a
241 // range that is relative to a bound node id.  \c T is the node type expected by
242 // \p Func.
243 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
244 class RelativeSelector {
245   std::string ID;
246 
247 public:
248   RelativeSelector(std::string ID) : ID(std::move(ID)) {}
249 
250   Expected<CharSourceRange> operator()(const MatchResult &Result) {
251     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
252     if (!N)
253       return N.takeError();
254     if (const auto *Arg = N->get<T>())
255       return Func(Result, *Arg);
256     return typeError(ID, N->getNodeKind());
257   }
258 };
259 } // namespace
260 
261 // FIXME: Change the following functions from being in an anonymous namespace
262 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
263 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
264 // namespace works around a bug in earlier versions.
265 namespace {
266 // Returns the range of the statements (all source between the braces).
267 CharSourceRange getStatementsRange(const MatchResult &,
268                                    const CompoundStmt &CS) {
269   return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
270                                        CS.getRBracLoc());
271 }
272 } // namespace
273 
274 RangeSelector transformer::statements(std::string ID) {
275   return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
276 }
277 
278 namespace {
279 // Returns the range of the source between the call's parentheses.
280 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
281                                       const CallExpr &CE) {
282   return CharSourceRange::getCharRange(
283       findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
284           .getLocWithOffset(1),
285       CE.getRParenLoc());
286 }
287 } // namespace
288 
289 RangeSelector transformer::callArgs(std::string ID) {
290   return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
291 }
292 
293 namespace {
294 // Returns the range of the elements of the initializer list. Includes all
295 // source between the braces.
296 CharSourceRange getElementsRange(const MatchResult &,
297                                  const InitListExpr &E) {
298   return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
299                                        E.getRBraceLoc());
300 }
301 } // namespace
302 
303 RangeSelector transformer::initListElements(std::string ID) {
304   return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
305 }
306 
307 namespace {
308 // Returns the range of the else branch, including the `else` keyword.
309 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
310   return tooling::maybeExtendRange(
311       CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
312       tok::TokenKind::semi, *Result.Context);
313 }
314 } // namespace
315 
316 RangeSelector transformer::elseBranch(std::string ID) {
317   return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
318 }
319 
320 RangeSelector transformer::expansion(RangeSelector S) {
321   return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
322     Expected<CharSourceRange> SRange = S(Result);
323     if (!SRange)
324       return SRange.takeError();
325     return Result.SourceManager->getExpansionRange(*SRange);
326   };
327 }
328