1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/Transformer/Parsing.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/CharInfo.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/RangeSelector.h"
16 #include "clang/Tooling/Transformer/SourceCode.h"
17 #include "llvm/ADT/None.h"
18 #include "llvm/ADT/StringMap.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/Errc.h"
21 #include "llvm/Support/Error.h"
22 #include <string>
23 #include <utility>
24 #include <vector>
25
26 using namespace clang;
27 using namespace transformer;
28
29 // FIXME: This implementation is entirely separate from that of the AST
30 // matchers. Given the similarity of the languages and uses of the two parsers,
31 // the two should share a common parsing infrastructure, as should other
32 // Transformer types. We intend to unify this implementation soon to share as
33 // much as possible with the AST Matchers parsing.
34
35 namespace {
36 using llvm::Error;
37 using llvm::Expected;
38
39 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
40
41 struct ParseState {
42 // The remaining input to be processed.
43 StringRef Input;
44 // The original input. Not modified during parsing; only for reference in
45 // error reporting.
46 StringRef OriginalInput;
47 };
48
49 // Represents an intermediate result returned by a parsing function. Functions
50 // that don't generate values should use `llvm::None`
51 template <typename ResultType> struct ParseProgress {
52 ParseState State;
53 // Intermediate result generated by the Parser.
54 ResultType Value;
55 };
56
57 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
58 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
59
60 class ParseError : public llvm::ErrorInfo<ParseError> {
61 public:
62 // Required field for all ErrorInfo derivatives.
63 static char ID;
64
ParseError(size_t Pos,std::string ErrorMsg,std::string InputExcerpt)65 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
66 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
67 Excerpt(std::move(InputExcerpt)) {}
68
log(llvm::raw_ostream & OS) const69 void log(llvm::raw_ostream &OS) const override {
70 OS << "parse error at position (" << Pos << "): " << ErrorMsg
71 << ": " + Excerpt;
72 }
73
convertToErrorCode() const74 std::error_code convertToErrorCode() const override {
75 return llvm::inconvertibleErrorCode();
76 }
77
78 // Position of the error in the input string.
79 size_t Pos;
80 std::string ErrorMsg;
81 // Excerpt of the input starting at the error position.
82 std::string Excerpt;
83 };
84
85 char ParseError::ID;
86 } // namespace
87
88 static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors()89 getUnaryStringSelectors() {
90 static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
91 {"name", name},
92 {"node", node},
93 {"statement", statement},
94 {"statements", statements},
95 {"member", member},
96 {"callArgs", callArgs},
97 {"elseBranch", elseBranch},
98 {"initListElements", initListElements}};
99 return M;
100 }
101
102 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors()103 getUnaryRangeSelectors() {
104 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
105 {"before", before}, {"after", after}, {"expansion", expansion}};
106 return M;
107 }
108
109 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors()110 getBinaryStringSelectors() {
111 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
112 {"encloseNodes", encloseNodes}};
113 return M;
114 }
115
116 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors()117 getBinaryRangeSelectors() {
118 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
119 M = {{"enclose", enclose}, {"between", between}};
120 return M;
121 }
122
123 template <typename Element>
findOptional(const llvm::StringMap<Element> & Map,llvm::StringRef Key)124 llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
125 llvm::StringRef Key) {
126 auto it = Map.find(Key);
127 if (it == Map.end())
128 return llvm::None;
129 return it->second;
130 }
131
132 template <typename ResultType>
makeParseProgress(ParseState State,ResultType Result)133 ParseProgress<ResultType> makeParseProgress(ParseState State,
134 ResultType Result) {
135 return ParseProgress<ResultType>{State, std::move(Result)};
136 }
137
makeParseError(const ParseState & S,std::string ErrorMsg)138 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
139 size_t Pos = S.OriginalInput.size() - S.Input.size();
140 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
141 S.OriginalInput.substr(Pos, 20).str());
142 }
143
144 // Returns a new ParseState that advances \c S by \c N characters.
advance(ParseState S,size_t N)145 static ParseState advance(ParseState S, size_t N) {
146 S.Input = S.Input.drop_front(N);
147 return S;
148 }
149
consumeWhitespace(StringRef S)150 static StringRef consumeWhitespace(StringRef S) {
151 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
152 }
153
154 // Parses a single expected character \c c from \c State, skipping preceding
155 // whitespace. Error if the expected character isn't found.
parseChar(char c,ParseState State)156 static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
157 State.Input = consumeWhitespace(State.Input);
158 if (State.Input.empty() || State.Input.front() != c)
159 return makeParseError(State,
160 ("expected char not found: " + llvm::Twine(c)).str());
161 return makeParseProgress(advance(State, 1), llvm::None);
162 }
163
164 // Parses an identitifer "token" -- handles preceding whitespace.
parseId(ParseState State)165 static ExpectedProgress<std::string> parseId(ParseState State) {
166 State.Input = consumeWhitespace(State.Input);
167 auto Id = State.Input.take_while(
168 [](char c) { return isASCII(c) && isIdentifierBody(c); });
169 if (Id.empty())
170 return makeParseError(State, "failed to parse name");
171 return makeParseProgress(advance(State, Id.size()), Id.str());
172 }
173
174 // For consistency with the AST matcher parser and C++ code, node ids are
175 // written as strings. However, we do not support escaping in the string.
parseStringId(ParseState State)176 static ExpectedProgress<std::string> parseStringId(ParseState State) {
177 State.Input = consumeWhitespace(State.Input);
178 if (State.Input.empty())
179 return makeParseError(State, "unexpected end of input");
180 if (!State.Input.consume_front("\""))
181 return makeParseError(
182 State,
183 "expecting string, but encountered other character or end of input");
184
185 StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
186 if (State.Input.size() == Id.size())
187 return makeParseError(State, "unterminated string");
188 // Advance past the trailing quote as well.
189 return makeParseProgress(advance(State, Id.size() + 1), Id.str());
190 }
191
192 // Parses a single element surrounded by parens. `Op` is applied to the parsed
193 // result to create the result of this function call.
194 template <typename T>
parseSingle(ParseFunction<T> ParseElement,RangeSelectorOp<T> Op,ParseState State)195 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
196 RangeSelectorOp<T> Op,
197 ParseState State) {
198 auto P = parseChar('(', State);
199 if (!P)
200 return P.takeError();
201
202 auto E = ParseElement(P->State);
203 if (!E)
204 return E.takeError();
205
206 P = parseChar(')', E->State);
207 if (!P)
208 return P.takeError();
209
210 return makeParseProgress(P->State, Op(std::move(E->Value)));
211 }
212
213 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
214 // is applied to the parsed results to create the result of this function call.
215 template <typename T>
parsePair(ParseFunction<T> ParseElement,RangeSelectorOp<T,T> Op,ParseState State)216 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
217 RangeSelectorOp<T, T> Op,
218 ParseState State) {
219 auto P = parseChar('(', State);
220 if (!P)
221 return P.takeError();
222
223 auto Left = ParseElement(P->State);
224 if (!Left)
225 return Left.takeError();
226
227 P = parseChar(',', Left->State);
228 if (!P)
229 return P.takeError();
230
231 auto Right = ParseElement(P->State);
232 if (!Right)
233 return Right.takeError();
234
235 P = parseChar(')', Right->State);
236 if (!P)
237 return P.takeError();
238
239 return makeParseProgress(P->State,
240 Op(std::move(Left->Value), std::move(Right->Value)));
241 }
242
243 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
244 // Id operator). Returns StencilType representing the operator on success and
245 // error if it fails to parse input for an operator.
246 static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State)247 parseRangeSelectorImpl(ParseState State) {
248 auto Id = parseId(State);
249 if (!Id)
250 return Id.takeError();
251
252 std::string OpName = std::move(Id->Value);
253 if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
254 return parseSingle(parseStringId, *Op, Id->State);
255
256 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
257 return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
258
259 if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
260 return parsePair(parseStringId, *Op, Id->State);
261
262 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
263 return parsePair(parseRangeSelectorImpl, *Op, Id->State);
264
265 return makeParseError(State, "unknown selector name: " + OpName);
266 }
267
parseRangeSelector(llvm::StringRef Input)268 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
269 ParseState State = {Input, Input};
270 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
271 if (!Result)
272 return Result.takeError();
273 State = Result->State;
274 // Discard any potentially trailing whitespace.
275 State.Input = consumeWhitespace(State.Input);
276 if (State.Input.empty())
277 return Result->Value;
278 return makeParseError(State, "unexpected input after selector");
279 }
280