15ffd83dbSDimitry Andric //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/Parsing.h"
105ffd83dbSDimitry Andric #include "clang/AST/Expr.h"
115ffd83dbSDimitry Andric #include "clang/ASTMatchers/ASTMatchFinder.h"
125ffd83dbSDimitry Andric #include "clang/Basic/CharInfo.h"
135ffd83dbSDimitry Andric #include "clang/Basic/SourceLocation.h"
145ffd83dbSDimitry Andric #include "clang/Lex/Lexer.h"
155ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/RangeSelector.h"
165ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h"
175ffd83dbSDimitry Andric #include "llvm/ADT/StringMap.h"
185ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
195ffd83dbSDimitry Andric #include "llvm/Support/Errc.h"
205ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
21*bdd1243dSDimitry Andric #include <optional>
225ffd83dbSDimitry Andric #include <string>
235ffd83dbSDimitry Andric #include <utility>
245ffd83dbSDimitry Andric #include <vector>
255ffd83dbSDimitry Andric 
265ffd83dbSDimitry Andric using namespace clang;
275ffd83dbSDimitry Andric using namespace transformer;
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric // FIXME: This implementation is entirely separate from that of the AST
305ffd83dbSDimitry Andric // matchers. Given the similarity of the languages and uses of the two parsers,
315ffd83dbSDimitry Andric // the two should share a common parsing infrastructure, as should other
325ffd83dbSDimitry Andric // Transformer types. We intend to unify this implementation soon to share as
335ffd83dbSDimitry Andric // much as possible with the AST Matchers parsing.
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric namespace {
365ffd83dbSDimitry Andric using llvm::Expected;
375ffd83dbSDimitry Andric 
385ffd83dbSDimitry Andric template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
395ffd83dbSDimitry Andric 
405ffd83dbSDimitry Andric struct ParseState {
415ffd83dbSDimitry Andric   // The remaining input to be processed.
425ffd83dbSDimitry Andric   StringRef Input;
435ffd83dbSDimitry Andric   // The original input. Not modified during parsing; only for reference in
445ffd83dbSDimitry Andric   // error reporting.
455ffd83dbSDimitry Andric   StringRef OriginalInput;
465ffd83dbSDimitry Andric };
475ffd83dbSDimitry Andric 
485ffd83dbSDimitry Andric // Represents an intermediate result returned by a parsing function. Functions
49*bdd1243dSDimitry Andric // that don't generate values should use `std::nullopt`
505ffd83dbSDimitry Andric template <typename ResultType> struct ParseProgress {
515ffd83dbSDimitry Andric   ParseState State;
525ffd83dbSDimitry Andric   // Intermediate result generated by the Parser.
535ffd83dbSDimitry Andric   ResultType Value;
545ffd83dbSDimitry Andric };
555ffd83dbSDimitry Andric 
565ffd83dbSDimitry Andric template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
575ffd83dbSDimitry Andric template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
585ffd83dbSDimitry Andric 
595ffd83dbSDimitry Andric class ParseError : public llvm::ErrorInfo<ParseError> {
605ffd83dbSDimitry Andric public:
615ffd83dbSDimitry Andric   // Required field for all ErrorInfo derivatives.
625ffd83dbSDimitry Andric   static char ID;
635ffd83dbSDimitry Andric 
ParseError(size_t Pos,std::string ErrorMsg,std::string InputExcerpt)645ffd83dbSDimitry Andric   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
655ffd83dbSDimitry Andric       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
665ffd83dbSDimitry Andric         Excerpt(std::move(InputExcerpt)) {}
675ffd83dbSDimitry Andric 
log(llvm::raw_ostream & OS) const685ffd83dbSDimitry Andric   void log(llvm::raw_ostream &OS) const override {
695ffd83dbSDimitry Andric     OS << "parse error at position (" << Pos << "): " << ErrorMsg
705ffd83dbSDimitry Andric        << ": " + Excerpt;
715ffd83dbSDimitry Andric   }
725ffd83dbSDimitry Andric 
convertToErrorCode() const735ffd83dbSDimitry Andric   std::error_code convertToErrorCode() const override {
745ffd83dbSDimitry Andric     return llvm::inconvertibleErrorCode();
755ffd83dbSDimitry Andric   }
765ffd83dbSDimitry Andric 
775ffd83dbSDimitry Andric   // Position of the error in the input string.
785ffd83dbSDimitry Andric   size_t Pos;
795ffd83dbSDimitry Andric   std::string ErrorMsg;
805ffd83dbSDimitry Andric   // Excerpt of the input starting at the error position.
815ffd83dbSDimitry Andric   std::string Excerpt;
825ffd83dbSDimitry Andric };
835ffd83dbSDimitry Andric 
845ffd83dbSDimitry Andric char ParseError::ID;
855ffd83dbSDimitry Andric } // namespace
865ffd83dbSDimitry Andric 
875ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors()885ffd83dbSDimitry Andric getUnaryStringSelectors() {
895ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
905ffd83dbSDimitry Andric       {"name", name},
915ffd83dbSDimitry Andric       {"node", node},
925ffd83dbSDimitry Andric       {"statement", statement},
935ffd83dbSDimitry Andric       {"statements", statements},
945ffd83dbSDimitry Andric       {"member", member},
955ffd83dbSDimitry Andric       {"callArgs", callArgs},
965ffd83dbSDimitry Andric       {"elseBranch", elseBranch},
975ffd83dbSDimitry Andric       {"initListElements", initListElements}};
985ffd83dbSDimitry Andric   return M;
995ffd83dbSDimitry Andric }
1005ffd83dbSDimitry Andric 
1015ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors()1025ffd83dbSDimitry Andric getUnaryRangeSelectors() {
1035ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
1045ffd83dbSDimitry Andric       {"before", before}, {"after", after}, {"expansion", expansion}};
1055ffd83dbSDimitry Andric   return M;
1065ffd83dbSDimitry Andric }
1075ffd83dbSDimitry Andric 
1085ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors()1095ffd83dbSDimitry Andric getBinaryStringSelectors() {
1105ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
111e8d8bef9SDimitry Andric       {"encloseNodes", encloseNodes}};
1125ffd83dbSDimitry Andric   return M;
1135ffd83dbSDimitry Andric }
1145ffd83dbSDimitry Andric 
1155ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors()1165ffd83dbSDimitry Andric getBinaryRangeSelectors() {
1175ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
118e8d8bef9SDimitry Andric       M = {{"enclose", enclose}, {"between", between}};
1195ffd83dbSDimitry Andric   return M;
1205ffd83dbSDimitry Andric }
1215ffd83dbSDimitry Andric 
1225ffd83dbSDimitry Andric template <typename Element>
findOptional(const llvm::StringMap<Element> & Map,llvm::StringRef Key)123*bdd1243dSDimitry Andric std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
1245ffd83dbSDimitry Andric                                     llvm::StringRef Key) {
1255ffd83dbSDimitry Andric   auto it = Map.find(Key);
1265ffd83dbSDimitry Andric   if (it == Map.end())
127*bdd1243dSDimitry Andric     return std::nullopt;
1285ffd83dbSDimitry Andric   return it->second;
1295ffd83dbSDimitry Andric }
1305ffd83dbSDimitry Andric 
1315ffd83dbSDimitry Andric template <typename ResultType>
makeParseProgress(ParseState State,ResultType Result)1325ffd83dbSDimitry Andric ParseProgress<ResultType> makeParseProgress(ParseState State,
1335ffd83dbSDimitry Andric                                             ResultType Result) {
1345ffd83dbSDimitry Andric   return ParseProgress<ResultType>{State, std::move(Result)};
1355ffd83dbSDimitry Andric }
1365ffd83dbSDimitry Andric 
makeParseError(const ParseState & S,std::string ErrorMsg)1375ffd83dbSDimitry Andric static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
1385ffd83dbSDimitry Andric   size_t Pos = S.OriginalInput.size() - S.Input.size();
1395ffd83dbSDimitry Andric   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
1405ffd83dbSDimitry Andric                                       S.OriginalInput.substr(Pos, 20).str());
1415ffd83dbSDimitry Andric }
1425ffd83dbSDimitry Andric 
1435ffd83dbSDimitry Andric // Returns a new ParseState that advances \c S by \c N characters.
advance(ParseState S,size_t N)1445ffd83dbSDimitry Andric static ParseState advance(ParseState S, size_t N) {
1455ffd83dbSDimitry Andric   S.Input = S.Input.drop_front(N);
1465ffd83dbSDimitry Andric   return S;
1475ffd83dbSDimitry Andric }
1485ffd83dbSDimitry Andric 
consumeWhitespace(StringRef S)1495ffd83dbSDimitry Andric static StringRef consumeWhitespace(StringRef S) {
150e8d8bef9SDimitry Andric   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
1515ffd83dbSDimitry Andric }
1525ffd83dbSDimitry Andric 
1535ffd83dbSDimitry Andric // Parses a single expected character \c c from \c State, skipping preceding
1545ffd83dbSDimitry Andric // whitespace.  Error if the expected character isn't found.
parseChar(char c,ParseState State)155*bdd1243dSDimitry Andric static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
1565ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1575ffd83dbSDimitry Andric   if (State.Input.empty() || State.Input.front() != c)
1585ffd83dbSDimitry Andric     return makeParseError(State,
1595ffd83dbSDimitry Andric                           ("expected char not found: " + llvm::Twine(c)).str());
160*bdd1243dSDimitry Andric   return makeParseProgress(advance(State, 1), std::nullopt);
1615ffd83dbSDimitry Andric }
1625ffd83dbSDimitry Andric 
1635ffd83dbSDimitry Andric // Parses an identitifer "token" -- handles preceding whitespace.
parseId(ParseState State)1645ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseId(ParseState State) {
1655ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1665ffd83dbSDimitry Andric   auto Id = State.Input.take_while(
167349cc55cSDimitry Andric       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
1685ffd83dbSDimitry Andric   if (Id.empty())
1695ffd83dbSDimitry Andric     return makeParseError(State, "failed to parse name");
1705ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size()), Id.str());
1715ffd83dbSDimitry Andric }
1725ffd83dbSDimitry Andric 
1735ffd83dbSDimitry Andric // For consistency with the AST matcher parser and C++ code, node ids are
1745ffd83dbSDimitry Andric // written as strings. However, we do not support escaping in the string.
parseStringId(ParseState State)1755ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseStringId(ParseState State) {
1765ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1775ffd83dbSDimitry Andric   if (State.Input.empty())
1785ffd83dbSDimitry Andric     return makeParseError(State, "unexpected end of input");
1795ffd83dbSDimitry Andric   if (!State.Input.consume_front("\""))
1805ffd83dbSDimitry Andric     return makeParseError(
1815ffd83dbSDimitry Andric         State,
1825ffd83dbSDimitry Andric         "expecting string, but encountered other character or end of input");
1835ffd83dbSDimitry Andric 
1845ffd83dbSDimitry Andric   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
1855ffd83dbSDimitry Andric   if (State.Input.size() == Id.size())
1865ffd83dbSDimitry Andric     return makeParseError(State, "unterminated string");
1875ffd83dbSDimitry Andric   // Advance past the trailing quote as well.
1885ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
1895ffd83dbSDimitry Andric }
1905ffd83dbSDimitry Andric 
1915ffd83dbSDimitry Andric // Parses a single element surrounded by parens. `Op` is applied to the parsed
1925ffd83dbSDimitry Andric // result to create the result of this function call.
1935ffd83dbSDimitry Andric template <typename T>
parseSingle(ParseFunction<T> ParseElement,RangeSelectorOp<T> Op,ParseState State)1945ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
1955ffd83dbSDimitry Andric                                             RangeSelectorOp<T> Op,
1965ffd83dbSDimitry Andric                                             ParseState State) {
1975ffd83dbSDimitry Andric   auto P = parseChar('(', State);
1985ffd83dbSDimitry Andric   if (!P)
1995ffd83dbSDimitry Andric     return P.takeError();
2005ffd83dbSDimitry Andric 
2015ffd83dbSDimitry Andric   auto E = ParseElement(P->State);
2025ffd83dbSDimitry Andric   if (!E)
2035ffd83dbSDimitry Andric     return E.takeError();
2045ffd83dbSDimitry Andric 
2055ffd83dbSDimitry Andric   P = parseChar(')', E->State);
2065ffd83dbSDimitry Andric   if (!P)
2075ffd83dbSDimitry Andric     return P.takeError();
2085ffd83dbSDimitry Andric 
2095ffd83dbSDimitry Andric   return makeParseProgress(P->State, Op(std::move(E->Value)));
2105ffd83dbSDimitry Andric }
2115ffd83dbSDimitry Andric 
2125ffd83dbSDimitry Andric // Parses a pair of elements surrounded by parens and separated by comma. `Op`
2135ffd83dbSDimitry Andric // is applied to the parsed results to create the result of this function call.
2145ffd83dbSDimitry Andric template <typename T>
parsePair(ParseFunction<T> ParseElement,RangeSelectorOp<T,T> Op,ParseState State)2155ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
2165ffd83dbSDimitry Andric                                           RangeSelectorOp<T, T> Op,
2175ffd83dbSDimitry Andric                                           ParseState State) {
2185ffd83dbSDimitry Andric   auto P = parseChar('(', State);
2195ffd83dbSDimitry Andric   if (!P)
2205ffd83dbSDimitry Andric     return P.takeError();
2215ffd83dbSDimitry Andric 
2225ffd83dbSDimitry Andric   auto Left = ParseElement(P->State);
2235ffd83dbSDimitry Andric   if (!Left)
2245ffd83dbSDimitry Andric     return Left.takeError();
2255ffd83dbSDimitry Andric 
2265ffd83dbSDimitry Andric   P = parseChar(',', Left->State);
2275ffd83dbSDimitry Andric   if (!P)
2285ffd83dbSDimitry Andric     return P.takeError();
2295ffd83dbSDimitry Andric 
2305ffd83dbSDimitry Andric   auto Right = ParseElement(P->State);
2315ffd83dbSDimitry Andric   if (!Right)
2325ffd83dbSDimitry Andric     return Right.takeError();
2335ffd83dbSDimitry Andric 
2345ffd83dbSDimitry Andric   P = parseChar(')', Right->State);
2355ffd83dbSDimitry Andric   if (!P)
2365ffd83dbSDimitry Andric     return P.takeError();
2375ffd83dbSDimitry Andric 
2385ffd83dbSDimitry Andric   return makeParseProgress(P->State,
2395ffd83dbSDimitry Andric                            Op(std::move(Left->Value), std::move(Right->Value)));
2405ffd83dbSDimitry Andric }
2415ffd83dbSDimitry Andric 
2425ffd83dbSDimitry Andric // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
2435ffd83dbSDimitry Andric // Id operator). Returns StencilType representing the operator on success and
2445ffd83dbSDimitry Andric // error if it fails to parse input for an operator.
2455ffd83dbSDimitry Andric static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State)2465ffd83dbSDimitry Andric parseRangeSelectorImpl(ParseState State) {
2475ffd83dbSDimitry Andric   auto Id = parseId(State);
2485ffd83dbSDimitry Andric   if (!Id)
2495ffd83dbSDimitry Andric     return Id.takeError();
2505ffd83dbSDimitry Andric 
2515ffd83dbSDimitry Andric   std::string OpName = std::move(Id->Value);
2525ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
2535ffd83dbSDimitry Andric     return parseSingle(parseStringId, *Op, Id->State);
2545ffd83dbSDimitry Andric 
2555ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
2565ffd83dbSDimitry Andric     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
2575ffd83dbSDimitry Andric 
2585ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
2595ffd83dbSDimitry Andric     return parsePair(parseStringId, *Op, Id->State);
2605ffd83dbSDimitry Andric 
2615ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
2625ffd83dbSDimitry Andric     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
2635ffd83dbSDimitry Andric 
2645ffd83dbSDimitry Andric   return makeParseError(State, "unknown selector name: " + OpName);
2655ffd83dbSDimitry Andric }
2665ffd83dbSDimitry Andric 
parseRangeSelector(llvm::StringRef Input)2675ffd83dbSDimitry Andric Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
2685ffd83dbSDimitry Andric   ParseState State = {Input, Input};
2695ffd83dbSDimitry Andric   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
2705ffd83dbSDimitry Andric   if (!Result)
2715ffd83dbSDimitry Andric     return Result.takeError();
2725ffd83dbSDimitry Andric   State = Result->State;
2735ffd83dbSDimitry Andric   // Discard any potentially trailing whitespace.
2745ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
2755ffd83dbSDimitry Andric   if (State.Input.empty())
2765ffd83dbSDimitry Andric     return Result->Value;
2775ffd83dbSDimitry Andric   return makeParseError(State, "unexpected input after selector");
2785ffd83dbSDimitry Andric }
279