1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cerrno>
25 #include <cstddef>
26 #include <cstdlib>
27 #include <optional>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 namespace clang {
33 namespace ast_matchers {
34 namespace dynamic {
35 
36 /// Simple structure to hold information for one token from the parser.
37 struct Parser::TokenInfo {
38   /// Different possible tokens.
39   enum TokenKind {
40     TK_Eof,
41     TK_NewLine,
42     TK_OpenParen,
43     TK_CloseParen,
44     TK_Comma,
45     TK_Period,
46     TK_Literal,
47     TK_Ident,
48     TK_InvalidChar,
49     TK_Error,
50     TK_CodeCompletion
51   };
52 
53   /// Some known identifiers.
54   static const char* const ID_Bind;
55   static const char *const ID_With;
56 
57   TokenInfo() = default;
58 
59   StringRef Text;
60   TokenKind Kind = TK_Eof;
61   SourceRange Range;
62   VariantValue Value;
63 };
64 
65 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 const char *const Parser::TokenInfo::ID_With = "with";
67 
68 /// Simple tokenizer for the parser.
69 class Parser::CodeTokenizer {
70 public:
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error)71   explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73     NextToken = getNextToken();
74   }
75 
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error,unsigned CodeCompletionOffset)76   CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77                 unsigned CodeCompletionOffset)
78       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79         CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80     NextToken = getNextToken();
81   }
82 
83   /// Returns but doesn't consume the next token.
peekNextToken() const84   const TokenInfo &peekNextToken() const { return NextToken; }
85 
86   /// Consumes and returns the next token.
consumeNextToken()87   TokenInfo consumeNextToken() {
88     TokenInfo ThisToken = NextToken;
89     NextToken = getNextToken();
90     return ThisToken;
91   }
92 
SkipNewlines()93   TokenInfo SkipNewlines() {
94     while (NextToken.Kind == TokenInfo::TK_NewLine)
95       NextToken = getNextToken();
96     return NextToken;
97   }
98 
consumeNextTokenIgnoreNewlines()99   TokenInfo consumeNextTokenIgnoreNewlines() {
100     SkipNewlines();
101     if (NextToken.Kind == TokenInfo::TK_Eof)
102       return NextToken;
103     return consumeNextToken();
104   }
105 
nextTokenKind() const106   TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
107 
108 private:
getNextToken()109   TokenInfo getNextToken() {
110     consumeWhitespace();
111     TokenInfo Result;
112     Result.Range.Start = currentLocation();
113 
114     if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115       Result.Kind = TokenInfo::TK_CodeCompletion;
116       Result.Text = StringRef(CodeCompletionLocation, 0);
117       CodeCompletionLocation = nullptr;
118       return Result;
119     }
120 
121     if (Code.empty()) {
122       Result.Kind = TokenInfo::TK_Eof;
123       Result.Text = "";
124       return Result;
125     }
126 
127     switch (Code[0]) {
128     case '#':
129       Code = Code.drop_until([](char c) { return c == '\n'; });
130       return getNextToken();
131     case ',':
132       Result.Kind = TokenInfo::TK_Comma;
133       Result.Text = Code.substr(0, 1);
134       Code = Code.drop_front();
135       break;
136     case '.':
137       Result.Kind = TokenInfo::TK_Period;
138       Result.Text = Code.substr(0, 1);
139       Code = Code.drop_front();
140       break;
141     case '\n':
142       ++Line;
143       StartOfLine = Code.drop_front();
144       Result.Kind = TokenInfo::TK_NewLine;
145       Result.Text = Code.substr(0, 1);
146       Code = Code.drop_front();
147       break;
148     case '(':
149       Result.Kind = TokenInfo::TK_OpenParen;
150       Result.Text = Code.substr(0, 1);
151       Code = Code.drop_front();
152       break;
153     case ')':
154       Result.Kind = TokenInfo::TK_CloseParen;
155       Result.Text = Code.substr(0, 1);
156       Code = Code.drop_front();
157       break;
158 
159     case '"':
160     case '\'':
161       // Parse a string literal.
162       consumeStringLiteral(&Result);
163       break;
164 
165     case '0': case '1': case '2': case '3': case '4':
166     case '5': case '6': case '7': case '8': case '9':
167       // Parse an unsigned and float literal.
168       consumeNumberLiteral(&Result);
169       break;
170 
171     default:
172       if (isAlphanumeric(Code[0])) {
173         // Parse an identifier
174         size_t TokenLength = 1;
175         while (true) {
176           // A code completion location in/immediately after an identifier will
177           // cause the portion of the identifier before the code completion
178           // location to become a code completion token.
179           if (CodeCompletionLocation == Code.data() + TokenLength) {
180             CodeCompletionLocation = nullptr;
181             Result.Kind = TokenInfo::TK_CodeCompletion;
182             Result.Text = Code.substr(0, TokenLength);
183             Code = Code.drop_front(TokenLength);
184             return Result;
185           }
186           if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
187             break;
188           ++TokenLength;
189         }
190         if (TokenLength == 4 && Code.starts_with("true")) {
191           Result.Kind = TokenInfo::TK_Literal;
192           Result.Value = true;
193         } else if (TokenLength == 5 && Code.starts_with("false")) {
194           Result.Kind = TokenInfo::TK_Literal;
195           Result.Value = false;
196         } else {
197           Result.Kind = TokenInfo::TK_Ident;
198           Result.Text = Code.substr(0, TokenLength);
199         }
200         Code = Code.drop_front(TokenLength);
201       } else {
202         Result.Kind = TokenInfo::TK_InvalidChar;
203         Result.Text = Code.substr(0, 1);
204         Code = Code.drop_front(1);
205       }
206       break;
207     }
208 
209     Result.Range.End = currentLocation();
210     return Result;
211   }
212 
213   /// Consume an unsigned and float literal.
consumeNumberLiteral(TokenInfo * Result)214   void consumeNumberLiteral(TokenInfo *Result) {
215     bool isFloatingLiteral = false;
216     unsigned Length = 1;
217     if (Code.size() > 1) {
218       // Consume the 'x' or 'b' radix modifier, if present.
219       switch (toLowercase(Code[1])) {
220       case 'x': case 'b': Length = 2;
221       }
222     }
223     while (Length < Code.size() && isHexDigit(Code[Length]))
224       ++Length;
225 
226     // Try to recognize a floating point literal.
227     while (Length < Code.size()) {
228       char c = Code[Length];
229       if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230         isFloatingLiteral = true;
231         Length++;
232       } else {
233         break;
234       }
235     }
236 
237     Result->Text = Code.substr(0, Length);
238     Code = Code.drop_front(Length);
239 
240     if (isFloatingLiteral) {
241       char *end;
242       errno = 0;
243       std::string Text = Result->Text.str();
244       double doubleValue = strtod(Text.c_str(), &end);
245       if (*end == 0 && errno == 0) {
246         Result->Kind = TokenInfo::TK_Literal;
247         Result->Value = doubleValue;
248         return;
249       }
250     } else {
251       unsigned Value;
252       if (!Result->Text.getAsInteger(0, Value)) {
253         Result->Kind = TokenInfo::TK_Literal;
254         Result->Value = Value;
255         return;
256       }
257     }
258 
259     SourceRange Range;
260     Range.Start = Result->Range.Start;
261     Range.End = currentLocation();
262     Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
263     Result->Kind = TokenInfo::TK_Error;
264   }
265 
266   /// Consume a string literal.
267   ///
268   /// \c Code must be positioned at the start of the literal (the opening
269   /// quote). Consumed until it finds the same closing quote character.
consumeStringLiteral(TokenInfo * Result)270   void consumeStringLiteral(TokenInfo *Result) {
271     bool InEscape = false;
272     const char Marker = Code[0];
273     for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274       if (InEscape) {
275         InEscape = false;
276         continue;
277       }
278       if (Code[Length] == '\\') {
279         InEscape = true;
280         continue;
281       }
282       if (Code[Length] == Marker) {
283         Result->Kind = TokenInfo::TK_Literal;
284         Result->Text = Code.substr(0, Length + 1);
285         Result->Value = Code.substr(1, Length - 1);
286         Code = Code.drop_front(Length + 1);
287         return;
288       }
289     }
290 
291     StringRef ErrorText = Code;
292     Code = Code.drop_front(Code.size());
293     SourceRange Range;
294     Range.Start = Result->Range.Start;
295     Range.End = currentLocation();
296     Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
297     Result->Kind = TokenInfo::TK_Error;
298   }
299 
300   /// Consume all leading whitespace from \c Code.
consumeWhitespace()301   void consumeWhitespace() {
302     // Don't trim newlines.
303     Code = Code.ltrim(" \t\v\f\r");
304   }
305 
currentLocation()306   SourceLocation currentLocation() {
307     SourceLocation Location;
308     Location.Line = Line;
309     Location.Column = Code.data() - StartOfLine.data() + 1;
310     return Location;
311   }
312 
313   StringRef &Code;
314   StringRef StartOfLine;
315   unsigned Line = 1;
316   Diagnostics *Error;
317   TokenInfo NextToken;
318   const char *CodeCompletionLocation = nullptr;
319 };
320 
321 Parser::Sema::~Sema() = default;
322 
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> Context)323 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
324     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
325   return {};
326 }
327 
328 std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes)329 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
330   return {};
331 }
332 
333 struct Parser::ScopedContextEntry {
334   Parser *P;
335 
ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry336   ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
337     P->ContextStack.push_back(std::make_pair(C, 0u));
338   }
339 
~ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry340   ~ScopedContextEntry() {
341     P->ContextStack.pop_back();
342   }
343 
nextArgclang::ast_matchers::dynamic::Parser::ScopedContextEntry344   void nextArg() {
345     ++P->ContextStack.back().second;
346   }
347 };
348 
349 /// Parse expressions that start with an identifier.
350 ///
351 /// This function can parse named values and matchers.
352 /// In case of failure it will try to determine the user's intent to give
353 /// an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * Value)354 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
355   const TokenInfo NameToken = Tokenizer->consumeNextToken();
356 
357   if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
358     // Parse as a named value.
359     if (const VariantValue NamedValue =
360             NamedValues ? NamedValues->lookup(NameToken.Text)
361                         : VariantValue()) {
362 
363       if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
364         *Value = NamedValue;
365         return true;
366       }
367 
368       std::string BindID;
369       Tokenizer->consumeNextToken();
370       TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
371       if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
372         addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
373         return false;
374       }
375 
376       if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
377           (ChainCallToken.Text != TokenInfo::ID_Bind &&
378            ChainCallToken.Text != TokenInfo::ID_With)) {
379         Error->addError(ChainCallToken.Range,
380                         Error->ET_ParserMalformedChainedExpr);
381         return false;
382       }
383       if (ChainCallToken.Text == TokenInfo::ID_With) {
384 
385         Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
386                                  NameToken.Text, NameToken.Range);
387 
388         Error->addError(ChainCallToken.Range,
389                         Error->ET_RegistryMatcherNoWithSupport);
390         return false;
391       }
392       if (!parseBindID(BindID))
393         return false;
394 
395       assert(NamedValue.isMatcher());
396       std::optional<DynTypedMatcher> Result =
397           NamedValue.getMatcher().getSingleMatcher();
398       if (Result) {
399         std::optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
400         if (Bound) {
401           *Value = VariantMatcher::SingleMatcher(*Bound);
402           return true;
403         }
404       }
405       return false;
406     }
407 
408     if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
409       Error->addError(Tokenizer->peekNextToken().Range,
410                       Error->ET_ParserNoOpenParen)
411           << "NewLine";
412       return false;
413     }
414 
415     // If the syntax is correct and the name is not a matcher either, report
416     // unknown named value.
417     if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
418          Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
419          Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
420          Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
421         !S->lookupMatcherCtor(NameToken.Text)) {
422       Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
423           << NameToken.Text;
424       return false;
425     }
426     // Otherwise, fallback to the matcher parser.
427   }
428 
429   Tokenizer->SkipNewlines();
430 
431   assert(NameToken.Kind == TokenInfo::TK_Ident);
432   TokenInfo OpenToken = Tokenizer->consumeNextToken();
433   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
434     Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
435         << OpenToken.Text;
436     return false;
437   }
438 
439   std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
440 
441   // Parse as a matcher expression.
442   return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
443 }
444 
parseBindID(std::string & BindID)445 bool Parser::parseBindID(std::string &BindID) {
446   // Parse the parenthesized argument to .bind("foo")
447   const TokenInfo OpenToken = Tokenizer->consumeNextToken();
448   const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449   const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
450 
451   // TODO: We could use different error codes for each/some to be more
452   //       explicit about the syntax error.
453   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
454     Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
455     return false;
456   }
457   if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
458     Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
459     return false;
460   }
461   if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
462     Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
463     return false;
464   }
465   BindID = IDToken.Value.getString();
466   return true;
467 }
468 
parseMatcherBuilder(MatcherCtor Ctor,const TokenInfo & NameToken,const TokenInfo & OpenToken,VariantValue * Value)469 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
470                                  const TokenInfo &OpenToken,
471                                  VariantValue *Value) {
472   std::vector<ParserValue> Args;
473   TokenInfo EndToken;
474 
475   Tokenizer->SkipNewlines();
476 
477   {
478     ScopedContextEntry SCE(this, Ctor);
479 
480     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
481       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
482         // End of args.
483         EndToken = Tokenizer->consumeNextToken();
484         break;
485       }
486       if (!Args.empty()) {
487         // We must find a , token to continue.
488         TokenInfo CommaToken = Tokenizer->consumeNextToken();
489         if (CommaToken.Kind != TokenInfo::TK_Comma) {
490           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
491               << CommaToken.Text;
492           return false;
493         }
494       }
495 
496       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
497                                NameToken.Text, NameToken.Range,
498                                Args.size() + 1);
499       ParserValue ArgValue;
500       Tokenizer->SkipNewlines();
501 
502       if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
503         addExpressionCompletions();
504         return false;
505       }
506 
507       TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
508 
509       if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
510         Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
511             << NameToken.Text;
512         return false;
513       }
514 
515       ArgValue.Text = NodeMatcherToken.Text;
516       ArgValue.Range = NodeMatcherToken.Range;
517 
518       std::optional<MatcherCtor> MappedMatcher =
519           S->lookupMatcherCtor(ArgValue.Text);
520 
521       if (!MappedMatcher) {
522         Error->addError(NodeMatcherToken.Range,
523                         Error->ET_RegistryMatcherNotFound)
524             << NodeMatcherToken.Text;
525         return false;
526       }
527 
528       ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
529 
530       if (NK.isNone()) {
531         Error->addError(NodeMatcherToken.Range,
532                         Error->ET_RegistryNonNodeMatcher)
533             << NodeMatcherToken.Text;
534         return false;
535       }
536 
537       ArgValue.Value = NK;
538 
539       Tokenizer->SkipNewlines();
540       Args.push_back(ArgValue);
541 
542       SCE.nextArg();
543     }
544   }
545 
546   if (EndToken.Kind == TokenInfo::TK_Eof) {
547     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
548     return false;
549   }
550 
551   internal::MatcherDescriptorPtr BuiltCtor =
552       S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);
553 
554   if (!BuiltCtor.get()) {
555     Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
556         << NameToken.Text;
557     return false;
558   }
559 
560   std::string BindID;
561   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
562     Tokenizer->consumeNextToken();
563     TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
564     if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
565       addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
566       addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
567       return false;
568     }
569     if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
570         (ChainCallToken.Text != TokenInfo::ID_Bind &&
571          ChainCallToken.Text != TokenInfo::ID_With)) {
572       Error->addError(ChainCallToken.Range,
573                       Error->ET_ParserMalformedChainedExpr);
574       return false;
575     }
576     if (ChainCallToken.Text == TokenInfo::ID_Bind) {
577       if (!parseBindID(BindID))
578         return false;
579       Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
580                                NameToken.Text, NameToken.Range);
581       SourceRange MatcherRange = NameToken.Range;
582       MatcherRange.End = ChainCallToken.Range.End;
583       VariantMatcher Result = S->actOnMatcherExpression(
584           BuiltCtor.get(), MatcherRange, BindID, {}, Error);
585       if (Result.isNull())
586         return false;
587 
588       *Value = Result;
589       return true;
590     } else if (ChainCallToken.Text == TokenInfo::ID_With) {
591       Tokenizer->SkipNewlines();
592 
593       if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
594         StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
595                                ? StringRef("EOF")
596                                : Tokenizer->peekNextToken().Text;
597         Error->addError(Tokenizer->peekNextToken().Range,
598                         Error->ET_ParserNoOpenParen)
599             << ErrTxt;
600         return false;
601       }
602 
603       TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
604 
605       return parseMatcherExpressionImpl(NameToken, WithOpenToken,
606                                         BuiltCtor.get(), Value);
607     }
608   }
609 
610   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
611                            NameToken.Text, NameToken.Range);
612   SourceRange MatcherRange = NameToken.Range;
613   MatcherRange.End = EndToken.Range.End;
614   VariantMatcher Result = S->actOnMatcherExpression(
615       BuiltCtor.get(), MatcherRange, BindID, {}, Error);
616   if (Result.isNull())
617     return false;
618 
619   *Value = Result;
620   return true;
621 }
622 
623 /// Parse and validate a matcher expression.
624 /// \return \c true on success, in which case \c Value has the matcher parsed.
625 ///   If the input is malformed, or some argument has an error, it
626 ///   returns \c false.
parseMatcherExpressionImpl(const TokenInfo & NameToken,const TokenInfo & OpenToken,std::optional<MatcherCtor> Ctor,VariantValue * Value)627 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
628                                         const TokenInfo &OpenToken,
629                                         std::optional<MatcherCtor> Ctor,
630                                         VariantValue *Value) {
631   if (!Ctor) {
632     Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
633         << NameToken.Text;
634     // Do not return here. We need to continue to give completion suggestions.
635   }
636 
637   if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
638     return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
639 
640   std::vector<ParserValue> Args;
641   TokenInfo EndToken;
642 
643   Tokenizer->SkipNewlines();
644 
645   {
646     ScopedContextEntry SCE(this, Ctor.value_or(nullptr));
647 
648     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
649       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
650         // End of args.
651         EndToken = Tokenizer->consumeNextToken();
652         break;
653       }
654       if (!Args.empty()) {
655         // We must find a , token to continue.
656         const TokenInfo CommaToken = Tokenizer->consumeNextToken();
657         if (CommaToken.Kind != TokenInfo::TK_Comma) {
658           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
659               << CommaToken.Text;
660           return false;
661         }
662       }
663 
664       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
665                                NameToken.Text, NameToken.Range,
666                                Args.size() + 1);
667       ParserValue ArgValue;
668       Tokenizer->SkipNewlines();
669       ArgValue.Text = Tokenizer->peekNextToken().Text;
670       ArgValue.Range = Tokenizer->peekNextToken().Range;
671       if (!parseExpressionImpl(&ArgValue.Value)) {
672         return false;
673       }
674 
675       Tokenizer->SkipNewlines();
676       Args.push_back(ArgValue);
677       SCE.nextArg();
678     }
679   }
680 
681   if (EndToken.Kind == TokenInfo::TK_Eof) {
682     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
683     return false;
684   }
685 
686   std::string BindID;
687   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
688     Tokenizer->consumeNextToken();
689     TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
690     if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
691       addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
692       return false;
693     }
694 
695     if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
696       Error->addError(ChainCallToken.Range,
697                       Error->ET_ParserMalformedChainedExpr);
698       return false;
699     }
700     if (ChainCallToken.Text == TokenInfo::ID_With) {
701 
702       Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
703                                NameToken.Text, NameToken.Range);
704 
705       Error->addError(ChainCallToken.Range,
706                       Error->ET_RegistryMatcherNoWithSupport);
707       return false;
708     }
709     if (ChainCallToken.Text != TokenInfo::ID_Bind) {
710       Error->addError(ChainCallToken.Range,
711                       Error->ET_ParserMalformedChainedExpr);
712       return false;
713     }
714     if (!parseBindID(BindID))
715       return false;
716   }
717 
718   if (!Ctor)
719     return false;
720 
721   // Merge the start and end infos.
722   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
723                            NameToken.Text, NameToken.Range);
724   SourceRange MatcherRange = NameToken.Range;
725   MatcherRange.End = EndToken.Range.End;
726   VariantMatcher Result = S->actOnMatcherExpression(
727       *Ctor, MatcherRange, BindID, Args, Error);
728   if (Result.isNull()) return false;
729 
730   *Value = Result;
731   return true;
732 }
733 
734 // If the prefix of this completion matches the completion token, add it to
735 // Completions minus the prefix.
addCompletion(const TokenInfo & CompToken,const MatcherCompletion & Completion)736 void Parser::addCompletion(const TokenInfo &CompToken,
737                            const MatcherCompletion& Completion) {
738   if (StringRef(Completion.TypedText).starts_with(CompToken.Text) &&
739       Completion.Specificity > 0) {
740     Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
741                              Completion.MatcherDecl, Completion.Specificity);
742   }
743 }
744 
getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes)745 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
746     ArrayRef<ArgKind> AcceptedTypes) {
747   if (!NamedValues) return std::vector<MatcherCompletion>();
748   std::vector<MatcherCompletion> Result;
749   for (const auto &Entry : *NamedValues) {
750     unsigned Specificity;
751     if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
752       std::string Decl =
753           (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
754       Result.emplace_back(Entry.getKey(), Decl, Specificity);
755     }
756   }
757   return Result;
758 }
759 
addExpressionCompletions()760 void Parser::addExpressionCompletions() {
761   const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
762   assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
763 
764   // We cannot complete code if there is an invalid element on the context
765   // stack.
766   for (ContextStackTy::iterator I = ContextStack.begin(),
767                                 E = ContextStack.end();
768        I != E; ++I) {
769     if (!I->first)
770       return;
771   }
772 
773   auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
774   for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
775     addCompletion(CompToken, Completion);
776   }
777 
778   for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
779     addCompletion(CompToken, Completion);
780   }
781 }
782 
783 /// Parse an <Expression>
parseExpressionImpl(VariantValue * Value)784 bool Parser::parseExpressionImpl(VariantValue *Value) {
785   switch (Tokenizer->nextTokenKind()) {
786   case TokenInfo::TK_Literal:
787     *Value = Tokenizer->consumeNextToken().Value;
788     return true;
789 
790   case TokenInfo::TK_Ident:
791     return parseIdentifierPrefixImpl(Value);
792 
793   case TokenInfo::TK_CodeCompletion:
794     addExpressionCompletions();
795     return false;
796 
797   case TokenInfo::TK_Eof:
798     Error->addError(Tokenizer->consumeNextToken().Range,
799                     Error->ET_ParserNoCode);
800     return false;
801 
802   case TokenInfo::TK_Error:
803     // This error was already reported by the tokenizer.
804     return false;
805   case TokenInfo::TK_NewLine:
806   case TokenInfo::TK_OpenParen:
807   case TokenInfo::TK_CloseParen:
808   case TokenInfo::TK_Comma:
809   case TokenInfo::TK_Period:
810   case TokenInfo::TK_InvalidChar:
811     const TokenInfo Token = Tokenizer->consumeNextToken();
812     Error->addError(Token.Range, Error->ET_ParserInvalidToken)
813         << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
814     return false;
815   }
816 
817   llvm_unreachable("Unknown token kind.");
818 }
819 
820 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
821 
Parser(CodeTokenizer * Tokenizer,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)822 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
823                const NamedValueMap *NamedValues, Diagnostics *Error)
824     : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
825       NamedValues(NamedValues), Error(Error) {}
826 
827 Parser::RegistrySema::~RegistrySema() = default;
828 
829 std::optional<MatcherCtor>
lookupMatcherCtor(StringRef MatcherName)830 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
831   return Registry::lookupMatcherCtor(MatcherName);
832 }
833 
actOnMatcherExpression(MatcherCtor Ctor,SourceRange NameRange,StringRef BindID,ArrayRef<ParserValue> Args,Diagnostics * Error)834 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
835     MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
836     ArrayRef<ParserValue> Args, Diagnostics *Error) {
837   if (BindID.empty()) {
838     return Registry::constructMatcher(Ctor, NameRange, Args, Error);
839   } else {
840     return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
841                                            Error);
842   }
843 }
844 
getAcceptedCompletionTypes(ArrayRef<std::pair<MatcherCtor,unsigned>> Context)845 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
846     ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
847   return Registry::getAcceptedCompletionTypes(Context);
848 }
849 
getMatcherCompletions(ArrayRef<ArgKind> AcceptedTypes)850 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
851     ArrayRef<ArgKind> AcceptedTypes) {
852   return Registry::getMatcherCompletions(AcceptedTypes);
853 }
854 
isBuilderMatcher(MatcherCtor Ctor) const855 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
856   return Registry::isBuilderMatcher(Ctor);
857 }
858 
nodeMatcherType(MatcherCtor Ctor) const859 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
860   return Registry::nodeMatcherType(Ctor);
861 }
862 
863 internal::MatcherDescriptorPtr
buildMatcherCtor(MatcherCtor Ctor,SourceRange NameRange,ArrayRef<ParserValue> Args,Diagnostics * Error) const864 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
865                                        ArrayRef<ParserValue> Args,
866                                        Diagnostics *Error) const {
867   return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
868 }
869 
parseExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,VariantValue * Value,Diagnostics * Error)870 bool Parser::parseExpression(StringRef &Code, Sema *S,
871                              const NamedValueMap *NamedValues,
872                              VariantValue *Value, Diagnostics *Error) {
873   CodeTokenizer Tokenizer(Code, Error);
874   if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
875     return false;
876   auto NT = Tokenizer.peekNextToken();
877   if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
878     Error->addError(Tokenizer.peekNextToken().Range,
879                     Error->ET_ParserTrailingCode);
880     return false;
881   }
882   return true;
883 }
884 
885 std::vector<MatcherCompletion>
completeExpression(StringRef & Code,unsigned CompletionOffset,Sema * S,const NamedValueMap * NamedValues)886 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
887                            const NamedValueMap *NamedValues) {
888   Diagnostics Error;
889   CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
890   Parser P(&Tokenizer, S, NamedValues, &Error);
891   VariantValue Dummy;
892   P.parseExpressionImpl(&Dummy);
893 
894   // Sort by specificity, then by name.
895   llvm::sort(P.Completions,
896              [](const MatcherCompletion &A, const MatcherCompletion &B) {
897                if (A.Specificity != B.Specificity)
898                  return A.Specificity > B.Specificity;
899                return A.TypedText < B.TypedText;
900              });
901 
902   return P.Completions;
903 }
904 
905 std::optional<DynTypedMatcher>
parseMatcherExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)906 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
907                                const NamedValueMap *NamedValues,
908                                Diagnostics *Error) {
909   VariantValue Value;
910   if (!parseExpression(Code, S, NamedValues, &Value, Error))
911     return std::nullopt;
912   if (!Value.isMatcher()) {
913     Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
914     return std::nullopt;
915   }
916   std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
917   if (!Result) {
918     Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
919         << Value.getTypeAsString();
920   }
921   return Result;
922 }
923 
924 } // namespace dynamic
925 } // namespace ast_matchers
926 } // namespace clang
927