1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/ManagedStatic.h"
23 #include <algorithm>
24 #include <cassert>
25 #include <cerrno>
26 #include <cstddef>
27 #include <cstdlib>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 namespace clang {
33 namespace ast_matchers {
34 namespace dynamic {
35 
36 /// Simple structure to hold information for one token from the parser.
37 struct Parser::TokenInfo {
38   /// Different possible tokens.
39   enum TokenKind {
40     TK_Eof,
41     TK_NewLine,
42     TK_OpenParen,
43     TK_CloseParen,
44     TK_Comma,
45     TK_Period,
46     TK_Literal,
47     TK_Ident,
48     TK_InvalidChar,
49     TK_Error,
50     TK_CodeCompletion
51   };
52 
53   /// Some known identifiers.
54   static const char* const ID_Bind;
55   static const char *const ID_With;
56 
57   TokenInfo() = default;
58 
59   StringRef Text;
60   TokenKind Kind = TK_Eof;
61   SourceRange Range;
62   VariantValue Value;
63 };
64 
65 const char* const Parser::TokenInfo::ID_Bind = "bind";
66 const char *const Parser::TokenInfo::ID_With = "with";
67 
68 /// Simple tokenizer for the parser.
69 class Parser::CodeTokenizer {
70 public:
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error)71   explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73     NextToken = getNextToken();
74   }
75 
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error,unsigned CodeCompletionOffset)76   CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77                 unsigned CodeCompletionOffset)
78       : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79         CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80     NextToken = getNextToken();
81   }
82 
83   /// Returns but doesn't consume the next token.
peekNextToken() const84   const TokenInfo &peekNextToken() const { return NextToken; }
85 
86   /// Consumes and returns the next token.
consumeNextToken()87   TokenInfo consumeNextToken() {
88     TokenInfo ThisToken = NextToken;
89     NextToken = getNextToken();
90     return ThisToken;
91   }
92 
SkipNewlines()93   TokenInfo SkipNewlines() {
94     while (NextToken.Kind == TokenInfo::TK_NewLine)
95       NextToken = getNextToken();
96     return NextToken;
97   }
98 
consumeNextTokenIgnoreNewlines()99   TokenInfo consumeNextTokenIgnoreNewlines() {
100     SkipNewlines();
101     if (NextToken.Kind == TokenInfo::TK_Eof)
102       return NextToken;
103     return consumeNextToken();
104   }
105 
nextTokenKind() const106   TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
107 
108 private:
getNextToken()109   TokenInfo getNextToken() {
110     consumeWhitespace();
111     TokenInfo Result;
112     Result.Range.Start = currentLocation();
113 
114     if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115       Result.Kind = TokenInfo::TK_CodeCompletion;
116       Result.Text = StringRef(CodeCompletionLocation, 0);
117       CodeCompletionLocation = nullptr;
118       return Result;
119     }
120 
121     if (Code.empty()) {
122       Result.Kind = TokenInfo::TK_Eof;
123       Result.Text = "";
124       return Result;
125     }
126 
127     switch (Code[0]) {
128     case '#':
129       Code = Code.drop_until([](char c) { return c == '\n'; });
130       return getNextToken();
131     case ',':
132       Result.Kind = TokenInfo::TK_Comma;
133       Result.Text = Code.substr(0, 1);
134       Code = Code.drop_front();
135       break;
136     case '.':
137       Result.Kind = TokenInfo::TK_Period;
138       Result.Text = Code.substr(0, 1);
139       Code = Code.drop_front();
140       break;
141     case '\n':
142       ++Line;
143       StartOfLine = Code.drop_front();
144       Result.Kind = TokenInfo::TK_NewLine;
145       Result.Text = Code.substr(0, 1);
146       Code = Code.drop_front();
147       break;
148     case '(':
149       Result.Kind = TokenInfo::TK_OpenParen;
150       Result.Text = Code.substr(0, 1);
151       Code = Code.drop_front();
152       break;
153     case ')':
154       Result.Kind = TokenInfo::TK_CloseParen;
155       Result.Text = Code.substr(0, 1);
156       Code = Code.drop_front();
157       break;
158 
159     case '"':
160     case '\'':
161       // Parse a string literal.
162       consumeStringLiteral(&Result);
163       break;
164 
165     case '0': case '1': case '2': case '3': case '4':
166     case '5': case '6': case '7': case '8': case '9':
167       // Parse an unsigned and float literal.
168       consumeNumberLiteral(&Result);
169       break;
170 
171     default:
172       if (isAlphanumeric(Code[0])) {
173         // Parse an identifier
174         size_t TokenLength = 1;
175         while (true) {
176           // A code completion location in/immediately after an identifier will
177           // cause the portion of the identifier before the code completion
178           // location to become a code completion token.
179           if (CodeCompletionLocation == Code.data() + TokenLength) {
180             CodeCompletionLocation = nullptr;
181             Result.Kind = TokenInfo::TK_CodeCompletion;
182             Result.Text = Code.substr(0, TokenLength);
183             Code = Code.drop_front(TokenLength);
184             return Result;
185           }
186           if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
187             break;
188           ++TokenLength;
189         }
190         if (TokenLength == 4 && Code.startswith("true")) {
191           Result.Kind = TokenInfo::TK_Literal;
192           Result.Value = true;
193         } else if (TokenLength == 5 && Code.startswith("false")) {
194           Result.Kind = TokenInfo::TK_Literal;
195           Result.Value = false;
196         } else {
197           Result.Kind = TokenInfo::TK_Ident;
198           Result.Text = Code.substr(0, TokenLength);
199         }
200         Code = Code.drop_front(TokenLength);
201       } else {
202         Result.Kind = TokenInfo::TK_InvalidChar;
203         Result.Text = Code.substr(0, 1);
204         Code = Code.drop_front(1);
205       }
206       break;
207     }
208 
209     Result.Range.End = currentLocation();
210     return Result;
211   }
212 
213   /// Consume an unsigned and float literal.
consumeNumberLiteral(TokenInfo * Result)214   void consumeNumberLiteral(TokenInfo *Result) {
215     bool isFloatingLiteral = false;
216     unsigned Length = 1;
217     if (Code.size() > 1) {
218       // Consume the 'x' or 'b' radix modifier, if present.
219       switch (toLowercase(Code[1])) {
220       case 'x': case 'b': Length = 2;
221       }
222     }
223     while (Length < Code.size() && isHexDigit(Code[Length]))
224       ++Length;
225 
226     // Try to recognize a floating point literal.
227     while (Length < Code.size()) {
228       char c = Code[Length];
229       if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230         isFloatingLiteral = true;
231         Length++;
232       } else {
233         break;
234       }
235     }
236 
237     Result->Text = Code.substr(0, Length);
238     Code = Code.drop_front(Length);
239 
240     if (isFloatingLiteral) {
241       char *end;
242       errno = 0;
243       std::string Text = Result->Text.str();
244       double doubleValue = strtod(Text.c_str(), &end);
245       if (*end == 0 && errno == 0) {
246         Result->Kind = TokenInfo::TK_Literal;
247         Result->Value = doubleValue;
248         return;
249       }
250     } else {
251       unsigned Value;
252       if (!Result->Text.getAsInteger(0, Value)) {
253         Result->Kind = TokenInfo::TK_Literal;
254         Result->Value = Value;
255         return;
256       }
257     }
258 
259     SourceRange Range;
260     Range.Start = Result->Range.Start;
261     Range.End = currentLocation();
262     Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
263     Result->Kind = TokenInfo::TK_Error;
264   }
265 
266   /// Consume a string literal.
267   ///
268   /// \c Code must be positioned at the start of the literal (the opening
269   /// quote). Consumed until it finds the same closing quote character.
consumeStringLiteral(TokenInfo * Result)270   void consumeStringLiteral(TokenInfo *Result) {
271     bool InEscape = false;
272     const char Marker = Code[0];
273     for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274       if (InEscape) {
275         InEscape = false;
276         continue;
277       }
278       if (Code[Length] == '\\') {
279         InEscape = true;
280         continue;
281       }
282       if (Code[Length] == Marker) {
283         Result->Kind = TokenInfo::TK_Literal;
284         Result->Text = Code.substr(0, Length + 1);
285         Result->Value = Code.substr(1, Length - 1);
286         Code = Code.drop_front(Length + 1);
287         return;
288       }
289     }
290 
291     StringRef ErrorText = Code;
292     Code = Code.drop_front(Code.size());
293     SourceRange Range;
294     Range.Start = Result->Range.Start;
295     Range.End = currentLocation();
296     Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
297     Result->Kind = TokenInfo::TK_Error;
298   }
299 
300   /// Consume all leading whitespace from \c Code.
consumeWhitespace()301   void consumeWhitespace() {
302     Code = Code.drop_while([](char c) {
303       // Don't trim newlines.
304       return StringRef(" \t\v\f\r").contains(c);
305     });
306   }
307 
currentLocation()308   SourceLocation currentLocation() {
309     SourceLocation Location;
310     Location.Line = Line;
311     Location.Column = Code.data() - StartOfLine.data() + 1;
312     return Location;
313   }
314 
315   StringRef &Code;
316   StringRef StartOfLine;
317   unsigned Line = 1;
318   Diagnostics *Error;
319   TokenInfo NextToken;
320   const char *CodeCompletionLocation = nullptr;
321 };
322 
323 Parser::Sema::~Sema() = default;
324 
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> Context)325 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
326     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
327   return {};
328 }
329 
330 std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes)331 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
332   return {};
333 }
334 
335 struct Parser::ScopedContextEntry {
336   Parser *P;
337 
ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry338   ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
339     P->ContextStack.push_back(std::make_pair(C, 0u));
340   }
341 
~ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry342   ~ScopedContextEntry() {
343     P->ContextStack.pop_back();
344   }
345 
nextArgclang::ast_matchers::dynamic::Parser::ScopedContextEntry346   void nextArg() {
347     ++P->ContextStack.back().second;
348   }
349 };
350 
351 /// Parse expressions that start with an identifier.
352 ///
353 /// This function can parse named values and matchers.
354 /// In case of failure it will try to determine the user's intent to give
355 /// an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * Value)356 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
357   const TokenInfo NameToken = Tokenizer->consumeNextToken();
358 
359   if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
360     // Parse as a named value.
361     if (const VariantValue NamedValue =
362             NamedValues ? NamedValues->lookup(NameToken.Text)
363                         : VariantValue()) {
364 
365       if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
366         *Value = NamedValue;
367         return true;
368       }
369 
370       std::string BindID;
371       Tokenizer->consumeNextToken();
372       TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
373       if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
374         addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
375         return false;
376       }
377 
378       if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
379           (ChainCallToken.Text != TokenInfo::ID_Bind &&
380            ChainCallToken.Text != TokenInfo::ID_With)) {
381         Error->addError(ChainCallToken.Range,
382                         Error->ET_ParserMalformedChainedExpr);
383         return false;
384       }
385       if (ChainCallToken.Text == TokenInfo::ID_With) {
386 
387         Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
388                                  NameToken.Text, NameToken.Range);
389 
390         Error->addError(ChainCallToken.Range,
391                         Error->ET_RegistryMatcherNoWithSupport);
392         return false;
393       }
394       if (!parseBindID(BindID))
395         return false;
396 
397       assert(NamedValue.isMatcher());
398       llvm::Optional<DynTypedMatcher> Result =
399           NamedValue.getMatcher().getSingleMatcher();
400       if (Result.hasValue()) {
401         llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
402         if (Bound.hasValue()) {
403           *Value = VariantMatcher::SingleMatcher(*Bound);
404           return true;
405         }
406       }
407       return false;
408     }
409 
410     if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
411       Error->addError(Tokenizer->peekNextToken().Range,
412                       Error->ET_ParserNoOpenParen)
413           << "NewLine";
414       return false;
415     }
416 
417     // If the syntax is correct and the name is not a matcher either, report
418     // unknown named value.
419     if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
420          Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
421          Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
422          Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
423         !S->lookupMatcherCtor(NameToken.Text)) {
424       Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
425           << NameToken.Text;
426       return false;
427     }
428     // Otherwise, fallback to the matcher parser.
429   }
430 
431   Tokenizer->SkipNewlines();
432 
433   assert(NameToken.Kind == TokenInfo::TK_Ident);
434   TokenInfo OpenToken = Tokenizer->consumeNextToken();
435   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
436     Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
437         << OpenToken.Text;
438     return false;
439   }
440 
441   llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
442 
443   // Parse as a matcher expression.
444   return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
445 }
446 
parseBindID(std::string & BindID)447 bool Parser::parseBindID(std::string &BindID) {
448   // Parse the parenthesized argument to .bind("foo")
449   const TokenInfo OpenToken = Tokenizer->consumeNextToken();
450   const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
451   const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
452 
453   // TODO: We could use different error codes for each/some to be more
454   //       explicit about the syntax error.
455   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
456     Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
457     return false;
458   }
459   if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
460     Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
461     return false;
462   }
463   if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
464     Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
465     return false;
466   }
467   BindID = IDToken.Value.getString();
468   return true;
469 }
470 
parseMatcherBuilder(MatcherCtor Ctor,const TokenInfo & NameToken,const TokenInfo & OpenToken,VariantValue * Value)471 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
472                                  const TokenInfo &OpenToken,
473                                  VariantValue *Value) {
474   std::vector<ParserValue> Args;
475   TokenInfo EndToken;
476 
477   Tokenizer->SkipNewlines();
478 
479   {
480     ScopedContextEntry SCE(this, Ctor);
481 
482     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
483       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
484         // End of args.
485         EndToken = Tokenizer->consumeNextToken();
486         break;
487       }
488       if (!Args.empty()) {
489         // We must find a , token to continue.
490         TokenInfo CommaToken = Tokenizer->consumeNextToken();
491         if (CommaToken.Kind != TokenInfo::TK_Comma) {
492           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
493               << CommaToken.Text;
494           return false;
495         }
496       }
497 
498       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
499                                NameToken.Text, NameToken.Range,
500                                Args.size() + 1);
501       ParserValue ArgValue;
502       Tokenizer->SkipNewlines();
503 
504       if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
505         addExpressionCompletions();
506         return false;
507       }
508 
509       TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
510 
511       if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
512         Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
513             << NameToken.Text;
514         return false;
515       }
516 
517       ArgValue.Text = NodeMatcherToken.Text;
518       ArgValue.Range = NodeMatcherToken.Range;
519 
520       llvm::Optional<MatcherCtor> MappedMatcher =
521           S->lookupMatcherCtor(ArgValue.Text);
522 
523       if (!MappedMatcher) {
524         Error->addError(NodeMatcherToken.Range,
525                         Error->ET_RegistryMatcherNotFound)
526             << NodeMatcherToken.Text;
527         return false;
528       }
529 
530       ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
531 
532       if (NK.isNone()) {
533         Error->addError(NodeMatcherToken.Range,
534                         Error->ET_RegistryNonNodeMatcher)
535             << NodeMatcherToken.Text;
536         return false;
537       }
538 
539       ArgValue.Value = NK;
540 
541       Tokenizer->SkipNewlines();
542       Args.push_back(ArgValue);
543 
544       SCE.nextArg();
545     }
546   }
547 
548   if (EndToken.Kind == TokenInfo::TK_Eof) {
549     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
550     return false;
551   }
552 
553   internal::MatcherDescriptorPtr BuiltCtor =
554       S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);
555 
556   if (!BuiltCtor.get()) {
557     Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
558         << NameToken.Text;
559     return false;
560   }
561 
562   std::string BindID;
563   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
564     Tokenizer->consumeNextToken();
565     TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
566     if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
567       addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
568       addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
569       return false;
570     }
571     if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
572         (ChainCallToken.Text != TokenInfo::ID_Bind &&
573          ChainCallToken.Text != TokenInfo::ID_With)) {
574       Error->addError(ChainCallToken.Range,
575                       Error->ET_ParserMalformedChainedExpr);
576       return false;
577     }
578     if (ChainCallToken.Text == TokenInfo::ID_Bind) {
579       if (!parseBindID(BindID))
580         return false;
581       Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
582                                NameToken.Text, NameToken.Range);
583       SourceRange MatcherRange = NameToken.Range;
584       MatcherRange.End = ChainCallToken.Range.End;
585       VariantMatcher Result = S->actOnMatcherExpression(
586           BuiltCtor.get(), MatcherRange, BindID, {}, Error);
587       if (Result.isNull())
588         return false;
589 
590       *Value = Result;
591       return true;
592     } else if (ChainCallToken.Text == TokenInfo::ID_With) {
593       Tokenizer->SkipNewlines();
594 
595       if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
596         StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
597                                ? StringRef("EOF")
598                                : Tokenizer->peekNextToken().Text;
599         Error->addError(Tokenizer->peekNextToken().Range,
600                         Error->ET_ParserNoOpenParen)
601             << ErrTxt;
602         return false;
603       }
604 
605       TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
606 
607       return parseMatcherExpressionImpl(NameToken, WithOpenToken,
608                                         BuiltCtor.get(), Value);
609     }
610   }
611 
612   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
613                            NameToken.Text, NameToken.Range);
614   SourceRange MatcherRange = NameToken.Range;
615   MatcherRange.End = EndToken.Range.End;
616   VariantMatcher Result = S->actOnMatcherExpression(
617       BuiltCtor.get(), MatcherRange, BindID, {}, Error);
618   if (Result.isNull())
619     return false;
620 
621   *Value = Result;
622   return true;
623 }
624 
625 /// Parse and validate a matcher expression.
626 /// \return \c true on success, in which case \c Value has the matcher parsed.
627 ///   If the input is malformed, or some argument has an error, it
628 ///   returns \c false.
parseMatcherExpressionImpl(const TokenInfo & NameToken,const TokenInfo & OpenToken,llvm::Optional<MatcherCtor> Ctor,VariantValue * Value)629 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
630                                         const TokenInfo &OpenToken,
631                                         llvm::Optional<MatcherCtor> Ctor,
632                                         VariantValue *Value) {
633   if (!Ctor) {
634     Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
635         << NameToken.Text;
636     // Do not return here. We need to continue to give completion suggestions.
637   }
638 
639   if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
640     return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
641 
642   std::vector<ParserValue> Args;
643   TokenInfo EndToken;
644 
645   Tokenizer->SkipNewlines();
646 
647   {
648     ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
649 
650     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
651       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
652         // End of args.
653         EndToken = Tokenizer->consumeNextToken();
654         break;
655       }
656       if (!Args.empty()) {
657         // We must find a , token to continue.
658         const TokenInfo CommaToken = Tokenizer->consumeNextToken();
659         if (CommaToken.Kind != TokenInfo::TK_Comma) {
660           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
661               << CommaToken.Text;
662           return false;
663         }
664       }
665 
666       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
667                                NameToken.Text, NameToken.Range,
668                                Args.size() + 1);
669       ParserValue ArgValue;
670       Tokenizer->SkipNewlines();
671       ArgValue.Text = Tokenizer->peekNextToken().Text;
672       ArgValue.Range = Tokenizer->peekNextToken().Range;
673       if (!parseExpressionImpl(&ArgValue.Value)) {
674         return false;
675       }
676 
677       Tokenizer->SkipNewlines();
678       Args.push_back(ArgValue);
679       SCE.nextArg();
680     }
681   }
682 
683   if (EndToken.Kind == TokenInfo::TK_Eof) {
684     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
685     return false;
686   }
687 
688   std::string BindID;
689   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
690     Tokenizer->consumeNextToken();
691     TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
692     if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
693       addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
694       return false;
695     }
696 
697     if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
698       Error->addError(ChainCallToken.Range,
699                       Error->ET_ParserMalformedChainedExpr);
700       return false;
701     }
702     if (ChainCallToken.Text == TokenInfo::ID_With) {
703 
704       Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
705                                NameToken.Text, NameToken.Range);
706 
707       Error->addError(ChainCallToken.Range,
708                       Error->ET_RegistryMatcherNoWithSupport);
709       return false;
710     }
711     if (ChainCallToken.Text != TokenInfo::ID_Bind) {
712       Error->addError(ChainCallToken.Range,
713                       Error->ET_ParserMalformedChainedExpr);
714       return false;
715     }
716     if (!parseBindID(BindID))
717       return false;
718   }
719 
720   if (!Ctor)
721     return false;
722 
723   // Merge the start and end infos.
724   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
725                            NameToken.Text, NameToken.Range);
726   SourceRange MatcherRange = NameToken.Range;
727   MatcherRange.End = EndToken.Range.End;
728   VariantMatcher Result = S->actOnMatcherExpression(
729       *Ctor, MatcherRange, BindID, Args, Error);
730   if (Result.isNull()) return false;
731 
732   *Value = Result;
733   return true;
734 }
735 
736 // If the prefix of this completion matches the completion token, add it to
737 // Completions minus the prefix.
addCompletion(const TokenInfo & CompToken,const MatcherCompletion & Completion)738 void Parser::addCompletion(const TokenInfo &CompToken,
739                            const MatcherCompletion& Completion) {
740   if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
741       Completion.Specificity > 0) {
742     Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
743                              Completion.MatcherDecl, Completion.Specificity);
744   }
745 }
746 
getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes)747 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
748     ArrayRef<ArgKind> AcceptedTypes) {
749   if (!NamedValues) return std::vector<MatcherCompletion>();
750   std::vector<MatcherCompletion> Result;
751   for (const auto &Entry : *NamedValues) {
752     unsigned Specificity;
753     if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
754       std::string Decl =
755           (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
756       Result.emplace_back(Entry.getKey(), Decl, Specificity);
757     }
758   }
759   return Result;
760 }
761 
addExpressionCompletions()762 void Parser::addExpressionCompletions() {
763   const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
764   assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
765 
766   // We cannot complete code if there is an invalid element on the context
767   // stack.
768   for (ContextStackTy::iterator I = ContextStack.begin(),
769                                 E = ContextStack.end();
770        I != E; ++I) {
771     if (!I->first)
772       return;
773   }
774 
775   auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
776   for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
777     addCompletion(CompToken, Completion);
778   }
779 
780   for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
781     addCompletion(CompToken, Completion);
782   }
783 }
784 
785 /// Parse an <Expression>
parseExpressionImpl(VariantValue * Value)786 bool Parser::parseExpressionImpl(VariantValue *Value) {
787   switch (Tokenizer->nextTokenKind()) {
788   case TokenInfo::TK_Literal:
789     *Value = Tokenizer->consumeNextToken().Value;
790     return true;
791 
792   case TokenInfo::TK_Ident:
793     return parseIdentifierPrefixImpl(Value);
794 
795   case TokenInfo::TK_CodeCompletion:
796     addExpressionCompletions();
797     return false;
798 
799   case TokenInfo::TK_Eof:
800     Error->addError(Tokenizer->consumeNextToken().Range,
801                     Error->ET_ParserNoCode);
802     return false;
803 
804   case TokenInfo::TK_Error:
805     // This error was already reported by the tokenizer.
806     return false;
807   case TokenInfo::TK_NewLine:
808   case TokenInfo::TK_OpenParen:
809   case TokenInfo::TK_CloseParen:
810   case TokenInfo::TK_Comma:
811   case TokenInfo::TK_Period:
812   case TokenInfo::TK_InvalidChar:
813     const TokenInfo Token = Tokenizer->consumeNextToken();
814     Error->addError(Token.Range, Error->ET_ParserInvalidToken)
815         << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
816     return false;
817   }
818 
819   llvm_unreachable("Unknown token kind.");
820 }
821 
822 static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
823 
Parser(CodeTokenizer * Tokenizer,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)824 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
825                const NamedValueMap *NamedValues, Diagnostics *Error)
826     : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
827       NamedValues(NamedValues), Error(Error) {}
828 
829 Parser::RegistrySema::~RegistrySema() = default;
830 
831 llvm::Optional<MatcherCtor>
lookupMatcherCtor(StringRef MatcherName)832 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
833   return Registry::lookupMatcherCtor(MatcherName);
834 }
835 
actOnMatcherExpression(MatcherCtor Ctor,SourceRange NameRange,StringRef BindID,ArrayRef<ParserValue> Args,Diagnostics * Error)836 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
837     MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
838     ArrayRef<ParserValue> Args, Diagnostics *Error) {
839   if (BindID.empty()) {
840     return Registry::constructMatcher(Ctor, NameRange, Args, Error);
841   } else {
842     return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
843                                            Error);
844   }
845 }
846 
getAcceptedCompletionTypes(ArrayRef<std::pair<MatcherCtor,unsigned>> Context)847 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
848     ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
849   return Registry::getAcceptedCompletionTypes(Context);
850 }
851 
getMatcherCompletions(ArrayRef<ArgKind> AcceptedTypes)852 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
853     ArrayRef<ArgKind> AcceptedTypes) {
854   return Registry::getMatcherCompletions(AcceptedTypes);
855 }
856 
isBuilderMatcher(MatcherCtor Ctor) const857 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
858   return Registry::isBuilderMatcher(Ctor);
859 }
860 
nodeMatcherType(MatcherCtor Ctor) const861 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
862   return Registry::nodeMatcherType(Ctor);
863 }
864 
865 internal::MatcherDescriptorPtr
buildMatcherCtor(MatcherCtor Ctor,SourceRange NameRange,ArrayRef<ParserValue> Args,Diagnostics * Error) const866 Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
867                                        ArrayRef<ParserValue> Args,
868                                        Diagnostics *Error) const {
869   return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
870 }
871 
parseExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,VariantValue * Value,Diagnostics * Error)872 bool Parser::parseExpression(StringRef &Code, Sema *S,
873                              const NamedValueMap *NamedValues,
874                              VariantValue *Value, Diagnostics *Error) {
875   CodeTokenizer Tokenizer(Code, Error);
876   if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
877     return false;
878   auto NT = Tokenizer.peekNextToken();
879   if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
880     Error->addError(Tokenizer.peekNextToken().Range,
881                     Error->ET_ParserTrailingCode);
882     return false;
883   }
884   return true;
885 }
886 
887 std::vector<MatcherCompletion>
completeExpression(StringRef & Code,unsigned CompletionOffset,Sema * S,const NamedValueMap * NamedValues)888 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
889                            const NamedValueMap *NamedValues) {
890   Diagnostics Error;
891   CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
892   Parser P(&Tokenizer, S, NamedValues, &Error);
893   VariantValue Dummy;
894   P.parseExpressionImpl(&Dummy);
895 
896   // Sort by specificity, then by name.
897   llvm::sort(P.Completions,
898              [](const MatcherCompletion &A, const MatcherCompletion &B) {
899                if (A.Specificity != B.Specificity)
900                  return A.Specificity > B.Specificity;
901                return A.TypedText < B.TypedText;
902              });
903 
904   return P.Completions;
905 }
906 
907 llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)908 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
909                                const NamedValueMap *NamedValues,
910                                Diagnostics *Error) {
911   VariantValue Value;
912   if (!parseExpression(Code, S, NamedValues, &Value, Error))
913     return llvm::Optional<DynTypedMatcher>();
914   if (!Value.isMatcher()) {
915     Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
916     return llvm::Optional<DynTypedMatcher>();
917   }
918   llvm::Optional<DynTypedMatcher> Result =
919       Value.getMatcher().getSingleMatcher();
920   if (!Result.hasValue()) {
921     Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
922         << Value.getTypeAsString();
923   }
924   return Result;
925 }
926 
927 } // namespace dynamic
928 } // namespace ast_matchers
929 } // namespace clang
930