1 //===- Parser.cpp - Matcher expression parser -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Recursive parser implementation for the matcher expression grammar.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/ASTMatchers/Dynamic/Parser.h"
15 #include "clang/ASTMatchers/ASTMatchersInternal.h"
16 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17 #include "clang/ASTMatchers/Dynamic/Registry.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/ManagedStatic.h"
23 #include <algorithm>
24 #include <cassert>
25 #include <cerrno>
26 #include <cstddef>
27 #include <cstdlib>
28 #include <string>
29 #include <utility>
30 #include <vector>
31
32 namespace clang {
33 namespace ast_matchers {
34 namespace dynamic {
35
/// Simple structure to hold information for one token from the parser.
///
/// A default-constructed token has kind \c TK_Eof; the parsing routines rely
/// on that to detect a missing closing parenthesis.
struct Parser::TokenInfo {
  /// Different possible tokens.
  enum TokenKind {
    TK_Eof,           // No input left.
    TK_NewLine,       // A '\n' character.
    TK_OpenParen,     // '('
    TK_CloseParen,    // ')'
    TK_Comma,         // ','
    TK_Period,        // '.'
    TK_Literal,       // String, number or boolean literal; see Value.
    TK_Ident,         // Alphanumeric identifier.
    TK_InvalidChar,   // A character that starts no known token.
    TK_Error,         // A literal that failed to lex; already diagnosed.
    TK_CodeCompletion // The code completion point was reached.
  };

  /// Some known identifiers.
  static const char* const ID_Bind;
  static const char *const ID_With;

  TokenInfo() = default;

  // Spelling of the token as it appears in the source.
  StringRef Text;
  // Kind of the token; TK_Eof until the tokenizer assigns something else.
  TokenKind Kind = TK_Eof;
  // Source range covered by the token.
  SourceRange Range;
  // Parsed value; only assigned for TK_Literal tokens.
  VariantValue Value;
};
64
// Out-of-line definitions of the identifiers that the parser accepts after a
// '.' in a chained call.
const char* const Parser::TokenInfo::ID_Bind = "bind";
const char *const Parser::TokenInfo::ID_With = "with";
67
68 /// Simple tokenizer for the parser.
69 class Parser::CodeTokenizer {
70 public:
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error)71 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73 NextToken = getNextToken();
74 }
75
CodeTokenizer(StringRef & MatcherCode,Diagnostics * Error,unsigned CodeCompletionOffset)76 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77 unsigned CodeCompletionOffset)
78 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80 NextToken = getNextToken();
81 }
82
83 /// Returns but doesn't consume the next token.
peekNextToken() const84 const TokenInfo &peekNextToken() const { return NextToken; }
85
86 /// Consumes and returns the next token.
consumeNextToken()87 TokenInfo consumeNextToken() {
88 TokenInfo ThisToken = NextToken;
89 NextToken = getNextToken();
90 return ThisToken;
91 }
92
SkipNewlines()93 TokenInfo SkipNewlines() {
94 while (NextToken.Kind == TokenInfo::TK_NewLine)
95 NextToken = getNextToken();
96 return NextToken;
97 }
98
consumeNextTokenIgnoreNewlines()99 TokenInfo consumeNextTokenIgnoreNewlines() {
100 SkipNewlines();
101 if (NextToken.Kind == TokenInfo::TK_Eof)
102 return NextToken;
103 return consumeNextToken();
104 }
105
nextTokenKind() const106 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
107
108 private:
getNextToken()109 TokenInfo getNextToken() {
110 consumeWhitespace();
111 TokenInfo Result;
112 Result.Range.Start = currentLocation();
113
114 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115 Result.Kind = TokenInfo::TK_CodeCompletion;
116 Result.Text = StringRef(CodeCompletionLocation, 0);
117 CodeCompletionLocation = nullptr;
118 return Result;
119 }
120
121 if (Code.empty()) {
122 Result.Kind = TokenInfo::TK_Eof;
123 Result.Text = "";
124 return Result;
125 }
126
127 switch (Code[0]) {
128 case '#':
129 Code = Code.drop_until([](char c) { return c == '\n'; });
130 return getNextToken();
131 case ',':
132 Result.Kind = TokenInfo::TK_Comma;
133 Result.Text = Code.substr(0, 1);
134 Code = Code.drop_front();
135 break;
136 case '.':
137 Result.Kind = TokenInfo::TK_Period;
138 Result.Text = Code.substr(0, 1);
139 Code = Code.drop_front();
140 break;
141 case '\n':
142 ++Line;
143 StartOfLine = Code.drop_front();
144 Result.Kind = TokenInfo::TK_NewLine;
145 Result.Text = Code.substr(0, 1);
146 Code = Code.drop_front();
147 break;
148 case '(':
149 Result.Kind = TokenInfo::TK_OpenParen;
150 Result.Text = Code.substr(0, 1);
151 Code = Code.drop_front();
152 break;
153 case ')':
154 Result.Kind = TokenInfo::TK_CloseParen;
155 Result.Text = Code.substr(0, 1);
156 Code = Code.drop_front();
157 break;
158
159 case '"':
160 case '\'':
161 // Parse a string literal.
162 consumeStringLiteral(&Result);
163 break;
164
165 case '0': case '1': case '2': case '3': case '4':
166 case '5': case '6': case '7': case '8': case '9':
167 // Parse an unsigned and float literal.
168 consumeNumberLiteral(&Result);
169 break;
170
171 default:
172 if (isAlphanumeric(Code[0])) {
173 // Parse an identifier
174 size_t TokenLength = 1;
175 while (true) {
176 // A code completion location in/immediately after an identifier will
177 // cause the portion of the identifier before the code completion
178 // location to become a code completion token.
179 if (CodeCompletionLocation == Code.data() + TokenLength) {
180 CodeCompletionLocation = nullptr;
181 Result.Kind = TokenInfo::TK_CodeCompletion;
182 Result.Text = Code.substr(0, TokenLength);
183 Code = Code.drop_front(TokenLength);
184 return Result;
185 }
186 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
187 break;
188 ++TokenLength;
189 }
190 if (TokenLength == 4 && Code.startswith("true")) {
191 Result.Kind = TokenInfo::TK_Literal;
192 Result.Value = true;
193 } else if (TokenLength == 5 && Code.startswith("false")) {
194 Result.Kind = TokenInfo::TK_Literal;
195 Result.Value = false;
196 } else {
197 Result.Kind = TokenInfo::TK_Ident;
198 Result.Text = Code.substr(0, TokenLength);
199 }
200 Code = Code.drop_front(TokenLength);
201 } else {
202 Result.Kind = TokenInfo::TK_InvalidChar;
203 Result.Text = Code.substr(0, 1);
204 Code = Code.drop_front(1);
205 }
206 break;
207 }
208
209 Result.Range.End = currentLocation();
210 return Result;
211 }
212
213 /// Consume an unsigned and float literal.
consumeNumberLiteral(TokenInfo * Result)214 void consumeNumberLiteral(TokenInfo *Result) {
215 bool isFloatingLiteral = false;
216 unsigned Length = 1;
217 if (Code.size() > 1) {
218 // Consume the 'x' or 'b' radix modifier, if present.
219 switch (toLowercase(Code[1])) {
220 case 'x': case 'b': Length = 2;
221 }
222 }
223 while (Length < Code.size() && isHexDigit(Code[Length]))
224 ++Length;
225
226 // Try to recognize a floating point literal.
227 while (Length < Code.size()) {
228 char c = Code[Length];
229 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230 isFloatingLiteral = true;
231 Length++;
232 } else {
233 break;
234 }
235 }
236
237 Result->Text = Code.substr(0, Length);
238 Code = Code.drop_front(Length);
239
240 if (isFloatingLiteral) {
241 char *end;
242 errno = 0;
243 std::string Text = Result->Text.str();
244 double doubleValue = strtod(Text.c_str(), &end);
245 if (*end == 0 && errno == 0) {
246 Result->Kind = TokenInfo::TK_Literal;
247 Result->Value = doubleValue;
248 return;
249 }
250 } else {
251 unsigned Value;
252 if (!Result->Text.getAsInteger(0, Value)) {
253 Result->Kind = TokenInfo::TK_Literal;
254 Result->Value = Value;
255 return;
256 }
257 }
258
259 SourceRange Range;
260 Range.Start = Result->Range.Start;
261 Range.End = currentLocation();
262 Error->addError(Range, Error->ET_ParserNumberError) << Result->Text;
263 Result->Kind = TokenInfo::TK_Error;
264 }
265
266 /// Consume a string literal.
267 ///
268 /// \c Code must be positioned at the start of the literal (the opening
269 /// quote). Consumed until it finds the same closing quote character.
consumeStringLiteral(TokenInfo * Result)270 void consumeStringLiteral(TokenInfo *Result) {
271 bool InEscape = false;
272 const char Marker = Code[0];
273 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274 if (InEscape) {
275 InEscape = false;
276 continue;
277 }
278 if (Code[Length] == '\\') {
279 InEscape = true;
280 continue;
281 }
282 if (Code[Length] == Marker) {
283 Result->Kind = TokenInfo::TK_Literal;
284 Result->Text = Code.substr(0, Length + 1);
285 Result->Value = Code.substr(1, Length - 1);
286 Code = Code.drop_front(Length + 1);
287 return;
288 }
289 }
290
291 StringRef ErrorText = Code;
292 Code = Code.drop_front(Code.size());
293 SourceRange Range;
294 Range.Start = Result->Range.Start;
295 Range.End = currentLocation();
296 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
297 Result->Kind = TokenInfo::TK_Error;
298 }
299
300 /// Consume all leading whitespace from \c Code.
consumeWhitespace()301 void consumeWhitespace() {
302 Code = Code.drop_while([](char c) {
303 // Don't trim newlines.
304 return StringRef(" \t\v\f\r").contains(c);
305 });
306 }
307
currentLocation()308 SourceLocation currentLocation() {
309 SourceLocation Location;
310 Location.Line = Line;
311 Location.Column = Code.data() - StartOfLine.data() + 1;
312 return Location;
313 }
314
315 StringRef &Code;
316 StringRef StartOfLine;
317 unsigned Line = 1;
318 Diagnostics *Error;
319 TokenInfo NextToken;
320 const char *CodeCompletionLocation = nullptr;
321 };
322
// Out-of-line definition of the Sema destructor.
Parser::Sema::~Sema() = default;
324
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> Context)325 std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
326 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
327 return {};
328 }
329
330 std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes)331 Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
332 return {};
333 }
334
335 struct Parser::ScopedContextEntry {
336 Parser *P;
337
ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry338 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
339 P->ContextStack.push_back(std::make_pair(C, 0u));
340 }
341
~ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry342 ~ScopedContextEntry() {
343 P->ContextStack.pop_back();
344 }
345
nextArgclang::ast_matchers::dynamic::Parser::ScopedContextEntry346 void nextArg() {
347 ++P->ContextStack.back().second;
348 }
349 };
350
351 /// Parse expressions that start with an identifier.
352 ///
353 /// This function can parse named values and matchers.
354 /// In case of failure it will try to determine the user's intent to give
355 /// an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * Value)356 bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
357 const TokenInfo NameToken = Tokenizer->consumeNextToken();
358
359 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
360 // Parse as a named value.
361 if (const VariantValue NamedValue =
362 NamedValues ? NamedValues->lookup(NameToken.Text)
363 : VariantValue()) {
364
365 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
366 *Value = NamedValue;
367 return true;
368 }
369
370 std::string BindID;
371 Tokenizer->consumeNextToken();
372 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
373 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
374 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
375 return false;
376 }
377
378 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
379 (ChainCallToken.Text != TokenInfo::ID_Bind &&
380 ChainCallToken.Text != TokenInfo::ID_With)) {
381 Error->addError(ChainCallToken.Range,
382 Error->ET_ParserMalformedChainedExpr);
383 return false;
384 }
385 if (ChainCallToken.Text == TokenInfo::ID_With) {
386
387 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
388 NameToken.Text, NameToken.Range);
389
390 Error->addError(ChainCallToken.Range,
391 Error->ET_RegistryMatcherNoWithSupport);
392 return false;
393 }
394 if (!parseBindID(BindID))
395 return false;
396
397 assert(NamedValue.isMatcher());
398 llvm::Optional<DynTypedMatcher> Result =
399 NamedValue.getMatcher().getSingleMatcher();
400 if (Result.hasValue()) {
401 llvm::Optional<DynTypedMatcher> Bound = Result->tryBind(BindID);
402 if (Bound.hasValue()) {
403 *Value = VariantMatcher::SingleMatcher(*Bound);
404 return true;
405 }
406 }
407 return false;
408 }
409
410 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
411 Error->addError(Tokenizer->peekNextToken().Range,
412 Error->ET_ParserNoOpenParen)
413 << "NewLine";
414 return false;
415 }
416
417 // If the syntax is correct and the name is not a matcher either, report
418 // unknown named value.
419 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
420 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
421 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
422 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
423 !S->lookupMatcherCtor(NameToken.Text)) {
424 Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
425 << NameToken.Text;
426 return false;
427 }
428 // Otherwise, fallback to the matcher parser.
429 }
430
431 Tokenizer->SkipNewlines();
432
433 assert(NameToken.Kind == TokenInfo::TK_Ident);
434 TokenInfo OpenToken = Tokenizer->consumeNextToken();
435 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
436 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
437 << OpenToken.Text;
438 return false;
439 }
440
441 llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
442
443 // Parse as a matcher expression.
444 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
445 }
446
parseBindID(std::string & BindID)447 bool Parser::parseBindID(std::string &BindID) {
448 // Parse the parenthesized argument to .bind("foo")
449 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
450 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
451 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
452
453 // TODO: We could use different error codes for each/some to be more
454 // explicit about the syntax error.
455 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
456 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
457 return false;
458 }
459 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
460 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
461 return false;
462 }
463 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
464 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
465 return false;
466 }
467 BindID = IDToken.Value.getString();
468 return true;
469 }
470
parseMatcherBuilder(MatcherCtor Ctor,const TokenInfo & NameToken,const TokenInfo & OpenToken,VariantValue * Value)471 bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
472 const TokenInfo &OpenToken,
473 VariantValue *Value) {
474 std::vector<ParserValue> Args;
475 TokenInfo EndToken;
476
477 Tokenizer->SkipNewlines();
478
479 {
480 ScopedContextEntry SCE(this, Ctor);
481
482 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
483 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
484 // End of args.
485 EndToken = Tokenizer->consumeNextToken();
486 break;
487 }
488 if (!Args.empty()) {
489 // We must find a , token to continue.
490 TokenInfo CommaToken = Tokenizer->consumeNextToken();
491 if (CommaToken.Kind != TokenInfo::TK_Comma) {
492 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
493 << CommaToken.Text;
494 return false;
495 }
496 }
497
498 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
499 NameToken.Text, NameToken.Range,
500 Args.size() + 1);
501 ParserValue ArgValue;
502 Tokenizer->SkipNewlines();
503
504 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
505 addExpressionCompletions();
506 return false;
507 }
508
509 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
510
511 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
512 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
513 << NameToken.Text;
514 return false;
515 }
516
517 ArgValue.Text = NodeMatcherToken.Text;
518 ArgValue.Range = NodeMatcherToken.Range;
519
520 llvm::Optional<MatcherCtor> MappedMatcher =
521 S->lookupMatcherCtor(ArgValue.Text);
522
523 if (!MappedMatcher) {
524 Error->addError(NodeMatcherToken.Range,
525 Error->ET_RegistryMatcherNotFound)
526 << NodeMatcherToken.Text;
527 return false;
528 }
529
530 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
531
532 if (NK.isNone()) {
533 Error->addError(NodeMatcherToken.Range,
534 Error->ET_RegistryNonNodeMatcher)
535 << NodeMatcherToken.Text;
536 return false;
537 }
538
539 ArgValue.Value = NK;
540
541 Tokenizer->SkipNewlines();
542 Args.push_back(ArgValue);
543
544 SCE.nextArg();
545 }
546 }
547
548 if (EndToken.Kind == TokenInfo::TK_Eof) {
549 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
550 return false;
551 }
552
553 internal::MatcherDescriptorPtr BuiltCtor =
554 S->buildMatcherCtor(Ctor, NameToken.Range, Args, Error);
555
556 if (!BuiltCtor.get()) {
557 Error->addError(NameToken.Range, Error->ET_ParserFailedToBuildMatcher)
558 << NameToken.Text;
559 return false;
560 }
561
562 std::string BindID;
563 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
564 Tokenizer->consumeNextToken();
565 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
566 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
567 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
568 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));
569 return false;
570 }
571 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
572 (ChainCallToken.Text != TokenInfo::ID_Bind &&
573 ChainCallToken.Text != TokenInfo::ID_With)) {
574 Error->addError(ChainCallToken.Range,
575 Error->ET_ParserMalformedChainedExpr);
576 return false;
577 }
578 if (ChainCallToken.Text == TokenInfo::ID_Bind) {
579 if (!parseBindID(BindID))
580 return false;
581 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
582 NameToken.Text, NameToken.Range);
583 SourceRange MatcherRange = NameToken.Range;
584 MatcherRange.End = ChainCallToken.Range.End;
585 VariantMatcher Result = S->actOnMatcherExpression(
586 BuiltCtor.get(), MatcherRange, BindID, {}, Error);
587 if (Result.isNull())
588 return false;
589
590 *Value = Result;
591 return true;
592 } else if (ChainCallToken.Text == TokenInfo::ID_With) {
593 Tokenizer->SkipNewlines();
594
595 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
596 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
597 ? StringRef("EOF")
598 : Tokenizer->peekNextToken().Text;
599 Error->addError(Tokenizer->peekNextToken().Range,
600 Error->ET_ParserNoOpenParen)
601 << ErrTxt;
602 return false;
603 }
604
605 TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
606
607 return parseMatcherExpressionImpl(NameToken, WithOpenToken,
608 BuiltCtor.get(), Value);
609 }
610 }
611
612 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
613 NameToken.Text, NameToken.Range);
614 SourceRange MatcherRange = NameToken.Range;
615 MatcherRange.End = EndToken.Range.End;
616 VariantMatcher Result = S->actOnMatcherExpression(
617 BuiltCtor.get(), MatcherRange, BindID, {}, Error);
618 if (Result.isNull())
619 return false;
620
621 *Value = Result;
622 return true;
623 }
624
625 /// Parse and validate a matcher expression.
626 /// \return \c true on success, in which case \c Value has the matcher parsed.
627 /// If the input is malformed, or some argument has an error, it
628 /// returns \c false.
parseMatcherExpressionImpl(const TokenInfo & NameToken,const TokenInfo & OpenToken,llvm::Optional<MatcherCtor> Ctor,VariantValue * Value)629 bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
630 const TokenInfo &OpenToken,
631 llvm::Optional<MatcherCtor> Ctor,
632 VariantValue *Value) {
633 if (!Ctor) {
634 Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
635 << NameToken.Text;
636 // Do not return here. We need to continue to give completion suggestions.
637 }
638
639 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
640 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);
641
642 std::vector<ParserValue> Args;
643 TokenInfo EndToken;
644
645 Tokenizer->SkipNewlines();
646
647 {
648 ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
649
650 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
651 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
652 // End of args.
653 EndToken = Tokenizer->consumeNextToken();
654 break;
655 }
656 if (!Args.empty()) {
657 // We must find a , token to continue.
658 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
659 if (CommaToken.Kind != TokenInfo::TK_Comma) {
660 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
661 << CommaToken.Text;
662 return false;
663 }
664 }
665
666 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
667 NameToken.Text, NameToken.Range,
668 Args.size() + 1);
669 ParserValue ArgValue;
670 Tokenizer->SkipNewlines();
671 ArgValue.Text = Tokenizer->peekNextToken().Text;
672 ArgValue.Range = Tokenizer->peekNextToken().Range;
673 if (!parseExpressionImpl(&ArgValue.Value)) {
674 return false;
675 }
676
677 Tokenizer->SkipNewlines();
678 Args.push_back(ArgValue);
679 SCE.nextArg();
680 }
681 }
682
683 if (EndToken.Kind == TokenInfo::TK_Eof) {
684 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
685 return false;
686 }
687
688 std::string BindID;
689 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
690 Tokenizer->consumeNextToken();
691 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
692 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
693 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));
694 return false;
695 }
696
697 if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
698 Error->addError(ChainCallToken.Range,
699 Error->ET_ParserMalformedChainedExpr);
700 return false;
701 }
702 if (ChainCallToken.Text == TokenInfo::ID_With) {
703
704 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
705 NameToken.Text, NameToken.Range);
706
707 Error->addError(ChainCallToken.Range,
708 Error->ET_RegistryMatcherNoWithSupport);
709 return false;
710 }
711 if (ChainCallToken.Text != TokenInfo::ID_Bind) {
712 Error->addError(ChainCallToken.Range,
713 Error->ET_ParserMalformedChainedExpr);
714 return false;
715 }
716 if (!parseBindID(BindID))
717 return false;
718 }
719
720 if (!Ctor)
721 return false;
722
723 // Merge the start and end infos.
724 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
725 NameToken.Text, NameToken.Range);
726 SourceRange MatcherRange = NameToken.Range;
727 MatcherRange.End = EndToken.Range.End;
728 VariantMatcher Result = S->actOnMatcherExpression(
729 *Ctor, MatcherRange, BindID, Args, Error);
730 if (Result.isNull()) return false;
731
732 *Value = Result;
733 return true;
734 }
735
736 // If the prefix of this completion matches the completion token, add it to
737 // Completions minus the prefix.
addCompletion(const TokenInfo & CompToken,const MatcherCompletion & Completion)738 void Parser::addCompletion(const TokenInfo &CompToken,
739 const MatcherCompletion& Completion) {
740 if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
741 Completion.Specificity > 0) {
742 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
743 Completion.MatcherDecl, Completion.Specificity);
744 }
745 }
746
getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes)747 std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
748 ArrayRef<ArgKind> AcceptedTypes) {
749 if (!NamedValues) return std::vector<MatcherCompletion>();
750 std::vector<MatcherCompletion> Result;
751 for (const auto &Entry : *NamedValues) {
752 unsigned Specificity;
753 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
754 std::string Decl =
755 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
756 Result.emplace_back(Entry.getKey(), Decl, Specificity);
757 }
758 }
759 return Result;
760 }
761
addExpressionCompletions()762 void Parser::addExpressionCompletions() {
763 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
764 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
765
766 // We cannot complete code if there is an invalid element on the context
767 // stack.
768 for (ContextStackTy::iterator I = ContextStack.begin(),
769 E = ContextStack.end();
770 I != E; ++I) {
771 if (!I->first)
772 return;
773 }
774
775 auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
776 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
777 addCompletion(CompToken, Completion);
778 }
779
780 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
781 addCompletion(CompToken, Completion);
782 }
783 }
784
785 /// Parse an <Expression>
parseExpressionImpl(VariantValue * Value)786 bool Parser::parseExpressionImpl(VariantValue *Value) {
787 switch (Tokenizer->nextTokenKind()) {
788 case TokenInfo::TK_Literal:
789 *Value = Tokenizer->consumeNextToken().Value;
790 return true;
791
792 case TokenInfo::TK_Ident:
793 return parseIdentifierPrefixImpl(Value);
794
795 case TokenInfo::TK_CodeCompletion:
796 addExpressionCompletions();
797 return false;
798
799 case TokenInfo::TK_Eof:
800 Error->addError(Tokenizer->consumeNextToken().Range,
801 Error->ET_ParserNoCode);
802 return false;
803
804 case TokenInfo::TK_Error:
805 // This error was already reported by the tokenizer.
806 return false;
807 case TokenInfo::TK_NewLine:
808 case TokenInfo::TK_OpenParen:
809 case TokenInfo::TK_CloseParen:
810 case TokenInfo::TK_Comma:
811 case TokenInfo::TK_Period:
812 case TokenInfo::TK_InvalidChar:
813 const TokenInfo Token = Tokenizer->consumeNextToken();
814 Error->addError(Token.Range, Error->ET_ParserInvalidToken)
815 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
816 return false;
817 }
818
819 llvm_unreachable("Unknown token kind.");
820 }
821
// Fallback semantic-action handler; the Parser constructor uses it when the
// caller passes a null Sema.
static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
823
Parser(CodeTokenizer * Tokenizer,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)824 Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
825 const NamedValueMap *NamedValues, Diagnostics *Error)
826 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
827 NamedValues(NamedValues), Error(Error) {}
828
// Out-of-line definition of the RegistrySema destructor.
Parser::RegistrySema::~RegistrySema() = default;
830
831 llvm::Optional<MatcherCtor>
lookupMatcherCtor(StringRef MatcherName)832 Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
833 return Registry::lookupMatcherCtor(MatcherName);
834 }
835
actOnMatcherExpression(MatcherCtor Ctor,SourceRange NameRange,StringRef BindID,ArrayRef<ParserValue> Args,Diagnostics * Error)836 VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
837 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
838 ArrayRef<ParserValue> Args, Diagnostics *Error) {
839 if (BindID.empty()) {
840 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
841 } else {
842 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
843 Error);
844 }
845 }
846
getAcceptedCompletionTypes(ArrayRef<std::pair<MatcherCtor,unsigned>> Context)847 std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
848 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
849 return Registry::getAcceptedCompletionTypes(Context);
850 }
851
getMatcherCompletions(ArrayRef<ArgKind> AcceptedTypes)852 std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
853 ArrayRef<ArgKind> AcceptedTypes) {
854 return Registry::getMatcherCompletions(AcceptedTypes);
855 }
856
isBuilderMatcher(MatcherCtor Ctor) const857 bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
858 return Registry::isBuilderMatcher(Ctor);
859 }
860
nodeMatcherType(MatcherCtor Ctor) const861 ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
862 return Registry::nodeMatcherType(Ctor);
863 }
864
/// Forwards to the matcher registry to build a matcher constructor from
/// \p Ctor and \p Args; problems are reported to \p Error.
// NOTE(review): the registry result is returned directly rather than through
// a named local; MatcherDescriptorPtr's move semantics are not visible here,
// so keep the direct return unless that is confirmed to be safe.
internal::MatcherDescriptorPtr
Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
                                       ArrayRef<ParserValue> Args,
                                       Diagnostics *Error) const {
  return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
}
871
parseExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,VariantValue * Value,Diagnostics * Error)872 bool Parser::parseExpression(StringRef &Code, Sema *S,
873 const NamedValueMap *NamedValues,
874 VariantValue *Value, Diagnostics *Error) {
875 CodeTokenizer Tokenizer(Code, Error);
876 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
877 return false;
878 auto NT = Tokenizer.peekNextToken();
879 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
880 Error->addError(Tokenizer.peekNextToken().Range,
881 Error->ET_ParserTrailingCode);
882 return false;
883 }
884 return true;
885 }
886
887 std::vector<MatcherCompletion>
completeExpression(StringRef & Code,unsigned CompletionOffset,Sema * S,const NamedValueMap * NamedValues)888 Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
889 const NamedValueMap *NamedValues) {
890 Diagnostics Error;
891 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
892 Parser P(&Tokenizer, S, NamedValues, &Error);
893 VariantValue Dummy;
894 P.parseExpressionImpl(&Dummy);
895
896 // Sort by specificity, then by name.
897 llvm::sort(P.Completions,
898 [](const MatcherCompletion &A, const MatcherCompletion &B) {
899 if (A.Specificity != B.Specificity)
900 return A.Specificity > B.Specificity;
901 return A.TypedText < B.TypedText;
902 });
903
904 return P.Completions;
905 }
906
907 llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef & Code,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)908 Parser::parseMatcherExpression(StringRef &Code, Sema *S,
909 const NamedValueMap *NamedValues,
910 Diagnostics *Error) {
911 VariantValue Value;
912 if (!parseExpression(Code, S, NamedValues, &Value, Error))
913 return llvm::Optional<DynTypedMatcher>();
914 if (!Value.isMatcher()) {
915 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
916 return llvm::Optional<DynTypedMatcher>();
917 }
918 llvm::Optional<DynTypedMatcher> Result =
919 Value.getMatcher().getSingleMatcher();
920 if (!Result.hasValue()) {
921 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
922 << Value.getTypeAsString();
923 }
924 return Result;
925 }
926
927 } // namespace dynamic
928 } // namespace ast_matchers
929 } // namespace clang
930