1# GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMAR 2 3use 5.006; 4use strict; 5 6package Parse::RecDescent; 7 8use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited ); 9 10use vars qw ( $skip ); 11 12 *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE 13 $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE 14my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES 15 16 17#ifndef RUNTIME 18sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER: 19 # perl -MParse::RecDescent - <grammarfile> <classname> [runtimeclassname] 20{ 21 local *_die = sub { print @_, "\n"; exit }; 22 23 my ($package, $file, $line) = caller; 24 25 if ($file eq '-' && $line == 0) 26 { 27 _die("Usage: perl -MLocalTest - <grammarfile> <classname>") 28 unless @ARGV >= 2 and $ARGV <= 3; 29 30 my ($sourcefile, $class, $runtime_class) = @ARGV; 31 32 local *IN; 33 open IN, $sourcefile 34 or _die(qq{Can't open grammar file "$sourcefile"}); 35 local $/; # 36 my $grammar = <IN>; 37 close IN; 38 39 Parse::RecDescent->Precompile({ -runtime_class => $runtime_class }, 40 $grammar, $class, $sourcefile); 41 exit; 42 } 43} 44 45sub Save 46{ 47 my $self = shift; 48 my %opt; 49 if ('HASH' eq ref $_[0]) { 50 %opt = (%opt, %{$_[0]}); 51 shift; 52 } 53 my ($class) = @_; 54 $self->{saving} = 1; 55 $self->Precompile(undef,$class); 56 $self->{saving} = 0; 57} 58 59sub PrecompiledRuntime 60{ 61 my ($self, $class) = @_; 62 my $opt = { 63 -standalone => 1, 64 -runtime_class => $class, 65 }; 66 $self->Precompile($opt, '', $class); 67} 68 69sub Precompile 70{ 71 my $self = shift; 72 my %opt = ( -standalone => 0, 73 ); 74 if ('HASH' eq ref $_[0]) { 75 %opt = (%opt, %{$_[0]}); 76 shift; 77 } 78 my ($grammar, $class, $sourcefile) = @_; 79 80 $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class"); 81 82 my $modulefile = $class; 83 $modulefile =~ s/.*:://; 84 $modulefile .= ".pm"; 85 86 my $code = ''; 87 88 local *OUT; 89 open OUT, ">", $modulefile 90 or croak("Can't write to new module file '$modulefile'"); 91 92 print OUT "#\n", 93 "# This parser was generated with\n", 94 "# Parse::RecDescent version $Parse::RecDescent::VERSION\n", 95 "#\n\n"; 96 97 print STDERR "precompiling grammar from file '$sourcefile'\n", 98 "to class $class in module file '$modulefile'\n" 99 if $grammar && $sourcefile; 100 101 if ($grammar) { 102 $self = Parse::RecDescent->new($grammar, # $grammar 103 1, # $compiling 104 $class # $namespace 105 ) 106 || croak("Can't compile bad grammar") 107 if $grammar; 108 109 # Do not allow &DESTROY to remove the precompiled namespace 110 delete $self->{_not_precompiled}; 111 112 foreach ( keys %{$self->{rules}} ) { 113 $self->{rules}{$_}{changed} = 1; 114 } 115 116 $code = $self->_code(); 117 } 118 119 # If a name for the runtime package was not provided, 120 # generate one based on the module output name and the generated 121 # code 122 if (not defined($opt{-runtime_class})) { 123 if ($opt{-standalone}) { 124 my $basename = $class . '::_Runtime'; 125 126 my $name = $basename; 127 128 for (my $i = 0; $code =~ /$basename/; ++$i) { 129 $name = sprintf("%s%06d", $basename, $i); 130 } 131 132 $opt{-runtime_class} = $name; 133 } else { 134 my $package = ref $self; 135 local $::RD_HINT = defined $::RD_HINT ? $::RD_HINT : 1; 136 _hint(<<EOWARNING); 137The precompiled grammar did not specify the -runtime_class 138option. The resulting parser will "use $package". Future changes to 139$package may cause $class to stop working. 140 141Consider building a -standalone parser, or providing the 142-runtime_class option as described in Parse::RecDescent's POD. 143 144Use \$::RD_HINT = 0 to disable this message. 145EOWARNING 146 $opt{-runtime_class} = $package; 147 } 148 } 149 150 $code =~ s/Parse::RecDescent/$opt{-runtime_class}/gs; 151 152 # Make the resulting pre-compiled parser stand-alone by including 153 # the contents of Parse::RecDescent as -runtime_class in the 154 # resulting precompiled parser. 155 if ($opt{-standalone}) { 156 local *IN; 157 open IN, '<', $Parse::RecDescent::_FILENAME 158 or croak("Can't open $Parse::RecDescent::_FILENAME for standalone pre-compilation: $!\n"); 159 my $exclude = 0; 160 print OUT "{\n"; 161 while (<IN>) { 162 if ($_ =~ /^\s*#\s*ifndef\s+RUNTIME\s*$/) { 163 ++$exclude; 164 } 165 if ($exclude) { 166 if ($_ =~ /^\s*#\s*endif\s$/) { 167 --$exclude; 168 } 169 } else { 170 if ($_ =~ m/^__END__/) { 171 last; 172 } 173 174 # Standalone parsers shouldn't trigger the CPAN 175 # indexer to index the runtime, as it shouldn't be 176 # exposed as a user-consumable package. 177 # 178 # Trick the indexer by including a newline in the package declarations 179 s/^package /package # this should not be indexed by CPAN\n/gs; 180 s/Parse::RecDescent/$opt{-runtime_class}/gs; 181 print OUT $_; 182 } 183 } 184 close IN; 185 print OUT "}\n"; 186 } 187 188 if ($grammar) { 189 print OUT "package $class;\n"; 190 } 191 192 if (not $opt{-standalone}) { 193 print OUT "use $opt{-runtime_class};\n"; 194 } 195 196 if ($grammar) { 197 print OUT "{ my \$ERRORS;\n\n"; 198 199 print OUT $code; 200 201 print OUT "}\npackage $class; sub new { "; 202 print OUT "my "; 203 204 $code = $self->_dump([$self], [qw(self)]); 205 $code =~ s/Parse::RecDescent/$opt{-runtime_class}/gs; 206 207 print OUT $code; 208 209 print OUT "}"; 210 } 211 212 close OUT 213 or croak("Can't write to new module file '$modulefile'"); 214} 215#endif 216 217package Parse::RecDescent::LineCounter; 218 219 220sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) 221{ 222 bless { 223 text => $_[1], 224 parser => $_[2], 225 prev => $_[3]?1:0, 226 }, $_[0]; 227} 228 229sub FETCH 230{ 231 my $parser = $_[0]->{parser}; 232 my $cache = $parser->{linecounter_cache}; 233 my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev} 234; 235 236 unless (exists $cache->{$from}) 237 { 238 $parser->{lastlinenum} = $parser->{offsetlinenum} 239 - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from)) 240 + 1; 241 $cache->{$from} = $parser->{lastlinenum}; 242 } 243 return $cache->{$from}; 244} 245 246sub STORE 247{ 248 my $parser = $_[0]->{parser}; 249 $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1]; 250 return undef; 251} 252 253sub resync # ($linecounter) 254{ 255 my $self = tied($_[0]); 256 die "Tried to alter something other than a LineCounter\n" 257 unless $self =~ /Parse::RecDescent::LineCounter/; 258 259 my $parser = $self->{parser}; 260 my $apparently = $parser->{offsetlinenum} 261 - Parse::RecDescent::_linecount(${$self->{text}}) 262 + 1; 263 264 $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently; 265 return 1; 266} 267 268package Parse::RecDescent::ColCounter; 269 270sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) 271{ 272 bless { 273 text => $_[1], 274 parser => $_[2], 275 prev => $_[3]?1:0, 276 }, $_[0]; 277} 278 279sub FETCH 280{ 281 my $parser = $_[0]->{parser}; 282 my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1; 283 substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m; 284 return length($1); 285} 286 287sub STORE 288{ 289 die "Can't set column number via \$thiscolumn\n"; 290} 291 292 293package Parse::RecDescent::OffsetCounter; 294 295sub TIESCALAR # ($classname, \$text, $thisparser, $prev) 296{ 297 bless { 298 text => $_[1], 299 parser => $_[2], 300 prev => $_[3]?-1:0, 301 }, $_[0]; 302} 303 304sub FETCH 305{ 306 my $parser = $_[0]->{parser}; 307 return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev}; 308} 309 310sub STORE 311{ 312 die "Can't set current offset via \$thisoffset or \$prevoffset\n"; 313} 314 315 316 317package Parse::RecDescent::Rule; 318 319sub new ($$$$$) 320{ 321 my $class = ref($_[0]) || $_[0]; 322 my $name = $_[1]; 323 my $owner = $_[2]; 324 my $line = $_[3]; 325 my $replace = $_[4]; 326 327 if (defined $owner->{"rules"}{$name}) 328 { 329 my $self = $owner->{"rules"}{$name}; 330 if ($replace && !$self->{"changed"}) 331 { 332 $self->reset; 333 } 334 return $self; 335 } 336 else 337 { 338 return $owner->{"rules"}{$name} = 339 bless 340 { 341 "name" => $name, 342 "prods" => [], 343 "calls" => [], 344 "changed" => 0, 345 "line" => $line, 346 "impcount" => 0, 347 "opcount" => 0, 348 "vars" => "", 349 }, $class; 350 } 351} 352 353sub reset($) 354{ 355 @{$_[0]->{"prods"}} = (); 356 @{$_[0]->{"calls"}} = (); 357 $_[0]->{"changed"} = 0; 358 $_[0]->{"impcount"} = 0; 359 $_[0]->{"opcount"} = 0; 360 $_[0]->{"vars"} = ""; 361} 362 363sub DESTROY {} 364 365sub hasleftmost($$) 366{ 367 my ($self, $ref) = @_; 368 369 my $prod; 370 foreach $prod ( @{$self->{"prods"}} ) 371 { 372 return 1 if $prod->hasleftmost($ref); 373 } 374 375 return 0; 376} 377 378sub leftmostsubrules($) 379{ 380 my $self = shift; 381 my @subrules = (); 382 383 my $prod; 384 foreach $prod ( @{$self->{"prods"}} ) 385 { 386 push @subrules, $prod->leftmostsubrule(); 387 } 388 389 return @subrules; 390} 391 392sub expected($) 393{ 394 my $self = shift; 395 my @expected = (); 396 397 my $prod; 398 foreach $prod ( @{$self->{"prods"}} ) 399 { 400 my $next = $prod->expected(); 401 unless (! $next or _contains($next,@expected) ) 402 { 403 push @expected, $next; 404 } 405 } 406 407 return join ', or ', @expected; 408} 409 410sub _contains($@) 411{ 412 my $target = shift; 413 my $item; 414 foreach $item ( @_ ) { return 1 if $target eq $item; } 415 return 0; 416} 417 418sub addcall($$) 419{ 420 my ( $self, $subrule ) = @_; 421 unless ( _contains($subrule, @{$self->{"calls"}}) ) 422 { 423 push @{$self->{"calls"}}, $subrule; 424 } 425} 426 427sub addprod($$) 428{ 429 my ( $self, $prod ) = @_; 430 push @{$self->{"prods"}}, $prod; 431 $self->{"changed"} = 1; 432 $self->{"impcount"} = 0; 433 $self->{"opcount"} = 0; 434 $prod->{"number"} = $#{$self->{"prods"}}; 435 return $prod; 436} 437 438sub addvar 439{ 440 my ( $self, $var, $parser ) = @_; 441 if ($var =~ /\A\s*local\s+([%@\$]\w+)/) 442 { 443 $parser->{localvars} .= " $1"; 444 $self->{"vars"} .= "$var;\n" } 445 else 446 { $self->{"vars"} .= "my $var;\n" } 447 $self->{"changed"} = 1; 448 return 1; 449} 450 451sub addautoscore 452{ 453 my ( $self, $code ) = @_; 454 $self->{"autoscore"} = $code; 455 $self->{"changed"} = 1; 456 return 1; 457} 458 459sub nextoperator($) 460{ 461 my $self = shift; 462 my $prodcount = scalar @{$self->{"prods"}}; 463 my $opcount = ++$self->{"opcount"}; 464 return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}"; 465} 466 467sub nextimplicit($) 468{ 469 my $self = shift; 470 my $prodcount = scalar @{$self->{"prods"}}; 471 my $impcount = ++$self->{"impcount"}; 472 return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}"; 473} 474 475 476sub code 477{ 478 my ($self, $namespace, $parser, $check) = @_; 479 480eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving}; 481 482 my $code = 483' 484# ARGS ARE: ($parser, $text; $repeating, $_noactions, \@args, $_itempos) 485sub ' . $namespace . '::' . $self->{"name"} . ' 486{ 487 my $thisparser = $_[0]; 488 use vars q{$tracelevel}; 489 local $tracelevel = ($tracelevel||0)+1; 490 $ERRORS = 0; 491 my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"}; 492 493 Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']}, 494 Parse::RecDescent::_tracefirst($_[1]), 495 q{' . $self->{"name"} . '}, 496 $tracelevel) 497 if defined $::RD_TRACE; 498 499 ' . ($parser->{deferrable} 500 ? 'my $def_at = @{$thisparser->{deferred}};' 501 : '') . 502 ' 503 my $err_at = @{$thisparser->{errors}}; 504 505 my $score; 506 my $score_return; 507 my $_tok; 508 my $return = undef; 509 my $_matched=0; 510 my $commit=0; 511 my @item = (); 512 my %item = (); 513 my $repeating = $_[2]; 514 my $_noactions = $_[3]; 515 my @arg = defined $_[4] ? @{ &{$_[4]} } : (); 516 my $_itempos = $_[5]; 517 my %arg = ($#arg & 01) ? @arg : (@arg, undef); 518 my $text; 519 my $lastsep; 520 my $current_match; 521 my $expectation = new Parse::RecDescent::Expectation(q{' . $self->expected() . '}); 522 $expectation->at($_[1]); 523 '. ($parser->{_check}{thisoffset}?' 524 my $thisoffset; 525 tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser; 526 ':'') . ($parser->{_check}{prevoffset}?' 527 my $prevoffset; 528 tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1; 529 ':'') . ($parser->{_check}{thiscolumn}?' 530 my $thiscolumn; 531 tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser; 532 ':'') . ($parser->{_check}{prevcolumn}?' 533 my $prevcolumn; 534 tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1; 535 ':'') . ($parser->{_check}{prevline}?' 536 my $prevline; 537 tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1; 538 ':'') . ' 539 my $thisline; 540 tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser; 541 542 '. $self->{vars} .' 543'; 544 545 my $prod; 546 foreach $prod ( @{$self->{"prods"}} ) 547 { 548 $prod->addscore($self->{autoscore},0,0) if $self->{autoscore}; 549 next unless $prod->checkleftmost(); 550 $code .= $prod->code($namespace,$self,$parser); 551 552 $code .= $parser->{deferrable} 553 ? ' splice 554 @{$thisparser->{deferred}}, $def_at unless $_matched; 555 ' 556 : ''; 557 } 558 559 $code .= 560' 561 unless ( $_matched || defined($score) ) 562 { 563 ' .($parser->{deferrable} 564 ? ' splice @{$thisparser->{deferred}}, $def_at; 565 ' 566 : '') . ' 567 568 $_[1] = $text; # NOT SURE THIS IS NEEDED 569 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' rule>>}, 570 Parse::RecDescent::_tracefirst($_[1]), 571 q{' . $self->{"name"} .'}, 572 $tracelevel) 573 if defined $::RD_TRACE; 574 return undef; 575 } 576 if (!defined($return) && defined($score)) 577 { 578 Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "", 579 q{' . $self->{"name"} .'}, 580 $tracelevel) 581 if defined $::RD_TRACE; 582 $return = $score_return; 583 } 584 splice @{$thisparser->{errors}}, $err_at; 585 $return = $item[$#item] unless defined $return; 586 if (defined $::RD_TRACE) 587 { 588 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' rule<< (return value: [} . 589 $return . q{])}, "", 590 q{' . $self->{"name"} .'}, 591 $tracelevel); 592 Parse::RecDescent::_trace(q{(consumed: [} . 593 Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])}, 594 Parse::RecDescent::_tracefirst($text), 595 , q{' . $self->{"name"} .'}, 596 $tracelevel) 597 } 598 $_[1] = $text; 599 return $return; 600} 601'; 602 603 return $code; 604} 605 606my @left; 607sub isleftrec($$) 608{ 609 my ($self, $rules) = @_; 610 my $root = $self->{"name"}; 611 @left = $self->leftmostsubrules(); 612 my $next; 613 foreach $next ( @left ) 614 { 615 next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES 616 return 1 if $next eq $root; 617 my $child; 618 foreach $child ( $rules->{$next}->leftmostsubrules() ) 619 { 620 push(@left, $child) 621 if ! _contains($child, @left) ; 622 } 623 } 624 return 0; 625} 626 627package Parse::RecDescent::Production; 628 629sub describe ($;$) 630{ 631 return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}}; 632} 633 634sub new ($$;$$) 635{ 636 my ($self, $line, $uncommit, $error) = @_; 637 my $class = ref($self) || $self; 638 639 bless 640 { 641 "items" => [], 642 "uncommit" => $uncommit, 643 "error" => $error, 644 "line" => $line, 645 strcount => 0, 646 patcount => 0, 647 dircount => 0, 648 actcount => 0, 649 }, $class; 650} 651 652sub expected ($) 653{ 654 my $itemcount = scalar @{$_[0]->{"items"}}; 655 return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : ''; 656} 657 658sub hasleftmost ($$) 659{ 660 my ($self, $ref) = @_; 661 return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}}; 662 return 0; 663} 664 665sub isempty($) 666{ 667 my $self = shift; 668 return 0 == @{$self->{"items"}}; 669} 670 671sub leftmostsubrule($) 672{ 673 my $self = shift; 674 675 if ( $#{$self->{"items"}} >= 0 ) 676 { 677 my $subrule = $self->{"items"}[0]->issubrule(); 678 return $subrule if defined $subrule; 679 } 680 681 return (); 682} 683 684sub checkleftmost($) 685{ 686 my @items = @{$_[0]->{"items"}}; 687 if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/ 688 && $items[0]->{commitonly} ) 689 { 690 Parse::RecDescent::_warn(2,"Lone <error?> in production treated 691 as <error?> <reject>"); 692 Parse::RecDescent::_hint("A production consisting of a single 693 conditional <error?> directive would 694 normally succeed (with the value zero) if the 695 rule is not 'commited' when it is 696 tried. Since you almost certainly wanted 697 '<error?> <reject>' Parse::RecDescent 698 supplied it for you."); 699 push @{$_[0]->{items}}, 700 Parse::RecDescent::UncondReject->new(0,0,'<reject>'); 701 } 702 elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/) 703 { 704 # Do nothing 705 } 706 elsif (@items && 707 ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/ 708 || ($items[0]->describe||"") =~ /<autoscore/ 709 )) 710 { 711 Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]"); 712 my $what = $items[0]->describe =~ /<rulevar/ 713 ? "a <rulevar> (which acts like an unconditional <reject> during parsing)" 714 : $items[0]->describe =~ /<autoscore/ 715 ? "an <autoscore> (which acts like an unconditional <reject> during parsing)" 716 : "an unconditional <reject>"; 717 my $caveat = $items[0]->describe =~ /<rulevar/ 718 ? " after the specified variable was set up" 719 : ""; 720 my $advice = @items > 1 721 ? "However, there were also other (useless) items after the leading " 722 . $items[0]->describe 723 . ", so you may have been expecting some other behaviour." 724 : "You can safely ignore this message."; 725 Parse::RecDescent::_hint("The production starts with $what. That means that the 726 production can never successfully match, so it was 727 optimized out of the final parser$caveat. $advice"); 728 return 0; 729 } 730 return 1; 731} 732 733sub changesskip($) 734{ 735 my $item; 736 foreach $item (@{$_[0]->{"items"}}) 737 { 738 if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/) 739 { 740 return 1 if $item->{code} =~ /\$skip\s*=/; 741 } 742 } 743 return 0; 744} 745 746sub adddirective 747{ 748 my ( $self, $whichop, $line, $name ) = @_; 749 push @{$self->{op}}, 750 { type=>$whichop, line=>$line, name=>$name, 751 offset=> scalar(@{$self->{items}}) }; 752} 753 754sub addscore 755{ 756 my ( $self, $code, $lookahead, $line ) = @_; 757 $self->additem(Parse::RecDescent::Directive->new( 758 "local \$^W; 759 my \$thisscore = do { $code } + 0; 760 if (!defined(\$score) || \$thisscore>\$score) 761 { \$score=\$thisscore; \$score_return=\$item[-1]; } 762 undef;", $lookahead, $line,"<score: $code>") ) 763 unless $self->{items}[-1]->describe =~ /<score/; 764 return 1; 765} 766 767sub check_pending 768{ 769 my ( $self, $line ) = @_; 770 if ($self->{op}) 771 { 772 while (my $next = pop @{$self->{op}}) 773 { 774 Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line); 775 Parse::RecDescent::_hint( 776 "The current production ended without completing the 777 <$next->{type}op:...> directive that started near line 778 $next->{line}. Did you forget the closing '>'?"); 779 } 780 } 781 return 1; 782} 783 784sub enddirective 785{ 786 my ( $self, $line, $minrep, $maxrep ) = @_; 787 unless ($self->{op}) 788 { 789 Parse::RecDescent::_error("Unmatched > found.", $line); 790 Parse::RecDescent::_hint( 791 "A '>' angle bracket was encountered, which typically 792 indicates the end of a directive. However no suitable 793 preceding directive was encountered. Typically this 794 indicates either a extra '>' in the grammar, or a 795 problem inside the previous directive."); 796 return; 797 } 798 my $op = pop @{$self->{op}}; 799 my $span = @{$self->{items}} - $op->{offset}; 800 if ($op->{type} =~ /left|right/) 801 { 802 if ($span != 3) 803 { 804 Parse::RecDescent::_error( 805 "Incorrect <$op->{type}op:...> specification: 806 expected 3 args, but found $span instead", $line); 807 Parse::RecDescent::_hint( 808 "The <$op->{type}op:...> directive requires a 809 sequence of exactly three elements. For example: 810 <$op->{type}op:leftarg /op/ rightarg>"); 811 } 812 else 813 { 814 push @{$self->{items}}, 815 Parse::RecDescent::Operator->new( 816 $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3)); 817 $self->{items}[-1]->sethashname($self); 818 $self->{items}[-1]{name} = $op->{name}; 819 } 820 } 821} 822 823sub prevwasreturn 824{ 825 my ( $self, $line ) = @_; 826 unless (@{$self->{items}}) 827 { 828 Parse::RecDescent::_error( 829 "Incorrect <return:...> specification: 830 expected item missing", $line); 831 Parse::RecDescent::_hint( 832 "The <return:...> directive requires a 833 sequence of at least one item. For example: 834 <return: list>"); 835 return; 836 } 837 push @{$self->{items}}, 838 Parse::RecDescent::Result->new(); 839} 840 841sub additem 842{ 843 my ( $self, $item ) = @_; 844 $item->sethashname($self); 845 push @{$self->{"items"}}, $item; 846 return $item; 847} 848 849sub _duplicate_itempos 850{ 851 my ($src) = @_; 852 my $dst = {}; 853 854 foreach (keys %$src) 855 { 856 %{$dst->{$_}} = %{$src->{$_}}; 857 } 858 $dst; 859} 860 861sub _update_itempos 862{ 863 my ($dst, $src, $typekeys, $poskeys) = @_; 864 865 my @typekeys = 'ARRAY' eq ref $typekeys ? 866 @$typekeys : 867 keys %$src; 868 869 foreach my $k (keys %$src) 870 { 871 if ('ARRAY' eq ref $poskeys) 872 { 873 @{$dst->{$k}}{@$poskeys} = @{$src->{$k}}{@$poskeys}; 874 } 875 else 876 { 877 %{$dst->{$k}} = %{$src->{$k}}; 878 } 879 } 880} 881 882sub preitempos 883{ 884 return q 885 { 886 push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef}, 887 'line' => {'from'=>$thisline, 'to'=>undef}, 888 'column' => {'from'=>$thiscolumn, 'to'=>undef} }; 889 } 890} 891 892sub incitempos 893{ 894 return q 895 { 896 $itempos[$#itempos]{'offset'}{'from'} += length($lastsep); 897 $itempos[$#itempos]{'line'}{'from'} = $thisline; 898 $itempos[$#itempos]{'column'}{'from'} = $thiscolumn; 899 } 900} 901 902sub unincitempos 903{ 904 # the next incitempos will properly set these two fields, but 905 # {'offset'}{'from'} needs to be decreased by length($lastsep) 906 # $itempos[$#itempos]{'line'}{'from'} 907 # $itempos[$#itempos]{'column'}{'from'} 908 return q 909 { 910 $itempos[$#itempos]{'offset'}{'from'} -= length($lastsep) if defined $lastsep; 911 } 912} 913 914sub postitempos 915{ 916 return q 917 { 918 $itempos[$#itempos]{'offset'}{'to'} = $prevoffset; 919 $itempos[$#itempos]{'line'}{'to'} = $prevline; 920 $itempos[$#itempos]{'column'}{'to'} = $prevcolumn; 921 } 922} 923 924sub code($$$$) 925{ 926 my ($self,$namespace,$rule,$parser) = @_; 927 my $code = 928' 929 while (!$_matched' 930 . (defined $self->{"uncommit"} ? '' : ' && !$commit') 931 . ') 932 { 933 ' . 934 ($self->changesskip() 935 ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;' 936 : '') .' 937 Parse::RecDescent::_trace(q{Trying production: [' 938 . $self->describe . ']}, 939 Parse::RecDescent::_tracefirst($_[1]), 940 q{' . $rule ->{name}. '}, 941 $tracelevel) 942 if defined $::RD_TRACE; 943 my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . ']; 944 ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . ' 945 my $_savetext; 946 @item = (q{' . $rule->{"name"} . '}); 947 %item = (__RULE__ => q{' . $rule->{"name"} . '}); 948 my $repcount = 0; 949 950'; 951 $code .= 952' my @itempos = ({}); 953' if $parser->{_check}{itempos}; 954 955 my $item; 956 my $i; 957 958 for ($i = 0; $i < @{$self->{"items"}}; $i++) 959 { 960 $item = ${$self->{items}}[$i]; 961 962 $code .= preitempos() if $parser->{_check}{itempos}; 963 964 $code .= $item->code($namespace,$rule,$parser->{_check}); 965 966 $code .= postitempos() if $parser->{_check}{itempos}; 967 968 } 969 970 if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action")) 971 { 972 $code .= $parser->{_AUTOACTION}->code($namespace,$rule); 973 Parse::RecDescent::_warn(1,"Autogenerating action in rule 974 \"$rule->{name}\": 975 $parser->{_AUTOACTION}{code}") 976 and 977 Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined, 978 so any production not ending in an 979 explicit action has the specified 980 \"auto-action\" automatically 981 appended."); 982 } 983 elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action")) 984 { 985 if ($i==1 && $item->isterminal) 986 { 987 $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule); 988 } 989 else 990 { 991 $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule); 992 } 993 Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule 994 \"$rule->{name}\"") 995 and 996 Parse::RecDescent::_hint("The directive <autotree> was specified, 997 so any production not ending 998 in an explicit action has 999 some parse-tree building code 1000 automatically appended."); 1001 } 1002 1003 $code .= 1004' 1005 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' production: [' 1006 . $self->describe . ']<<}, 1007 Parse::RecDescent::_tracefirst($text), 1008 q{' . $rule->{name} . '}, 1009 $tracelevel) 1010 if defined $::RD_TRACE; 1011 1012' . ( $parser->{_check}{itempos} ? ' 1013 if ( defined($_itempos) ) 1014 { 1015 Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[ 1], undef, [qw(from)]); 1016 Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[-1], undef, [qw(to)]); 1017 } 1018' : '' ) . ' 1019 1020 $_matched = 1; 1021 last; 1022 } 1023 1024'; 1025 return $code; 1026} 1027 10281; 1029 1030package Parse::RecDescent::Action; 1031 1032sub describe { undef } 1033 1034sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; } 1035 1036sub new 1037{ 1038 my $class = ref($_[0]) || $_[0]; 1039 bless 1040 { 1041 "code" => $_[1], 1042 "lookahead" => $_[2], 1043 "line" => $_[3], 1044 }, $class; 1045} 1046 1047sub issubrule { undef } 1048sub isterminal { 0 } 1049 1050sub code($$$$) 1051{ 1052 my ($self, $namespace, $rule) = @_; 1053 1054' 1055 Parse::RecDescent::_trace(q{Trying action}, 1056 Parse::RecDescent::_tracefirst($text), 1057 q{' . $rule->{name} . '}, 1058 $tracelevel) 1059 if defined $::RD_TRACE; 1060 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' 1061 1062 $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . '; 1063 ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok) 1064 { 1065 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' action>> (return value: [undef])}) 1066 if defined $::RD_TRACE; 1067 last; 1068 } 1069 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' action<< (return value: [} 1070 . $_tok . q{])}, 1071 Parse::RecDescent::_tracefirst($text)) 1072 if defined $::RD_TRACE; 1073 push @item, $_tok; 1074 ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .' 1075 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1076' 1077} 1078 1079 10801; 1081 1082package Parse::RecDescent::Directive; 1083 1084sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } 1085 1086sub issubrule { undef } 1087sub isterminal { 0 } 1088sub describe { $_[1] ? '' : $_[0]->{name} } 1089 1090sub new ($$$$$) 1091{ 1092 my $class = ref($_[0]) || $_[0]; 1093 bless 1094 { 1095 "code" => $_[1], 1096 "lookahead" => $_[2], 1097 "line" => $_[3], 1098 "name" => $_[4], 1099 }, $class; 1100} 1101 1102sub code($$$$) 1103{ 1104 my ($self, $namespace, $rule) = @_; 1105 1106' 1107 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' 1108 1109 Parse::RecDescent::_trace(q{Trying directive: [' 1110 . $self->describe . ']}, 1111 Parse::RecDescent::_tracefirst($text), 1112 q{' . $rule->{name} . '}, 1113 $tracelevel) 1114 if defined $::RD_TRACE; ' .' 1115 $_tok = do { ' . $self->{"code"} . ' }; 1116 if (defined($_tok)) 1117 { 1118 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' directive<< (return value: [} 1119 . $_tok . q{])}, 1120 Parse::RecDescent::_tracefirst($text)) 1121 if defined $::RD_TRACE; 1122 } 1123 else 1124 { 1125 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' directive>>}, 1126 Parse::RecDescent::_tracefirst($text)) 1127 if defined $::RD_TRACE; 1128 } 1129 ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' 1130 last ' 1131 . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; 1132 push @item, $item{'.$self->{hashname}.'}=$_tok; 1133 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1134' 1135} 1136 11371; 1138 1139package Parse::RecDescent::UncondReject; 1140 1141sub issubrule { undef } 1142sub isterminal { 0 } 1143sub describe { $_[1] ? '' : $_[0]->{name} } 1144sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } 1145 1146sub new ($$$;$) 1147{ 1148 my $class = ref($_[0]) || $_[0]; 1149 bless 1150 { 1151 "lookahead" => $_[1], 1152 "line" => $_[2], 1153 "name" => $_[3], 1154 }, $class; 1155} 1156 1157# MARK, YOU MAY WANT TO OPTIMIZE THIS. 1158 1159 1160sub code($$$$) 1161{ 1162 my ($self, $namespace, $rule) = @_; 1163 1164' 1165 Parse::RecDescent::_trace(q{>>Rejecting production<< (found ' 1166 . $self->describe . ')}, 1167 Parse::RecDescent::_tracefirst($text), 1168 q{' . $rule->{name} . '}, 1169 $tracelevel) 1170 if defined $::RD_TRACE; 1171 undef $return; 1172 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' 1173 1174 $_tok = undef; 1175 ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' 1176 last ' 1177 . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; 1178' 1179} 1180 11811; 1182 1183package Parse::RecDescent::Error; 1184 1185sub issubrule { undef } 1186sub isterminal { 0 } 1187sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' } 1188sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } 1189 1190sub new ($$$$$) 1191{ 1192 my $class = ref($_[0]) || $_[0]; 1193 bless 1194 { 1195 "msg" => $_[1], 1196 "lookahead" => $_[2], 1197 "commitonly" => $_[3], 1198 "line" => $_[4], 1199 }, $class; 1200} 1201 1202sub code($$$$) 1203{ 1204 my ($self, $namespace, $rule) = @_; 1205 1206 my $action = ''; 1207 1208 if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED 1209 { 1210 #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);'; 1211 $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];'; 1212 1213 } 1214 else # GENERATE ERROR MESSAGE DURING PARSE 1215 { 1216 $action .= ' 1217 my $rule = $item[0]; 1218 $rule =~ s/_/ /g; 1219 #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline); 1220 push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline]; 1221 '; 1222 } 1223 1224 my $dir = 1225 new Parse::RecDescent::Directive('if (' . 1226 ($self->{"commitonly"} ? '$commit' : '1') . 1227 ") { do {$action} unless ".' $_noactions; undef } else {0}', 1228 $self->{"lookahead"},0,$self->describe); 1229 $dir->{hashname} = $self->{hashname}; 1230 return $dir->code($namespace, $rule, 0); 1231} 1232 12331; 1234 1235package Parse::RecDescent::Token; 1236 1237sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; } 1238 1239sub issubrule { undef } 1240sub isterminal { 1 } 1241sub describe ($) { shift->{'description'}} 1242 1243 1244# ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum 1245sub new ($$$$$$) 1246{ 1247 my $class = ref($_[0]) || $_[0]; 1248 my $pattern = $_[1]; 1249 my $pat = $_[1]; 1250 my $ldel = $_[2]; 1251 my $rdel = $ldel; 1252 $rdel =~ tr/{[(</}])>/; 1253 1254 my $mod = $_[3]; 1255 1256 my $desc; 1257 1258 if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" } 1259 else { $desc = "m$ldel$pattern$rdel$mod" } 1260 $desc =~ s/\\/\\\\/g; 1261 $desc =~ s/\$$/\\\$/g; 1262 $desc =~ s/}/\\}/g; 1263 $desc =~ s/{/\\{/g; 1264 1265 if (!eval "no strict; 1266 local \$SIG{__WARN__} = sub {0}; 1267 '' =~ m$ldel$pattern$rdel$mod" and $@) 1268 { 1269 Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel$mod\" 1270 may not be a valid regular expression", 1271 $_[5]); 1272 $@ =~ s/ at \(eval.*/./; 1273 Parse::RecDescent::_hint($@); 1274 } 1275 1276 # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY 1277 $mod =~ s/[gc]//g; 1278 $pattern =~ s/(\A|[^\\])\\G/$1/g; 1279 1280 bless 1281 { 1282 "pattern" => $pattern, 1283 "ldelim" => $ldel, 1284 "rdelim" => $rdel, 1285 "mod" => $mod, 1286 "lookahead" => $_[4], 1287 "line" => $_[5], 1288 "description" => $desc, 1289 }, $class; 1290} 1291 1292 1293sub code($$$$$) 1294{ 1295 my ($self, $namespace, $rule, $check) = @_; 1296 my $ldel = $self->{"ldelim"}; 1297 my $rdel = $self->{"rdelim"}; 1298 my $sdel = $ldel; 1299 my $mod = $self->{"mod"}; 1300 1301 $sdel =~ s/[[{(<]/{}/; 1302 1303my $code = ' 1304 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe 1305 . ']}, Parse::RecDescent::_tracefirst($text), 1306 q{' . $rule->{name} . '}, 1307 $tracelevel) 1308 if defined $::RD_TRACE; 1309 undef $lastsep; 1310 $expectation->is(q{' . ($rule->hasleftmost($self) ? '' 1311 : $self->describe ) . '})->at($text); 1312 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' 1313 1314 ' . ($self->{"lookahead"}<0?'if':'unless') 1315 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' 1316 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') 1317 . ' $text =~ m' . $ldel . '\A(?:' . $self->{"pattern"} . ')' . $rdel . $mod . ') 1318 { 1319 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;') . 1320 ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' 1321 $expectation->failed(); 1322 Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, 1323 Parse::RecDescent::_tracefirst($text)) 1324 if defined $::RD_TRACE; 1325 1326 last; 1327 } 1328 $current_match = substr($text, $-[0], $+[0] - $-[0]); 1329 substr($text,0,length($current_match),q{}); 1330 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} 1331 . $current_match . q{])}, 1332 Parse::RecDescent::_tracefirst($text)) 1333 if defined $::RD_TRACE; 1334 push @item, $item{'.$self->{hashname}.'}=$current_match; 1335 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1336'; 1337 1338 return $code; 1339} 1340 13411; 1342 1343package Parse::RecDescent::Literal; 1344 1345sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } 1346 1347sub issubrule { undef } 1348sub isterminal { 1 } 1349sub describe ($) { shift->{'description'} } 1350 1351sub new ($$$$) 1352{ 1353 my $class = ref($_[0]) || $_[0]; 1354 1355 my $pattern = $_[1]; 1356 1357 my $desc = $pattern; 1358 $desc=~s/\\/\\\\/g; 1359 $desc=~s/}/\\}/g; 1360 $desc=~s/{/\\{/g; 1361 1362 bless 1363 { 1364 "pattern" => $pattern, 1365 "lookahead" => $_[2], 1366 "line" => $_[3], 1367 "description" => "'$desc'", 1368 }, $class; 1369} 1370 1371 1372sub code($$$$) 1373{ 1374 my ($self, $namespace, $rule, $check) = @_; 1375 1376my $code = ' 1377 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe 1378 . ']}, 1379 Parse::RecDescent::_tracefirst($text), 1380 q{' . $rule->{name} . '}, 1381 $tracelevel) 1382 if defined $::RD_TRACE; 1383 undef $lastsep; 1384 $expectation->is(q{' . ($rule->hasleftmost($self) ? '' 1385 : $self->describe ) . '})->at($text); 1386 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' 1387 1388 ' . ($self->{"lookahead"}<0?'if':'unless') 1389 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' 1390 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') 1391 . ' $text =~ m/\A' . quotemeta($self->{"pattern"}) . '/) 1392 { 1393 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' 1394 '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' 1395 $expectation->failed(); 1396 Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>}, 1397 Parse::RecDescent::_tracefirst($text)) 1398 if defined $::RD_TRACE; 1399 last; 1400 } 1401 $current_match = substr($text, $-[0], $+[0] - $-[0]); 1402 substr($text,0,length($current_match),q{}); 1403 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} 1404 . $current_match . q{])}, 1405 Parse::RecDescent::_tracefirst($text)) 1406 if defined $::RD_TRACE; 1407 push @item, $item{'.$self->{hashname}.'}=$current_match; 1408 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1409'; 1410 1411 return $code; 1412} 1413 14141; 1415 1416package Parse::RecDescent::InterpLit; 1417 1418sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } 1419 1420sub issubrule { undef } 1421sub isterminal { 1 } 1422sub describe ($) { shift->{'description'} } 1423 1424sub new ($$$$) 1425{ 1426 my $class = ref($_[0]) || $_[0]; 1427 1428 my $pattern = $_[1]; 1429 $pattern =~ s#/#\\/#g; 1430 1431 my $desc = $pattern; 1432 $desc=~s/\\/\\\\/g; 1433 $desc=~s/}/\\}/g; 1434 $desc=~s/{/\\{/g; 1435 1436 bless 1437 { 1438 "pattern" => $pattern, 1439 "lookahead" => $_[2], 1440 "line" => $_[3], 1441 "description" => "'$desc'", 1442 }, $class; 1443} 1444 1445sub code($$$$) 1446{ 1447 my ($self, $namespace, $rule, $check) = @_; 1448 1449my $code = ' 1450 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe 1451 . ']}, 1452 Parse::RecDescent::_tracefirst($text), 1453 q{' . $rule->{name} . '}, 1454 $tracelevel) 1455 if defined $::RD_TRACE; 1456 undef $lastsep; 1457 $expectation->is(q{' . ($rule->hasleftmost($self) ? '' 1458 : $self->describe ) . '})->at($text); 1459 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' 1460 1461 ' . ($self->{"lookahead"}<0?'if':'unless') 1462 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' 1463 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') 1464 . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and 1465 substr($text,0,length($_tok)) eq $_tok and 1466 do { substr($text,0,length($_tok)) = ""; 1; } 1467 ) 1468 { 1469 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' 1470 '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' 1471 $expectation->failed(); 1472 Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, 1473 Parse::RecDescent::_tracefirst($text)) 1474 if defined $::RD_TRACE; 1475 last; 1476 } 1477 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} 1478 . $_tok . q{])}, 1479 Parse::RecDescent::_tracefirst($text)) 1480 if defined $::RD_TRACE; 1481 push @item, $item{'.$self->{hashname}.'}=$_tok; 1482 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1483'; 1484 1485 return $code; 1486} 1487 14881; 1489 1490package Parse::RecDescent::Subrule; 1491 1492sub issubrule ($) { return $_[0]->{"subrule"} } 1493sub isterminal { 0 } 1494sub sethashname {} 1495 1496sub describe ($) 1497{ 1498 my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"}; 1499 $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; 1500 return $desc; 1501} 1502 1503sub callsyntax($$) 1504{ 1505 if ($_[0]->{"matchrule"}) 1506 { 1507 return "&{'$_[1]'.qq{$_[0]->{subrule}}}"; 1508 } 1509 else 1510 { 1511 return $_[1].$_[0]->{"subrule"}; 1512 } 1513} 1514 1515sub new ($$$$;$$$) 1516{ 1517 my $class = ref($_[0]) || $_[0]; 1518 bless 1519 { 1520 "subrule" => $_[1], 1521 "lookahead" => $_[2], 1522 "line" => $_[3], 1523 "implicit" => $_[4] || undef, 1524 "matchrule" => $_[5], 1525 "argcode" => $_[6] || undef, 1526 }, $class; 1527} 1528 1529 1530sub code($$$$) 1531{ 1532 my ($self, $namespace, $rule, $check) = @_; 1533 1534' 1535 Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']}, 1536 Parse::RecDescent::_tracefirst($text), 1537 q{' . $rule->{"name"} . '}, 1538 $tracelevel) 1539 if defined $::RD_TRACE; 1540 if (1) { no strict qw{refs}; 1541 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' 1542 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); 1543 : 'q{'.$self->describe.'}' ) . ')->at($text); 1544 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) 1545 . ($self->{"lookahead"}<0?'if':'unless') 1546 . ' (defined ($_tok = ' 1547 . $self->callsyntax($namespace.'::') 1548 . '($thisparser,$text,$repeating,' 1549 . ($self->{"lookahead"}?'1':'$_noactions') 1550 . ($self->{argcode} ? ",sub { return $self->{argcode} }" 1551 : ',sub { \\@arg }') 1552 . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') 1553 . '))) 1554 { 1555 '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' 1556 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' subrule: [' 1557 . $self->{subrule} . ']>>}, 1558 Parse::RecDescent::_tracefirst($text), 1559 q{' . $rule->{"name"} .'}, 1560 $tracelevel) 1561 if defined $::RD_TRACE; 1562 $expectation->failed(); 1563 last; 1564 } 1565 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' subrule: [' 1566 . $self->{subrule} . ']<< (return value: [} 1567 . $_tok . q{]}, 1568 1569 Parse::RecDescent::_tracefirst($text), 1570 q{' . $rule->{"name"} .'}, 1571 $tracelevel) 1572 if defined $::RD_TRACE; 1573 $item{q{' . $self->{subrule} . '}} = $_tok; 1574 push @item, $_tok; 1575 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1576 } 1577' 1578} 1579 1580package Parse::RecDescent::Repetition; 1581 1582sub issubrule ($) { return $_[0]->{"subrule"} } 1583sub isterminal { 0 } 1584sub sethashname { } 1585 1586sub describe ($) 1587{ 1588 my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"}; 1589 $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; 1590 return $desc; 1591} 1592 1593sub callsyntax($$) 1594{ 1595 if ($_[0]->{matchrule}) 1596 { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; } 1597 else 1598 { return "\\&$_[1]$_[0]->{subrule}"; } 1599} 1600 1601sub new ($$$$$$$$$$) 1602{ 1603 my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_; 1604 my $class = ref($self) || $self; 1605 ($max, $min) = ( $min, $max) if ($max<$min); 1606 1607 my $desc; 1608 if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/) 1609 { $desc = $parser->{"rules"}{$subrule}->expected } 1610 1611 if ($lookahead) 1612 { 1613 if ($min>0) 1614 { 1615 return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode); 1616 } 1617 else 1618 { 1619 Parse::RecDescent::_error("Not symbol (\"!\") before 1620 \"$subrule\" doesn't make 1621 sense.",$line); 1622 Parse::RecDescent::_hint("Lookahead for negated optional 1623 repetitions (such as 1624 \"!$subrule($repspec)\" can never 1625 succeed, since optional items always 1626 match (zero times at worst). 1627 Did you mean a single \"!$subrule\", 1628 instead?"); 1629 } 1630 } 1631 bless 1632 { 1633 "subrule" => $subrule, 1634 "repspec" => $repspec, 1635 "min" => $min, 1636 "max" => $max, 1637 "lookahead" => $lookahead, 1638 "line" => $line, 1639 "expected" => $desc, 1640 "argcode" => $argcode || undef, 1641 "matchrule" => $matchrule, 1642 }, $class; 1643} 1644 1645sub code($$$$) 1646{ 1647 my ($self, $namespace, $rule, $check) = @_; 1648 1649 my ($subrule, $repspec, $min, $max, $lookahead) = 1650 @{$self}{ qw{subrule repspec min max lookahead} }; 1651 1652' 1653 Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']}, 1654 Parse::RecDescent::_tracefirst($text), 1655 q{' . $rule->{"name"} . '}, 1656 $tracelevel) 1657 if defined $::RD_TRACE; 1658 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' 1659 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); 1660 : 'q{'.$self->describe.'}' ) . ')->at($text); 1661 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' 1662 unless (defined ($_tok = $thisparser->_parserepeat($text, ' 1663 . $self->callsyntax($namespace.'::') 1664 . ', ' . $min . ', ' . $max . ', ' 1665 . ($self->{"lookahead"}?'1':'$_noactions') 1666 . ',$expectation,' 1667 . ($self->{argcode} ? "sub { return $self->{argcode} }" 1668 : 'sub { \\@arg }') 1669 . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') 1670 . '))) 1671 { 1672 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' repeated subrule: [' 1673 . $self->describe . ']>>}, 1674 Parse::RecDescent::_tracefirst($text), 1675 q{' . $rule->{"name"} .'}, 1676 $tracelevel) 1677 if defined $::RD_TRACE; 1678 last; 1679 } 1680 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' repeated subrule: [' 1681 . $self->{subrule} . ']<< (} 1682 . @$_tok . q{ times)}, 1683 1684 Parse::RecDescent::_tracefirst($text), 1685 q{' . $rule->{"name"} .'}, 1686 $tracelevel) 1687 if defined $::RD_TRACE; 1688 $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok; 1689 push @item, $_tok; 1690 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' 1691 1692' 1693} 1694 1695package Parse::RecDescent::Result; 1696 1697sub issubrule { 0 } 1698sub isterminal { 0 } 1699sub describe { '' } 1700 1701sub new 1702{ 1703 my ($class, $pos) = @_; 1704 1705 bless {}, $class; 1706} 1707 1708sub code($$$$) 1709{ 1710 my ($self, $namespace, $rule) = @_; 1711 1712 ' 1713 $return = $item[-1]; 1714 '; 1715} 1716 1717package Parse::RecDescent::Operator; 1718 1719my @opertype = ( " non-optional", "n optional" ); 1720 1721sub issubrule { 0 } 1722sub isterminal { 0 } 1723 1724sub describe { $_[0]->{"expected"} } 1725sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } 1726 1727 1728sub new 1729{ 1730 my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_; 1731 1732 bless 1733 { 1734 "type" => "${type}op", 1735 "leftarg" => $leftarg, 1736 "op" => $op, 1737 "min" => $minrep, 1738 "max" => $maxrep, 1739 "rightarg" => $rightarg, 1740 "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">", 1741 }, $class; 1742} 1743 1744sub code($$$$) 1745{ 1746 my ($self, $namespace, $rule, $check) = @_; 1747 1748 my @codeargs = @_[1..$#_]; 1749 1750 my ($leftarg, $op, $rightarg) = 1751 @{$self}{ qw{leftarg op rightarg} }; 1752 1753 my $code = ' 1754 Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']}, 1755 Parse::RecDescent::_tracefirst($text), 1756 q{' . $rule->{"name"} . '}, 1757 $tracelevel) 1758 if defined $::RD_TRACE; 1759 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' 1760 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); 1761 : 'q{'.$self->describe.'}' ) . ')->at($text); 1762 1763 $_tok = undef; 1764 OPLOOP: while (1) 1765 { 1766 $repcount = 0; 1767 my @item; 1768 my %item; 1769'; 1770 1771 $code .= ' 1772 my $_itempos = $itempos[-1]; 1773 my $itemposfirst; 1774' if $check->{itempos}; 1775 1776 if ($self->{type} eq "leftop" ) 1777 { 1778 $code .= ' 1779 # MATCH LEFTARG 1780 ' . $leftarg->code(@codeargs) . ' 1781 1782'; 1783 1784 $code .= ' 1785 if (defined($_itempos) and !defined($itemposfirst)) 1786 { 1787 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); 1788 } 1789' if $check->{itempos}; 1790 1791 $code .= ' 1792 $repcount++; 1793 1794 my $savetext = $text; 1795 my $backtrack; 1796 1797 # MATCH (OP RIGHTARG)(s) 1798 while ($repcount < ' . $self->{max} . ') 1799 { 1800 $backtrack = 0; 1801 ' . $op->code(@codeargs) . ' 1802 ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . ' 1803 ' . (ref($op) eq 'Parse::RecDescent::Token' 1804 ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}' 1805 : "" ) . ' 1806 ' . $rightarg->code(@codeargs) . ' 1807 $savetext = $text; 1808 $repcount++; 1809 } 1810 $text = $savetext; 1811 pop @item if $backtrack; 1812 1813 '; 1814 } 1815 else 1816 { 1817 $code .= ' 1818 my $savetext = $text; 1819 my $backtrack; 1820 # MATCH (LEFTARG OP)(s) 1821 while ($repcount < ' . $self->{max} . ') 1822 { 1823 $backtrack = 0; 1824 ' . $leftarg->code(@codeargs) . ' 1825'; 1826 $code .= ' 1827 if (defined($_itempos) and !defined($itemposfirst)) 1828 { 1829 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); 1830 } 1831' if $check->{itempos}; 1832 1833 $code .= ' 1834 $repcount++; 1835 $backtrack = 1; 1836 ' . $op->code(@codeargs) . ' 1837 $savetext = $text; 1838 ' . ($op->isterminal() ? 'pop @item;' : "" ) . ' 1839 ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . ' 1840 } 1841 $text = $savetext; 1842 pop @item if $backtrack; 1843 1844 # MATCH RIGHTARG 1845 ' . $rightarg->code(@codeargs) . ' 1846 $repcount++; 1847 '; 1848 } 1849 1850 $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0; 1851 1852 $code .= ' 1853 $_tok = [ @item ]; 1854'; 1855 1856 1857 $code .= ' 1858 if (defined $itemposfirst) 1859 { 1860 Parse::RecDescent::Production::_update_itempos( 1861 $_itempos, $itemposfirst, undef, [qw(from)]); 1862 } 1863' if $check->{itempos}; 1864 1865 $code .= ' 1866 last; 1867 } # end of OPLOOP 1868'; 1869 1870 $code .= ' 1871 unless ($repcount>='.$self->{min}.') 1872 { 1873 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' operator: [' 1874 . $self->describe 1875 . ']>>}, 1876 Parse::RecDescent::_tracefirst($text), 1877 q{' . $rule->{"name"} .'}, 1878 $tracelevel) 1879 if defined $::RD_TRACE; 1880 $expectation->failed(); 1881 last; 1882 } 1883 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' operator: [' 1884 . $self->describe 1885 . ']<< (return value: [} 1886 . qq{@{$_tok||[]}} . q{]}, 1887 Parse::RecDescent::_tracefirst($text), 1888 q{' . $rule->{"name"} .'}, 1889 $tracelevel) 1890 if defined $::RD_TRACE; 1891 1892 push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[]; 1893'; 1894 1895 return $code; 1896} 1897 1898 1899package Parse::RecDescent::Expectation; 1900 1901sub new ($) 1902{ 1903 bless { 1904 "failed" => 0, 1905 "expected" => "", 1906 "unexpected" => "", 1907 "lastexpected" => "", 1908 "lastunexpected" => "", 1909 "defexpected" => $_[1], 1910 }; 1911} 1912 1913sub is ($$) 1914{ 1915 $_[0]->{lastexpected} = $_[1]; return $_[0]; 1916} 1917 1918sub at ($$) 1919{ 1920 $_[0]->{lastunexpected} = $_[1]; return $_[0]; 1921} 1922 1923sub failed ($) 1924{ 1925 return unless $_[0]->{lastexpected}; 1926 $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed}; 1927 $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed}; 1928 $_[0]->{failed} = 1; 1929} 1930 1931sub message ($) 1932{ 1933 my ($self) = @_; 1934 $self->{expected} = $self->{defexpected} unless $self->{expected}; 1935 $self->{expected} =~ s/_/ /g; 1936 if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s) 1937 { 1938 return "Was expecting $self->{expected}"; 1939 } 1940 else 1941 { 1942 $self->{unexpected} =~ /\s*(.*)/; 1943 return "Was expecting $self->{expected} but found \"$1\" instead"; 1944 } 1945} 1946 19471; 1948 1949package Parse::RecDescent; 1950 1951use Carp; 1952use vars qw ( $AUTOLOAD $VERSION $_FILENAME); 1953 1954my $ERRORS = 0; 1955 1956our $VERSION = '1.967015'; 1957$VERSION = eval $VERSION; 1958$_FILENAME=__FILE__; 1959 1960# BUILDING A PARSER 1961 1962my $nextnamespace = "namespace000001"; 1963 1964sub _nextnamespace() 1965{ 1966 return "Parse::RecDescent::" . $nextnamespace++; 1967} 1968 1969# ARGS ARE: $class, $grammar, $compiling, $namespace 1970sub new ($$$$) 1971{ 1972 my $class = ref($_[0]) || $_[0]; 1973 local $Parse::RecDescent::compiling = $_[2]; 1974 my $name_space_name = defined $_[3] 1975 ? "Parse::RecDescent::".$_[3] 1976 : _nextnamespace(); 1977 my $self = 1978 { 1979 "rules" => {}, 1980 "namespace" => $name_space_name, 1981 "startcode" => '', 1982 "localvars" => '', 1983 "_AUTOACTION" => undef, 1984 "_AUTOTREE" => undef, 1985 1986 # Precompiled parsers used to set _precompiled, but that 1987 # wasn't present in some versions of Parse::RecDescent used to 1988 # build precompiled parsers. Instead, set a new 1989 # _not_precompiled flag, which is remove from future 1990 # Precompiled parsers at build time. 1991 "_not_precompiled" => 1, 1992 }; 1993 1994 1995 if ($::RD_AUTOACTION) { 1996 my $sourcecode = $::RD_AUTOACTION; 1997 $sourcecode = "{ $sourcecode }" 1998 unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/; 1999 $self->{_check}{itempos} = 2000 $sourcecode =~ /\@itempos\b|\$itempos\s*\[/; 2001 $self->{_AUTOACTION} 2002 = new Parse::RecDescent::Action($sourcecode,0,-1) 2003 } 2004 2005 bless $self, $class; 2006 return $self->Replace($_[1]) 2007} 2008 2009sub Compile($$$$) { 2010 die "Compilation of Parse::RecDescent grammars not yet implemented\n"; 2011} 2012 2013sub DESTROY { 2014 my ($self) = @_; 2015 my $namespace = $self->{namespace}; 2016 $namespace =~ s/Parse::RecDescent:://; 2017 if ($self->{_not_precompiled}) { 2018 # BEGIN WORKAROUND 2019 # Perl has a bug that creates a circular reference between 2020 # @ISA and that variable's stash: 2021 # https://rt.perl.org/rt3/Ticket/Display.html?id=92708 2022 # Emptying the array before deleting the stash seems to 2023 # prevent the leak. Once the ticket above has been resolved, 2024 # these two lines can be removed. 2025 no strict 'refs'; 2026 @{$self->{namespace} . '::ISA'} = (); 2027 # END WORKAROUND 2028 2029 # Some grammars may contain circular references between rules, 2030 # such as: 2031 # a: 'ID' | b 2032 # b: '(' a ')' 2033 # Unless these references are broken, the subs stay around on 2034 # stash deletion below. Iterate through the stash entries and 2035 # for each defined code reference, set it to reference sub {} 2036 # instead. 2037 { 2038 local $^W; # avoid 'sub redefined' warnings. 2039 my $blank_sub = sub {}; 2040 while (my ($name, $glob) = each %{"Parse::RecDescent::$namespace\::"}) { 2041 *$glob = $blank_sub if defined &$glob; 2042 } 2043 } 2044 2045 # Delete the namespace's stash 2046 delete $Parse::RecDescent::{$namespace.'::'}; 2047 } 2048} 2049 2050# BUILDING A GRAMMAR.... 2051 2052# ARGS ARE: $self, $grammar, $isimplicit, $isleftop 2053sub Replace ($$) 2054{ 2055 # set $replace = 1 for _generate 2056 splice(@_, 2, 0, 1); 2057 2058 return _generate(@_); 2059} 2060 2061# ARGS ARE: $self, $grammar, $isimplicit, $isleftop 2062sub Extend ($$) 2063{ 2064 # set $replace = 0 for _generate 2065 splice(@_, 2, 0, 0); 2066 2067 return _generate(@_); 2068} 2069 2070sub _no_rule ($$;$) 2071{ 2072 _error("Ruleless $_[0] at start of grammar.",$_[1]); 2073 my $desc = $_[2] ? "\"$_[2]\"" : ""; 2074 _hint("You need to define a rule for the $_[0] $desc 2075 to be part of."); 2076} 2077 2078my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)'; 2079my $POSLOOKAHEAD = '\G(\s*\.\.\.)'; 2080my $RULE = '\G\s*(\w+)[ \t]*:'; 2081my $PROD = '\G\s*([|])'; 2082my $TOKEN = q{\G\s*/((\\\\/|\\\\\\\\|[^/])*)/([cgimsox]*)}; 2083my $MTOKEN = q{\G\s*(m\s*[^\w\s])}; 2084my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'}; 2085my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"}; 2086my $SUBRULE = '\G\s*(\w+)'; 2087my $MATCHRULE = '\G(\s*<matchrule:)'; 2088my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)'; 2089my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)'; 2090my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)'; 2091my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)'; 2092my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)'; 2093my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; 2094my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)'; 2095my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; 2096my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)'; 2097my $ACTION = '\G\s*\{'; 2098my $IMPLICITSUBRULE = '\G\s*\('; 2099my $COMMENT = '\G\s*(#.*)'; 2100my $COMMITMK = '\G\s*<commit>'; 2101my $UNCOMMITMK = '\G\s*<uncommit>'; 2102my $QUOTELIKEMK = '\G\s*<perl_quotelike>'; 2103my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>'; 2104my $VARIABLEMK = '\G\s*<perl_variable>'; 2105my $NOCHECKMK = '\G\s*<nocheck>'; 2106my $AUTOACTIONPATMK = '\G\s*<autoaction:'; 2107my $AUTOTREEMK = '\G\s*<autotree(?::\s*([\w:]+)\s*)?>'; 2108my $AUTOSTUBMK = '\G\s*<autostub>'; 2109my $AUTORULEMK = '\G\s*<autorule:(.*?)>'; 2110my $REJECTMK = '\G\s*<reject>'; 2111my $CONDREJECTMK = '\G\s*<reject:'; 2112my $SCOREMK = '\G\s*<score:'; 2113my $AUTOSCOREMK = '\G\s*<autoscore:'; 2114my $SKIPMK = '\G\s*<skip:'; 2115my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:'; 2116my $ENDDIRECTIVEMK = '\G\s*>'; 2117my $RESYNCMK = '\G\s*<resync>'; 2118my $RESYNCPATMK = '\G\s*<resync:'; 2119my $RULEVARPATMK = '\G\s*<rulevar:'; 2120my $DEFERPATMK = '\G\s*<defer:'; 2121my $TOKENPATMK = '\G\s*<token:'; 2122my $AUTOERRORMK = '\G\s*<error(\??)>'; 2123my $MSGERRORMK = '\G\s*<error(\??):'; 2124my $NOCHECK = '\G\s*<nocheck>'; 2125my $WARNMK = '\G\s*<warn((?::\s*(\d+)\s*)?)>'; 2126my $HINTMK = '\G\s*<hint>'; 2127my $TRACEBUILDMK = '\G\s*<trace_build((?::\s*(\d+)\s*)?)>'; 2128my $TRACEPARSEMK = '\G\s*<trace_parse((?::\s*(\d+)\s*)?)>'; 2129my $UNCOMMITPROD = $PROD.'\s*<uncommit'; 2130my $ERRORPROD = $PROD.'\s*<error'; 2131my $LONECOLON = '\G\s*:'; 2132my $OTHER = '\G\s*([^\s]+)'; 2133 2134my @lines = 0; 2135 2136sub _generate 2137{ 2138 my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0); 2139 2140 my $aftererror = 0; 2141 my $lookahead = 0; 2142 my $lookaheadspec = ""; 2143 my $must_pop_lines; 2144 if (! $lines[-1]) { 2145 push @lines, _linecount($grammar) ; 2146 $must_pop_lines = 1; 2147 } 2148 $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/) 2149 unless $self->{_check}{itempos}; 2150 for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn)) 2151 { 2152 $self->{_check}{$_} = 2153 ($grammar =~ /\$$_/) || $self->{_check}{itempos} 2154 unless $self->{_check}{$_}; 2155 } 2156 my $line; 2157 2158 my $rule = undef; 2159 my $prod = undef; 2160 my $item = undef; 2161 my $lastgreedy = ''; 2162 pos $grammar = 0; 2163 study $grammar; 2164 2165 local $::RD_HINT = $::RD_HINT; 2166 local $::RD_WARN = $::RD_WARN; 2167 local $::RD_TRACE = $::RD_TRACE; 2168 local $::RD_CHECK = $::RD_CHECK; 2169 2170 while (pos $grammar < length $grammar) 2171 { 2172 $line = $lines[-1] - _linecount($grammar) + 1; 2173 my $commitonly; 2174 my $code = ""; 2175 my @components = (); 2176 if ($grammar =~ m/$COMMENT/gco) 2177 { 2178 _parse("a comment",0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2179 next; 2180 } 2181 elsif ($grammar =~ m/$NEGLOOKAHEAD/gco) 2182 { 2183 _parse("a negative lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2184 $lookahead = $lookahead ? -$lookahead : -1; 2185 $lookaheadspec .= $1; 2186 next; # SKIP LOOKAHEAD RESET AT END OF while LOOP 2187 } 2188 elsif ($grammar =~ m/$POSLOOKAHEAD/gco) 2189 { 2190 _parse("a positive lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2191 $lookahead = $lookahead ? $lookahead : 1; 2192 $lookaheadspec .= $1; 2193 next; # SKIP LOOKAHEAD RESET AT END OF while LOOP 2194 } 2195 elsif ($grammar =~ m/(?=$ACTION)/gco 2196 and do { ($code) = extract_codeblock($grammar); $code }) 2197 { 2198 _parse("an action", $aftererror, $line, $code); 2199 $item = new Parse::RecDescent::Action($code,$lookahead,$line); 2200 $prod and $prod->additem($item) 2201 or $self->_addstartcode($code); 2202 } 2203 elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco 2204 and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1); 2205 $code }) 2206 { 2207 $code =~ s/\A\s*\(|\)\Z//g; 2208 _parse("an implicit subrule", $aftererror, $line, 2209 "( $code )"); 2210 my $implicit = $rule->nextimplicit; 2211 return undef 2212 if !$self->_generate("$implicit : $code",$replace,1); 2213 my $pos = pos $grammar; 2214 substr($grammar,$pos,0,$implicit); 2215 pos $grammar = $pos;; 2216 } 2217 elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco) 2218 { 2219 2220 # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) 2221 2222 my ($minrep,$maxrep) = (1,$MAXREP); 2223 if ($grammar =~ m/\G[(]/gc) 2224 { 2225 pos($grammar)--; 2226 2227 if ($grammar =~ m/$OPTIONAL/gco) 2228 { ($minrep, $maxrep) = (0,1) } 2229 elsif ($grammar =~ m/$ANY/gco) 2230 { $minrep = 0 } 2231 elsif ($grammar =~ m/$EXACTLY/gco) 2232 { ($minrep, $maxrep) = ($1,$1) } 2233 elsif ($grammar =~ m/$BETWEEN/gco) 2234 { ($minrep, $maxrep) = ($1,$2) } 2235 elsif ($grammar =~ m/$ATLEAST/gco) 2236 { $minrep = $1 } 2237 elsif ($grammar =~ m/$ATMOST/gco) 2238 { $maxrep = $1 } 2239 elsif ($grammar =~ m/$MANY/gco) 2240 { } 2241 elsif ($grammar =~ m/$BADREP/gco) 2242 { 2243 _parse("an invalid repetition specifier", 0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2244 _error("Incorrect specification of a repeated directive", 2245 $line); 2246 _hint("Repeated directives cannot have 2247 a maximum repetition of zero, nor can they have 2248 negative components in their ranges."); 2249 } 2250 } 2251 2252 $prod && $prod->enddirective($line,$minrep,$maxrep); 2253 } 2254 elsif ($grammar =~ m/\G\s*<[^m]/gc) 2255 { 2256 pos($grammar)-=2; 2257 2258 if ($grammar =~ m/$OPMK/gco) 2259 { 2260 # $DB::single=1; 2261 _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>"); 2262 $prod->adddirective($1, $line,$2||''); 2263 } 2264 elsif ($grammar =~ m/$UNCOMMITMK/gco) 2265 { 2266 _parse("an uncommit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2267 $item = new Parse::RecDescent::Directive('$commit=0;1', 2268 $lookahead,$line,"<uncommit>"); 2269 $prod and $prod->additem($item) 2270 or _no_rule("<uncommit>",$line); 2271 } 2272 elsif ($grammar =~ m/$QUOTELIKEMK/gco) 2273 { 2274 _parse("an perl quotelike marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2275 $item = new Parse::RecDescent::Directive( 2276 'my ($match,@res); 2277 ($match,$text,undef,@res) = 2278 Text::Balanced::extract_quotelike($text,$skip); 2279 $match ? \@res : undef; 2280 ', $lookahead,$line,"<perl_quotelike>"); 2281 $prod and $prod->additem($item) 2282 or _no_rule("<perl_quotelike>",$line); 2283 } 2284 elsif ($grammar =~ m/$CODEBLOCKMK/gco) 2285 { 2286 my $outer = $1||"{}"; 2287 _parse("an perl codeblock marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2288 $item = new Parse::RecDescent::Directive( 2289 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\'); 2290 ', $lookahead,$line,"<perl_codeblock>"); 2291 $prod and $prod->additem($item) 2292 or _no_rule("<perl_codeblock>",$line); 2293 } 2294 elsif ($grammar =~ m/$VARIABLEMK/gco) 2295 { 2296 _parse("an perl variable marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2297 $item = new Parse::RecDescent::Directive( 2298 'Text::Balanced::extract_variable($text,$skip); 2299 ', $lookahead,$line,"<perl_variable>"); 2300 $prod and $prod->additem($item) 2301 or _no_rule("<perl_variable>",$line); 2302 } 2303 elsif ($grammar =~ m/$NOCHECKMK/gco) 2304 { 2305 _parse("a disable checking marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2306 if ($rule) 2307 { 2308 _error("<nocheck> directive not at start of grammar", $line); 2309 _hint("The <nocheck> directive can only 2310 be specified at the start of a 2311 grammar (before the first rule 2312 is defined."); 2313 } 2314 else 2315 { 2316 local $::RD_CHECK = 1; 2317 } 2318 } 2319 elsif ($grammar =~ m/$AUTOSTUBMK/gco) 2320 { 2321 _parse("an autostub marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2322 $::RD_AUTOSTUB = ""; 2323 } 2324 elsif ($grammar =~ m/$AUTORULEMK/gco) 2325 { 2326 _parse("an autorule marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2327 $::RD_AUTOSTUB = $1; 2328 } 2329 elsif ($grammar =~ m/$AUTOTREEMK/gco) 2330 { 2331 my $base = defined($1) ? $1 : ""; 2332 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); 2333 $base .= "::" if $base && $base !~ /::$/; 2334 _parse("an autotree marker", $aftererror,$line, $current_match); 2335 if ($rule) 2336 { 2337 _error("<autotree> directive not at start of grammar", $line); 2338 _hint("The <autotree> directive can only 2339 be specified at the start of a 2340 grammar (before the first rule 2341 is defined."); 2342 } 2343 else 2344 { 2345 undef $self->{_AUTOACTION}; 2346 $self->{_AUTOTREE}{NODE} 2347 = new Parse::RecDescent::Action(q({bless \%item, ').$base.q('.$item[0]}),0,-1); 2348 $self->{_AUTOTREE}{TERMINAL} 2349 = new Parse::RecDescent::Action(q({bless {__VALUE__=>$item[1]}, ').$base.q('.$item[0]}),0,-1); 2350 } 2351 } 2352 2353 elsif ($grammar =~ m/$REJECTMK/gco) 2354 { 2355 _parse("an reject marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2356 $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>"); 2357 $prod and $prod->additem($item) 2358 or _no_rule("<reject>",$line); 2359 } 2360 elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco 2361 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2362 $code }) 2363 { 2364 _parse("a (conditional) reject marker", $aftererror,$line, $code ); 2365 $code =~ /\A\s*<reject:(.*)>\Z/s; 2366 my $cond = $1; 2367 $item = new Parse::RecDescent::Directive( 2368 "($1) ? undef : 1", $lookahead,$line,"<reject:$cond>"); 2369 $prod and $prod->additem($item) 2370 or _no_rule("<reject:$cond>",$line); 2371 } 2372 elsif ($grammar =~ m/(?=$SCOREMK)/gco 2373 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2374 $code }) 2375 { 2376 _parse("a score marker", $aftererror,$line, $code ); 2377 $code =~ /\A\s*<score:(.*)>\Z/s; 2378 $prod and $prod->addscore($1, $lookahead, $line) 2379 or _no_rule($code,$line); 2380 } 2381 elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco 2382 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2383 $code; 2384 } ) 2385 { 2386 _parse("an autoscore specifier", $aftererror,$line,$code); 2387 $code =~ /\A\s*<autoscore:(.*)>\Z/s; 2388 2389 $rule and $rule->addautoscore($1,$self) 2390 or _no_rule($code,$line); 2391 2392 $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); 2393 $prod and $prod->additem($item) 2394 or _no_rule($code,$line); 2395 } 2396 elsif ($grammar =~ m/$RESYNCMK/gco) 2397 { 2398 _parse("a resync to newline marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2399 $item = new Parse::RecDescent::Directive( 2400 'if ($text =~ s/(\A[^\n]*\n)//) { $return = 0; $1; } else { undef }', 2401 $lookahead,$line,"<resync>"); 2402 $prod and $prod->additem($item) 2403 or _no_rule("<resync>",$line); 2404 } 2405 elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco 2406 and do { ($code) = extract_bracketed($grammar,'<'); 2407 $code }) 2408 { 2409 _parse("a resync with pattern marker", $aftererror,$line, $code ); 2410 $code =~ /\A\s*<resync:(.*)>\Z/s; 2411 $item = new Parse::RecDescent::Directive( 2412 'if ($text =~ s/(\A'.$1.')//) { $return = 0; $1; } else { undef }', 2413 $lookahead,$line,$code); 2414 $prod and $prod->additem($item) 2415 or _no_rule($code,$line); 2416 } 2417 elsif ($grammar =~ m/(?=$SKIPMK)/gco 2418 and do { ($code) = extract_codeblock($grammar,'<'); 2419 $code }) 2420 { 2421 _parse("a skip marker", $aftererror,$line, $code ); 2422 $code =~ /\A\s*<skip:(.*)>\Z/s; 2423 if ($rule) { 2424 $item = new Parse::RecDescent::Directive( 2425 'my $oldskip = $skip; $skip='.$1.'; $oldskip', 2426 $lookahead,$line,$code); 2427 $prod and $prod->additem($item) 2428 or _no_rule($code,$line); 2429 } else { 2430 #global <skip> directive 2431 $self->{skip} = $1; 2432 } 2433 } 2434 elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco 2435 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2436 $code; 2437 } ) 2438 { 2439 _parse("a rule variable specifier", $aftererror,$line,$code); 2440 $code =~ /\A\s*<rulevar:(.*)>\Z/s; 2441 2442 $rule and $rule->addvar($1,$self) 2443 or _no_rule($code,$line); 2444 2445 $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); 2446 $prod and $prod->additem($item) 2447 or _no_rule($code,$line); 2448 } 2449 elsif ($grammar =~ m/(?=$AUTOACTIONPATMK)/gco 2450 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2451 $code; 2452 } ) 2453 { 2454 _parse("an autoaction specifier", $aftererror,$line,$code); 2455 $code =~ s/\A\s*<autoaction:(.*)>\Z/$1/s; 2456 if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) { 2457 $code = "{ $code }" 2458 } 2459 $self->{_check}{itempos} = 2460 $code =~ /\@itempos\b|\$itempos\s*\[/; 2461 $self->{_AUTOACTION} 2462 = new Parse::RecDescent::Action($code,0,-$line) 2463 } 2464 elsif ($grammar =~ m/(?=$DEFERPATMK)/gco 2465 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2466 $code; 2467 } ) 2468 { 2469 _parse("a deferred action specifier", $aftererror,$line,$code); 2470 $code =~ s/\A\s*<defer:(.*)>\Z/$1/s; 2471 if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) 2472 { 2473 $code = "{ $code }" 2474 } 2475 2476 $item = new Parse::RecDescent::Directive( 2477 "push \@{\$thisparser->{deferred}}, sub $code;", 2478 $lookahead,$line,"<defer:$code>"); 2479 $prod and $prod->additem($item) 2480 or _no_rule("<defer:$code>",$line); 2481 2482 $self->{deferrable} = 1; 2483 } 2484 elsif ($grammar =~ m/(?=$TOKENPATMK)/gco 2485 and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); 2486 $code; 2487 } ) 2488 { 2489 _parse("a token constructor", $aftererror,$line,$code); 2490 $code =~ s/\A\s*<token:(.*)>\Z/$1/s; 2491 2492 my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || (); 2493 if (!$types) 2494 { 2495 _error("Incorrect token specification: \"$@\"", $line); 2496 _hint("The <token:...> directive requires a list 2497 of one or more strings representing possible 2498 types of the specified token. For example: 2499 <token:NOUN,VERB>"); 2500 } 2501 else 2502 { 2503 $item = new Parse::RecDescent::Directive( 2504 'no strict; 2505 $return = { text => $item[-1] }; 2506 @{$return->{type}}{'.$code.'} = (1..'.$types.');', 2507 $lookahead,$line,"<token:$code>"); 2508 $prod and $prod->additem($item) 2509 or _no_rule("<token:$code>",$line); 2510 } 2511 } 2512 elsif ($grammar =~ m/$COMMITMK/gco) 2513 { 2514 _parse("an commit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2515 $item = new Parse::RecDescent::Directive('$commit = 1', 2516 $lookahead,$line,"<commit>"); 2517 $prod and $prod->additem($item) 2518 or _no_rule("<commit>",$line); 2519 } 2520 elsif ($grammar =~ m/$NOCHECKMK/gco) { 2521 _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2522 $::RD_CHECK = 0; 2523 } 2524 elsif ($grammar =~ m/$HINTMK/gco) { 2525 _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2526 $::RD_HINT = $self->{__HINT__} = 1; 2527 } 2528 elsif ($grammar =~ m/$WARNMK/gco) { 2529 _parse("an warning request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2530 $::RD_WARN = $self->{__WARN__} = $1 ? $2+0 : 1; 2531 } 2532 elsif ($grammar =~ m/$TRACEBUILDMK/gco) { 2533 _parse("an grammar build trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2534 $::RD_TRACE = $1 ? $2+0 : 1; 2535 } 2536 elsif ($grammar =~ m/$TRACEPARSEMK/gco) { 2537 _parse("an parse trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2538 $self->{__TRACE__} = $1 ? $2+0 : 1; 2539 } 2540 elsif ($grammar =~ m/$AUTOERRORMK/gco) 2541 { 2542 $commitonly = $1; 2543 _parse("an error marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2544 $item = new Parse::RecDescent::Error('',$lookahead,$1,$line); 2545 $prod and $prod->additem($item) 2546 or _no_rule("<error>",$line); 2547 $aftererror = !$commitonly; 2548 } 2549 elsif ($grammar =~ m/(?=$MSGERRORMK)/gco 2550 and do { $commitonly = $1; 2551 ($code) = extract_bracketed($grammar,'<'); 2552 $code }) 2553 { 2554 _parse("an error marker", $aftererror,$line,$code); 2555 $code =~ /\A\s*<error\??:(.*)>\Z/s; 2556 $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line); 2557 $prod and $prod->additem($item) 2558 or _no_rule("$code",$line); 2559 $aftererror = !$commitonly; 2560 } 2561 elsif (do { $commitonly = $1; 2562 ($code) = extract_bracketed($grammar,'<'); 2563 $code }) 2564 { 2565 if ($code =~ /^<[A-Z_]+>$/) 2566 { 2567 _error("Token items are not yet 2568 supported: \"$code\"", 2569 $line); 2570 _hint("Items like $code that consist of angle 2571 brackets enclosing a sequence of 2572 uppercase characters will eventually 2573 be used to specify pre-lexed tokens 2574 in a grammar. That functionality is not 2575 yet implemented. Or did you misspell 2576 \"$code\"?"); 2577 } 2578 else 2579 { 2580 _error("Untranslatable item encountered: \"$code\"", 2581 $line); 2582 _hint("Did you misspell \"$code\" 2583 or forget to comment it out?"); 2584 } 2585 } 2586 } 2587 elsif ($grammar =~ m/$RULE/gco) 2588 { 2589 _parseunneg("a rule declaration", 0, 2590 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; 2591 my $rulename = $1; 2592 if ($rulename =~ /Replace|Extend|Precompile|PrecompiledRuntime|Save/ ) 2593 { 2594 _warn(2,"Rule \"$rulename\" hidden by method 2595 Parse::RecDescent::$rulename",$line) 2596 and 2597 _hint("The rule named \"$rulename\" cannot be directly 2598 called through the Parse::RecDescent object 2599 for this grammar (although it may still 2600 be used as a subrule of other rules). 2601 It can't be directly called because 2602 Parse::RecDescent::$rulename is already defined (it 2603 is the standard method of all 2604 parsers)."); 2605 } 2606 $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace); 2607 $prod->check_pending($line) if $prod; 2608 $prod = $rule->addprod( new Parse::RecDescent::Production ); 2609 $aftererror = 0; 2610 } 2611 elsif ($grammar =~ m/$UNCOMMITPROD/gco) 2612 { 2613 pos($grammar)-=9; 2614 _parseunneg("a new (uncommitted) production", 2615 0, $lookahead, $line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; 2616 2617 $prod->check_pending($line) if $prod; 2618 $prod = new Parse::RecDescent::Production($line,1); 2619 $rule and $rule->addprod($prod) 2620 or _no_rule("<uncommit>",$line); 2621 $aftererror = 0; 2622 } 2623 elsif ($grammar =~ m/$ERRORPROD/gco) 2624 { 2625 pos($grammar)-=6; 2626 _parseunneg("a new (error) production", $aftererror, 2627 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; 2628 $prod->check_pending($line) if $prod; 2629 $prod = new Parse::RecDescent::Production($line,0,1); 2630 $rule and $rule->addprod($prod) 2631 or _no_rule("<error>",$line); 2632 $aftererror = 0; 2633 } 2634 elsif ($grammar =~ m/$PROD/gco) 2635 { 2636 _parseunneg("a new production", 0, 2637 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; 2638 $rule 2639 and (!$prod || $prod->check_pending($line)) 2640 and $prod = $rule->addprod(new Parse::RecDescent::Production($line)) 2641 or _no_rule("production",$line); 2642 $aftererror = 0; 2643 } 2644 elsif ($grammar =~ m/$LITERAL/gco) 2645 { 2646 my $literal = $1; 2647 ($code = $literal) =~ s/\\\\/\\/g; 2648 _parse("a literal terminal", $aftererror,$line,$literal); 2649 $item = new Parse::RecDescent::Literal($code,$lookahead,$line); 2650 $prod and $prod->additem($item) 2651 or _no_rule("literal terminal",$line,"'$literal'"); 2652 } 2653 elsif ($grammar =~ m/$INTERPLIT/gco) 2654 { 2655 _parse("an interpolated literal terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2656 $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line); 2657 $prod and $prod->additem($item) 2658 or _no_rule("interpolated literal terminal",$line,"'$1'"); 2659 } 2660 elsif ($grammar =~ m/$TOKEN/gco) 2661 { 2662 _parse("a /../ pattern terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); 2663 $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line); 2664 $prod and $prod->additem($item) 2665 or _no_rule("pattern terminal",$line,"/$1/"); 2666 } 2667 elsif ($grammar =~ m/(?=$MTOKEN)/gco 2668 and do { ($code, undef, @components) 2669 = extract_quotelike($grammar); 2670 $code } 2671 ) 2672 2673 { 2674 _parse("an m/../ pattern terminal", $aftererror,$line,$code); 2675 $item = new Parse::RecDescent::Token(@components[3,2,8], 2676 $lookahead,$line); 2677 $prod and $prod->additem($item) 2678 or _no_rule("pattern terminal",$line,$code); 2679 } 2680 elsif ($grammar =~ m/(?=$MATCHRULE)/gco 2681 and do { ($code) = extract_bracketed($grammar,'<'); 2682 $code 2683 } 2684 or $grammar =~ m/$SUBRULE/gco 2685 and $code = $1) 2686 { 2687 my $name = $code; 2688 my $matchrule = 0; 2689 if (substr($name,0,1) eq '<') 2690 { 2691 $name =~ s/$MATCHRULE\s*//; 2692 $name =~ s/\s*>\Z//; 2693 $matchrule = 1; 2694 } 2695 2696 # EXTRACT TRAILING ARG LIST (IF ANY) 2697 2698 my ($argcode) = extract_codeblock($grammar, "[]",'') || ''; 2699 2700 # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) 2701 2702 if ($grammar =~ m/\G[(]/gc) 2703 { 2704 pos($grammar)--; 2705 2706 if ($grammar =~ m/$OPTIONAL/gco) 2707 { 2708 _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)"); 2709 $item = new Parse::RecDescent::Repetition($name,$1,0,1, 2710 $lookahead,$line, 2711 $self, 2712 $matchrule, 2713 $argcode); 2714 $prod and $prod->additem($item) 2715 or _no_rule("repetition",$line,"$code$argcode($1)"); 2716 2717 !$matchrule and $rule and $rule->addcall($name); 2718 } 2719 elsif ($grammar =~ m/$ANY/gco) 2720 { 2721 _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); 2722 if ($2) 2723 { 2724 my $pos = pos $grammar; 2725 substr($grammar,$pos,0, 2726 "<leftop='$name(s?)': $name $2 $name>(s?) "); 2727 2728 pos $grammar = $pos; 2729 } 2730 else 2731 { 2732 $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP, 2733 $lookahead,$line, 2734 $self, 2735 $matchrule, 2736 $argcode); 2737 $prod and $prod->additem($item) 2738 or _no_rule("repetition",$line,"$code$argcode($1)"); 2739 2740 !$matchrule and $rule and $rule->addcall($name); 2741 2742 _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; 2743 } 2744 } 2745 elsif ($grammar =~ m/$MANY/gco) 2746 { 2747 _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); 2748 if ($2) 2749 { 2750 # $DB::single=1; 2751 my $pos = pos $grammar; 2752 substr($grammar,$pos,0, 2753 "<leftop='$name(s)': $name $2 $name> "); 2754 2755 pos $grammar = $pos; 2756 } 2757 else 2758 { 2759 $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP, 2760 $lookahead,$line, 2761 $self, 2762 $matchrule, 2763 $argcode); 2764 2765 $prod and $prod->additem($item) 2766 or _no_rule("repetition",$line,"$code$argcode($1)"); 2767 2768 !$matchrule and $rule and $rule->addcall($name); 2769 2770 _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; 2771 } 2772 } 2773 elsif ($grammar =~ m/$EXACTLY/gco) 2774 { 2775 _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)"); 2776 if ($2) 2777 { 2778 my $pos = pos $grammar; 2779 substr($grammar,$pos,0, 2780 "<leftop='$name($1)': $name $2 $name>($1) "); 2781 2782 pos $grammar = $pos; 2783 } 2784 else 2785 { 2786 $item = new Parse::RecDescent::Repetition($name,$1,$1,$1, 2787 $lookahead,$line, 2788 $self, 2789 $matchrule, 2790 $argcode); 2791 $prod and $prod->additem($item) 2792 or _no_rule("repetition",$line,"$code$argcode($1)"); 2793 2794 !$matchrule and $rule and $rule->addcall($name); 2795 } 2796 } 2797 elsif ($grammar =~ m/$BETWEEN/gco) 2798 { 2799 _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)"); 2800 if ($3) 2801 { 2802 my $pos = pos $grammar; 2803 substr($grammar,$pos,0, 2804 "<leftop='$name($1..$2)': $name $3 $name>($1..$2) "); 2805 2806 pos $grammar = $pos; 2807 } 2808 else 2809 { 2810 $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2, 2811 $lookahead,$line, 2812 $self, 2813 $matchrule, 2814 $argcode); 2815 $prod and $prod->additem($item) 2816 or _no_rule("repetition",$line,"$code$argcode($1..$2)"); 2817 2818 !$matchrule and $rule and $rule->addcall($name); 2819 } 2820 } 2821 elsif ($grammar =~ m/$ATLEAST/gco) 2822 { 2823 _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)"); 2824 if ($2) 2825 { 2826 my $pos = pos $grammar; 2827 substr($grammar,$pos,0, 2828 "<leftop='$name($1..)': $name $2 $name>($1..) "); 2829 2830 pos $grammar = $pos; 2831 } 2832 else 2833 { 2834 $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP, 2835 $lookahead,$line, 2836 $self, 2837 $matchrule, 2838 $argcode); 2839 $prod and $prod->additem($item) 2840 or _no_rule("repetition",$line,"$code$argcode($1..)"); 2841 2842 !$matchrule and $rule and $rule->addcall($name); 2843 _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK; 2844 } 2845 } 2846 elsif ($grammar =~ m/$ATMOST/gco) 2847 { 2848 _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)"); 2849 if ($2) 2850 { 2851 my $pos = pos $grammar; 2852 substr($grammar,$pos,0, 2853 "<leftop='$name(..$1)': $name $2 $name>(..$1) "); 2854 2855 pos $grammar = $pos; 2856 } 2857 else 2858 { 2859 $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1, 2860 $lookahead,$line, 2861 $self, 2862 $matchrule, 2863 $argcode); 2864 $prod and $prod->additem($item) 2865 or _no_rule("repetition",$line,"$code$argcode(..$1)"); 2866 2867 !$matchrule and $rule and $rule->addcall($name); 2868 } 2869 } 2870 elsif ($grammar =~ m/$BADREP/gco) 2871 { 2872 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); 2873 _parse("an subrule match with invalid repetition specifier", 0,$line, $current_match); 2874 _error("Incorrect specification of a repeated subrule", 2875 $line); 2876 _hint("Repeated subrules like \"$code$argcode$current_match\" cannot have 2877 a maximum repetition of zero, nor can they have 2878 negative components in their ranges."); 2879 } 2880 } 2881 else 2882 { 2883 _parse("a subrule match", $aftererror,$line,$code); 2884 my $desc; 2885 if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/) 2886 { $desc = $self->{"rules"}{$name}->expected } 2887 $item = new Parse::RecDescent::Subrule($name, 2888 $lookahead, 2889 $line, 2890 $desc, 2891 $matchrule, 2892 $argcode); 2893 2894 $prod and $prod->additem($item) 2895 or _no_rule("(sub)rule",$line,$name); 2896 2897 !$matchrule and $rule and $rule->addcall($name); 2898 } 2899 } 2900 elsif ($grammar =~ m/$LONECOLON/gco ) 2901 { 2902 _error("Unexpected colon encountered", $line); 2903 _hint("Did you mean \"|\" (to start a new production)? 2904 Or perhaps you forgot that the colon 2905 in a rule definition must be 2906 on the same line as the rule name?"); 2907 } 2908 elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED 2909 { 2910 _error("Malformed action encountered", 2911 $line); 2912 _hint("Did you forget the closing curly bracket 2913 or is there a syntax error in the action?"); 2914 } 2915 elsif ($grammar =~ m/$OTHER/gco ) 2916 { 2917 _error("Untranslatable item encountered: \"$1\"", 2918 $line); 2919 _hint("Did you misspell \"$1\" 2920 or forget to comment it out?"); 2921 } 2922 2923 if ($lookaheadspec =~ tr /././ > 3) 2924 { 2925 $lookaheadspec =~ s/\A\s+//; 2926 $lookahead = $lookahead<0 2927 ? 'a negative lookahead ("...!")' 2928 : 'a positive lookahead ("...")' ; 2929 _warn(1,"Found two or more lookahead specifiers in a 2930 row.",$line) 2931 and 2932 _hint("Multiple positive and/or negative lookaheads 2933 are simply multiplied together to produce a 2934 single positive or negative lookahead 2935 specification. In this case the sequence 2936 \"$lookaheadspec\" was reduced to $lookahead. 2937 Was this your intention?"); 2938 } 2939 $lookahead = 0; 2940 $lookaheadspec = ""; 2941 2942 $grammar =~ m/\G\s+/gc; 2943 } 2944 2945 if ($must_pop_lines) { 2946 pop @lines; 2947 } 2948 2949 unless ($ERRORS or $isimplicit or !$::RD_CHECK) 2950 { 2951 $self->_check_grammar(); 2952 } 2953 2954 unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling) 2955 { 2956 my $code = $self->_code(); 2957 if (defined $::RD_TRACE) 2958 { 2959 my $mode = ($nextnamespace eq "namespace000002") ? '>' : '>>'; 2960 print STDERR "printing code (", length($code),") to RD_TRACE\n"; 2961 local *TRACE_FILE; 2962 open TRACE_FILE, $mode, "RD_TRACE" 2963 and print TRACE_FILE "my \$ERRORS;\n$code" 2964 and close TRACE_FILE; 2965 } 2966 2967 unless ( eval "$code 1" ) 2968 { 2969 _error("Internal error in generated parser code!"); 2970 $@ =~ s/at grammar/in grammar at/; 2971 _hint($@); 2972 } 2973 } 2974 2975 if ($ERRORS and !_verbosity("HINT")) 2976 { 2977 local $::RD_HINT = defined $::RD_HINT ? $::RD_HINT : 1; 2978 _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s") 2979 for hints on fixing these problems. Use $::RD_HINT = 0 2980 to disable this message.'); 2981 } 2982 if ($ERRORS) { $ERRORS=0; return } 2983 return $self; 2984} 2985 2986 2987sub _addstartcode($$) 2988{ 2989 my ($self, $code) = @_; 2990 $code =~ s/\A\s*\{(.*)\}\Z/$1/s; 2991 2992 $self->{"startcode"} .= "$code;\n"; 2993} 2994 2995# CHECK FOR GRAMMAR PROBLEMS.... 2996 2997sub _check_insatiable($$$$) 2998{ 2999 my ($subrule,$repspec,$grammar,$line) = @_; 3000 pos($grammar)=pos($_[2]); 3001 return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco; 3002 my $min = 1; 3003 if ( $grammar =~ m/$MANY/gco 3004 || $grammar =~ m/$EXACTLY/gco 3005 || $grammar =~ m/$ATMOST/gco 3006 || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 } 3007 || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 } 3008 || $grammar =~ m/$SUBRULE(?!\s*:)/gco 3009 ) 3010 { 3011 return unless $1 eq $subrule && $min > 0; 3012 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); 3013 _warn(3,"Subrule sequence \"$subrule($repspec) $current_match\" will 3014 (almost certainly) fail.",$line) 3015 and 3016 _hint("Unless subrule \"$subrule\" performs some cunning 3017 lookahead, the repetition \"$subrule($repspec)\" will 3018 insatiably consume as many matches of \"$subrule\" as it 3019 can, leaving none to match the \"$current_match\" that follows."); 3020 } 3021} 3022 3023sub _check_grammar ($) 3024{ 3025 my $self = shift; 3026 my $rules = $self->{"rules"}; 3027 my $rule; 3028 foreach $rule ( values %$rules ) 3029 { 3030 next if ! $rule->{"changed"}; 3031 3032 # CHECK FOR UNDEFINED RULES 3033 3034 my $call; 3035 foreach $call ( @{$rule->{"calls"}} ) 3036 { 3037 if (!defined ${$rules}{$call} 3038 &&!defined &{"Parse::RecDescent::$call"}) 3039 { 3040 if (!defined $::RD_AUTOSTUB) 3041 { 3042 _warn(3,"Undefined (sub)rule \"$call\" 3043 used in a production.") 3044 and 3045 _hint("Will you be providing this rule 3046 later, or did you perhaps 3047 misspell \"$call\"? Otherwise 3048 it will be treated as an 3049 immediate <reject>."); 3050 eval "sub $self->{namespace}::$call {undef}"; 3051 } 3052 else # EXPERIMENTAL 3053 { 3054 my $rule = qq{'$call'}; 3055 if ($::RD_AUTOSTUB and $::RD_AUTOSTUB ne "1") { 3056 $rule = $::RD_AUTOSTUB; 3057 } 3058 _warn(1,"Autogenerating rule: $call") 3059 and 3060 _hint("A call was made to a subrule 3061 named \"$call\", but no such 3062 rule was specified. However, 3063 since \$::RD_AUTOSTUB 3064 was defined, a rule stub 3065 ($call : $rule) was 3066 automatically created."); 3067 3068 $self->_generate("$call: $rule",0,1); 3069 } 3070 } 3071 } 3072 3073 # CHECK FOR LEFT RECURSION 3074 3075 if ($rule->isleftrec($rules)) 3076 { 3077 _error("Rule \"$rule->{name}\" is left-recursive."); 3078 _hint("Redesign the grammar so it's not left-recursive. 3079 That will probably mean you need to re-implement 3080 repetitions using the '(s)' notation. 3081 For example: \"$rule->{name}(s)\"."); 3082 next; 3083 } 3084 3085 # CHECK FOR PRODUCTIONS FOLLOWING EMPTY PRODUCTIONS 3086 { 3087 my $hasempty; 3088 my $prod; 3089 foreach $prod ( @{$rule->{"prods"}} ) { 3090 if ($hasempty) { 3091 _error("Production " . $prod->describe . " for \"$rule->{name}\" 3092 will never be reached (preceding empty production will 3093 always match first)."); 3094 _hint("Reorder the grammar so that the empty production 3095 is last in the list or productions."); 3096 last; 3097 } 3098 $hasempty ||= $prod->isempty(); 3099 } 3100 } 3101 } 3102} 3103 3104# GENERATE ACTUAL PARSER CODE 3105 3106sub _code($) 3107{ 3108 my $self = shift; 3109 my $initial_skip = defined($self->{skip}) ? 3110 '$skip = ' . $self->{skip} . ';' : 3111 $self->_dump([$skip],[qw(skip)]); 3112 3113 my $code = qq! 3114package $self->{namespace}; 3115use strict; 3116use vars qw(\$skip \$AUTOLOAD $self->{localvars} ); 3117\@$self->{namespace}\::ISA = (); 3118$initial_skip 3119$self->{startcode} 3120 3121{ 3122local \$SIG{__WARN__} = sub {0}; 3123# PRETEND TO BE IN Parse::RecDescent NAMESPACE 3124*$self->{namespace}::AUTOLOAD = sub 3125{ 3126 no strict 'refs'; 3127! 3128# This generated code uses ${"AUTOLOAD"} rather than $AUTOLOAD in 3129# order to avoid the circular reference documented here: 3130# https://rt.perl.org/rt3/Public/Bug/Display.html?id=110248 3131# As a result of the investigation of 3132# https://rt.cpan.org/Ticket/Display.html?id=53710 3133. qq! 3134 \${"AUTOLOAD"} =~ s/^$self->{namespace}/Parse::RecDescent/; 3135 goto &{\${"AUTOLOAD"}}; 3136} 3137} 3138 3139!; 3140 $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';"; 3141 $self->{"startcode"} = ''; 3142 3143 my $rule; 3144 # sort the rules to ensure the output is reproducible 3145 foreach $rule ( sort { $a->{name} cmp $b->{name} } 3146 values %{$self->{"rules"}} ) 3147 { 3148 if ($rule->{"changed"}) 3149 { 3150 $code .= $rule->code($self->{"namespace"},$self); 3151 $rule->{"changed"} = 0; 3152 } 3153 } 3154 3155 return $code; 3156} 3157 3158# A wrapper for Data::Dumper->Dump, which localizes some variables to 3159# keep the output in a form suitable for Parse::RecDescent. 3160# 3161# List of variables and their defaults taken from 3162# $Data::Dumper::VERSION == 2.158 3163 3164sub _dump { 3165 require Data::Dumper; 3166 3167 # 3168 # Allow the user's settings to persist for some features in case 3169 # RD_TRACE is set. These shouldn't affect the eval()-ability of 3170 # the resulting parser. 3171 # 3172 3173 #local $Data::Dumper::Indent = 2; 3174 #local $Data::Dumper::Useqq = 0; 3175 #local $Data::Dumper::Quotekeys = 1; 3176 #local $Data::Dumper::Useperl = 0; 3177 3178 # 3179 # These may affect whether the output is valid perl code for 3180 # eval(), and must be controlled. Set them to their default 3181 # values. 3182 # 3183 3184 local $Data::Dumper::Purity = 0; 3185 local $Data::Dumper::Pad = ""; 3186 local $Data::Dumper::Varname = "VAR"; 3187 local $Data::Dumper::Terse = 0; 3188 local $Data::Dumper::Freezer = ""; 3189 local $Data::Dumper::Toaster = ""; 3190 local $Data::Dumper::Deepcopy = 0; 3191 local $Data::Dumper::Bless = "bless"; 3192 local $Data::Dumper::Maxdepth = 0; 3193 local $Data::Dumper::Pair = ' => '; 3194 local $Data::Dumper::Deparse = 0; 3195 local $Data::Dumper::Sparseseen = 0; 3196 3197 # 3198 # Modify the below options from their defaults. 3199 # 3200 3201 # Sort the keys to ensure the output is reproducible 3202 local $Data::Dumper::Sortkeys = 1; 3203 3204 # Don't stop recursing 3205 local $Data::Dumper::Maxrecurse = 0; 3206 3207 return Data::Dumper->Dump(@_[1..$#_]); 3208} 3209 3210# EXECUTING A PARSE.... 3211 3212sub AUTOLOAD # ($parser, $text; $linenum, @args) 3213{ 3214 croak "Could not find method: $AUTOLOAD\n" unless ref $_[0]; 3215 my $class = ref($_[0]) || $_[0]; 3216 my $text = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : "$_[1]"; 3217 $_[0]->{lastlinenum} = _linecount($text); 3218 $_[0]->{lastlinenum} += ($_[2]||0) if @_ > 2; 3219 $_[0]->{offsetlinenum} = $_[0]->{lastlinenum}; 3220 $_[0]->{fulltext} = $text; 3221 $_[0]->{fulltextlen} = length $text; 3222 $_[0]->{linecounter_cache} = {}; 3223 $_[0]->{deferred} = []; 3224 $_[0]->{errors} = []; 3225 my @args = @_[3..$#_]; 3226 my $args = sub { [ @args ] }; 3227 3228 $AUTOLOAD =~ s/$class/$_[0]->{namespace}/; 3229 no strict "refs"; 3230 3231 local $::RD_WARN = $::RD_WARN || $_[0]->{__WARN__}; 3232 local $::RD_HINT = $::RD_HINT || $_[0]->{__HINT__}; 3233 local $::RD_TRACE = $::RD_TRACE || $_[0]->{__TRACE__}; 3234 3235 croak "Unknown starting rule ($AUTOLOAD) called\n" 3236 unless defined &$AUTOLOAD; 3237 my $retval = &{$AUTOLOAD}( 3238 $_[0], # $parser 3239 $text, # $text 3240 undef, # $repeating 3241 undef, # $_noactions 3242 $args, # \@args 3243 undef, # $_itempos 3244 ); 3245 3246 3247 if (defined $retval) 3248 { 3249 foreach ( @{$_[0]->{deferred}} ) { &$_; } 3250 } 3251 else 3252 { 3253 foreach ( @{$_[0]->{errors}} ) { _error(@$_); } 3254 } 3255 3256 if (ref $_[1] eq 'SCALAR') { ${$_[1]} = $text } 3257 3258 $ERRORS = 0; 3259 return $retval; 3260} 3261 3262sub _parserepeat($$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES 3263{ 3264 my ($parser, $text, $prod, $min, $max, $_noactions, $expectation, $argcode, $_itempos) = @_; 3265 my @tokens = (); 3266 3267 my $itemposfirst; 3268 my $reps; 3269 for ($reps=0; $reps<$max;) 3270 { 3271 $expectation->at($text); 3272 my $_savetext = $text; 3273 my $prevtextlen = length $text; 3274 my $_tok; 3275 if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$argcode,$_itempos))) 3276 { 3277 $text = $_savetext; 3278 last; 3279 } 3280 3281 if (defined($_itempos) and !defined($itemposfirst)) 3282 { 3283 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); 3284 } 3285 3286 push @tokens, $_tok if defined $_tok; 3287 last if ++$reps >= $min and $prevtextlen == length $text; 3288 } 3289 3290 do { $expectation->failed(); return undef} if $reps<$min; 3291 3292 if (defined $itemposfirst) 3293 { 3294 Parse::RecDescent::Production::_update_itempos($_itempos, $itemposfirst, undef, [qw(from)]); 3295 } 3296 3297 $_[1] = $text; 3298 return [@tokens]; 3299} 3300 3301sub set_autoflush { 3302 my $orig_selected = select $_[0]; 3303 $| = 1; 3304 select $orig_selected; 3305 return; 3306} 3307 3308# ERROR REPORTING.... 3309 3310sub _write_ERROR { 3311 my ($errorprefix, $errortext) = @_; 3312 return if $errortext !~ /\S/; 3313 $errorprefix =~ s/\s+\Z//; 3314 local $^A = q{}; 3315 3316 formline(<<'END_FORMAT', $errorprefix, $errortext); 3317@>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 3318END_FORMAT 3319 formline(<<'END_FORMAT', $errortext); 3320~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 3321END_FORMAT 3322 print {*STDERR} $^A; 3323} 3324 3325# TRACING 3326 3327my $TRACE_FORMAT = <<'END_FORMAT'; 3328@>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| 3329 | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| 3330END_FORMAT 3331 3332my $TRACECONTEXT_FORMAT = <<'END_FORMAT'; 3333@>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<< 3334 | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<< 3335END_FORMAT 3336 3337sub _write_TRACE { 3338 my ($tracelevel, $tracerulename, $tracemsg) = @_; 3339 return if $tracemsg !~ /\S/; 3340 $tracemsg =~ s/\s*\Z//; 3341 local $^A = q{}; 3342 my $bar = '|'; 3343 formline($TRACE_FORMAT, $tracelevel, $tracerulename, $bar, $tracemsg, $tracemsg); 3344 print {*STDERR} $^A; 3345} 3346 3347sub _write_TRACECONTEXT { 3348 my ($tracelevel, $tracerulename, $tracecontext) = @_; 3349 return if $tracecontext !~ /\S/; 3350 $tracecontext =~ s/\s*\Z//; 3351 local $^A = q{}; 3352 my $bar = '|'; 3353 formline($TRACECONTEXT_FORMAT, $tracelevel, $tracerulename, $bar, $tracecontext, $tracecontext); 3354 print {*STDERR} $^A; 3355} 3356 3357sub _verbosity($) 3358{ 3359 defined $::RD_TRACE 3360 or defined $::RD_HINT and $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/ 3361 or defined $::RD_WARN and $::RD_WARN and $_[0] =~ /ERRORS|WARN/ 3362 or defined $::RD_ERRORS and $::RD_ERRORS and $_[0] =~ /ERRORS/ 3363} 3364 3365sub _error($;$) 3366{ 3367 $ERRORS++; 3368 return 0 if ! _verbosity("ERRORS"); 3369 my $errortext = $_[0]; 3370 my $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : ""); 3371 $errortext =~ s/\s+/ /g; 3372 print {*STDERR} "\n" if _verbosity("WARN"); 3373 _write_ERROR($errorprefix, $errortext); 3374 return 1; 3375} 3376 3377sub _warn($$;$) 3378{ 3379 return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1)); 3380 my $errortext = $_[1]; 3381 my $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : ""); 3382 print {*STDERR} "\n" if _verbosity("HINT"); 3383 $errortext =~ s/\s+/ /g; 3384 _write_ERROR($errorprefix, $errortext); 3385 return 1; 3386} 3387 3388sub _hint($) 3389{ 3390 return 0 unless $::RD_HINT; 3391 my $errortext = $_[0]; 3392 my $errorprefix = "Hint" . ($_[1] ? " (line $_[1])" : ""); 3393 $errortext =~ s/\s+/ /g; 3394 _write_ERROR($errorprefix, $errortext); 3395 return 1; 3396} 3397 3398sub _tracemax($) 3399{ 3400 if (defined $::RD_TRACE 3401 && $::RD_TRACE =~ /\d+/ 3402 && $::RD_TRACE>1 3403 && $::RD_TRACE+10<length($_[0])) 3404 { 3405 my $count = length($_[0]) - $::RD_TRACE; 3406 return substr($_[0],0,$::RD_TRACE/2) 3407 . "...<$count>..." 3408 . substr($_[0],-$::RD_TRACE/2); 3409 } 3410 else 3411 { 3412 return substr($_[0],0,500); 3413 } 3414} 3415 3416sub _tracefirst($) 3417{ 3418 if (defined $::RD_TRACE 3419 && $::RD_TRACE =~ /\d+/ 3420 && $::RD_TRACE>1 3421 && $::RD_TRACE+10<length($_[0])) 3422 { 3423 my $count = length($_[0]) - $::RD_TRACE; 3424 return substr($_[0],0,$::RD_TRACE) . "...<+$count>"; 3425 } 3426 else 3427 { 3428 return substr($_[0],0,500); 3429 } 3430} 3431 3432my $lastcontext = ''; 3433my $lastrulename = ''; 3434my $lastlevel = ''; 3435 3436sub _trace($;$$$) 3437{ 3438 my $tracemsg = $_[0]; 3439 my $tracecontext = $_[1]||$lastcontext; 3440 my $tracerulename = $_[2]||$lastrulename; 3441 my $tracelevel = $_[3]||$lastlevel; 3442 if ($tracerulename) { $lastrulename = $tracerulename } 3443 if ($tracelevel) { $lastlevel = $tracelevel } 3444 3445 $tracecontext =~ s/\n/\\n/g; 3446 $tracecontext =~ s/\s+/ /g; 3447 $tracerulename = qq{$tracerulename}; 3448 _write_TRACE($tracelevel, $tracerulename, $tracemsg); 3449 if ($tracecontext ne $lastcontext) 3450 { 3451 if ($tracecontext) 3452 { 3453 $lastcontext = _tracefirst($tracecontext); 3454 $tracecontext = qq{"$tracecontext"}; 3455 } 3456 else 3457 { 3458 $tracecontext = qq{<NO TEXT LEFT>}; 3459 } 3460 _write_TRACECONTEXT($tracelevel, $tracerulename, $tracecontext); 3461 } 3462} 3463 3464sub _matchtracemessage 3465{ 3466 my ($self, $reject) = @_; 3467 3468 my $prefix = ''; 3469 my $postfix = ''; 3470 my $matched = not $reject; 3471 my @t = ("Matched", "Didn't match"); 3472 if (exists $self->{lookahead} and $self->{lookahead}) 3473 { 3474 $postfix = $reject ? "(reject)" : "(keep)"; 3475 $prefix = "..."; 3476 if ($self->{lookahead} < 0) 3477 { 3478 $prefix .= '!'; 3479 $matched = not $matched; 3480 } 3481 } 3482 $prefix . ($matched ? $t[0] : $t[1]) . $postfix; 3483} 3484 3485sub _parseunneg($$$$$) 3486{ 3487 _parse($_[0],$_[1],$_[3],$_[4]); 3488 if ($_[2]<0) 3489 { 3490 _error("Can't negate \"$_[4]\".",$_[3]); 3491 _hint("You can't negate $_[0]. Remove the \"...!\" before 3492 \"$_[4]\"."); 3493 return 0; 3494 } 3495 return 1; 3496} 3497 3498sub _parse($$$$) 3499{ 3500 my $what = $_[3]; 3501 $what =~ s/^\s+//; 3502 if ($_[1]) 3503 { 3504 _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2]) 3505 and 3506 _hint("An unconditional <error> always causes the 3507 production containing it to immediately fail. 3508 \u$_[0] that follows an <error> 3509 will never be reached. Did you mean to use 3510 <error?> instead?"); 3511 } 3512 3513 return if ! _verbosity("TRACE"); 3514 my $errortext = "Treating \"$what\" as $_[0]"; 3515 my $errorprefix = "Parse::RecDescent"; 3516 $errortext =~ s/\s+/ /g; 3517 _write_ERROR($errorprefix, $errortext); 3518} 3519 3520sub _linecount($) { 3521 scalar substr($_[0], pos $_[0]||0) =~ tr/\n// 3522} 3523 3524 3525package main; 3526 3527use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK ); 3528$::RD_CHECK = 1; 3529$::RD_ERRORS = 1; 3530$::RD_WARN = 3; 3531 35321; 3533 3534__END__ 3535 3536=head1 NAME 3537 3538Parse::RecDescent - Generate Recursive-Descent Parsers 3539 3540=head1 VERSION 3541 3542This document describes version 1.967015 of Parse::RecDescent 3543released April 4th, 2017. 3544 3545=head1 SYNOPSIS 3546 3547 use Parse::RecDescent; 3548 3549 # Generate a parser from the specification in $grammar: 3550 3551 $parser = new Parse::RecDescent ($grammar); 3552 3553 # Generate a parser from the specification in $othergrammar 3554 3555 $anotherparser = new Parse::RecDescent ($othergrammar); 3556 3557 3558 # Parse $text using rule 'startrule' (which must be 3559 # defined in $grammar): 3560 3561 $parser->startrule($text); 3562 3563 3564 # Parse $text using rule 'otherrule' (which must also 3565 # be defined in $grammar): 3566 3567 $parser->otherrule($text); 3568 3569 3570 # Change the universal token prefix pattern 3571 # before building a grammar 3572 # (the default is: '\s*'): 3573 3574 $Parse::RecDescent::skip = '[ \t]+'; 3575 3576 3577 # Replace productions of existing rules (or create new ones) 3578 # with the productions defined in $newgrammar: 3579 3580 $parser->Replace($newgrammar); 3581 3582 3583 # Extend existing rules (or create new ones) 3584 # by adding extra productions defined in $moregrammar: 3585 3586 $parser->Extend($moregrammar); 3587 3588 3589 # Global flags (useful as command line arguments under -s): 3590 3591 $::RD_ERRORS # unless undefined, report fatal errors 3592 $::RD_WARN # unless undefined, also report non-fatal problems 3593 $::RD_HINT # if defined, also suggestion remedies 3594 $::RD_TRACE # if defined, also trace parsers' behaviour 3595 $::RD_AUTOSTUB # if defined, generates "stubs" for undefined rules 3596 $::RD_AUTOACTION # if defined, appends specified action to productions 3597 3598 3599=head1 DESCRIPTION 3600 3601=head2 Overview 3602 3603Parse::RecDescent incrementally generates top-down recursive-descent text 3604parsers from simple I<yacc>-like grammar specifications. It provides: 3605 3606=over 4 3607 3608=item * 3609 3610Regular expressions or literal strings as terminals (tokens), 3611 3612=item * 3613 3614Multiple (non-contiguous) productions for any rule, 3615 3616=item * 3617 3618Repeated and optional subrules within productions, 3619 3620=item * 3621 3622Full access to Perl within actions specified as part of the grammar, 3623 3624=item * 3625 3626Simple automated error reporting during parser generation and parsing, 3627 3628=item * 3629 3630The ability to commit to, uncommit to, or reject particular 3631productions during a parse, 3632 3633=item * 3634 3635The ability to pass data up and down the parse tree ("down" via subrule 3636argument lists, "up" via subrule return values) 3637 3638=item * 3639 3640Incremental extension of the parsing grammar (even during a parse), 3641 3642=item * 3643 3644Precompilation of parser objects, 3645 3646=item * 3647 3648User-definable reduce-reduce conflict resolution via 3649"scoring" of matching productions. 3650 3651=back 3652 3653=head2 Using C<Parse::RecDescent> 3654 3655Parser objects are created by calling C<Parse::RecDescent::new>, passing in a 3656grammar specification (see the following subsections). If the grammar is 3657correct, C<new> returns a blessed reference which can then be used to initiate 3658parsing through any rule specified in the original grammar. A typical sequence 3659looks like this: 3660 3661 $grammar = q { 3662 # GRAMMAR SPECIFICATION HERE 3663 }; 3664 3665 $parser = new Parse::RecDescent ($grammar) or die "Bad grammar!\n"; 3666 3667 # acquire $text 3668 3669 defined $parser->startrule($text) or print "Bad text!\n"; 3670 3671The rule through which parsing is initiated must be explicitly defined 3672in the grammar (i.e. for the above example, the grammar must include a 3673rule of the form: "startrule: <subrules>". 3674 3675If the starting rule succeeds, its value (see below) 3676is returned. Failure to generate the original parser or failure to match a text 3677is indicated by returning C<undef>. Note that it's easy to set up grammars 3678that can succeed, but which return a value of 0, "0", or "". So don't be 3679tempted to write: 3680 3681 $parser->startrule($text) or print "Bad text!\n"; 3682 3683Normally, the parser has no effect on the original text. So in the 3684previous example the value of $text would be unchanged after having 3685been parsed. 3686 3687If, however, the text to be matched is passed by reference: 3688 3689 $parser->startrule(\$text) 3690 3691then any text which was consumed during the match will be removed from the 3692start of $text. 3693 3694 3695=head2 Rules 3696 3697In the grammar from which the parser is built, rules are specified by 3698giving an identifier (which must satisfy /[A-Za-z]\w*/), followed by a 3699colon I<on the same line>, followed by one or more productions, 3700separated by single vertical bars. The layout of the productions 3701is entirely free-format: 3702 3703 rule1: production1 3704 | production2 | 3705 production3 | production4 3706 3707At any point in the grammar previously defined rules may be extended with 3708additional productions. This is achieved by redeclaring the rule with the new 3709productions. Thus: 3710 3711 rule1: a | b | c 3712 rule2: d | e | f 3713 rule1: g | h 3714 3715is exactly equivalent to: 3716 3717 rule1: a | b | c | g | h 3718 rule2: d | e | f 3719 3720Each production in a rule consists of zero or more items, each of which 3721may be either: the name of another rule to be matched (a "subrule"), 3722a pattern or string literal to be matched directly (a "token"), a 3723block of Perl code to be executed (an "action"), a special instruction 3724to the parser (a "directive"), or a standard Perl comment (which is 3725ignored). 3726 3727A rule matches a text if one of its productions matches. A production 3728matches if each of its items match consecutive substrings of the 3729text. The productions of a rule being matched are tried in the same 3730order that they appear in the original grammar, and the first matching 3731production terminates the match attempt (successfully). If all 3732productions are tried and none matches, the match attempt fails. 3733 3734Note that this behaviour is quite different from the "prefer the longer match" 3735behaviour of I<yacc>. For example, if I<yacc> were parsing the rule: 3736 3737 seq : 'A' 'B' 3738 | 'A' 'B' 'C' 3739 3740upon matching "AB" it would look ahead to see if a 'C' is next and, if 3741so, will match the second production in preference to the first. In 3742other words, I<yacc> effectively tries all the productions of a rule 3743breadth-first in parallel, and selects the "best" match, where "best" 3744means longest (note that this is a gross simplification of the true 3745behaviour of I<yacc> but it will do for our purposes). 3746 3747In contrast, C<Parse::RecDescent> tries each production depth-first in 3748sequence, and selects the "best" match, where "best" means first. This is 3749the fundamental difference between "bottom-up" and "recursive descent" 3750parsing. 3751 3752Each successfully matched item in a production is assigned a value, 3753which can be accessed in subsequent actions within the same 3754production (or, in some cases, as the return value of a successful 3755subrule call). Unsuccessful items don't have an associated value, 3756since the failure of an item causes the entire surrounding production 3757to immediately fail. The following sections describe the various types 3758of items and their success values. 3759 3760 3761=head2 Subrules 3762 3763A subrule which appears in a production is an instruction to the parser to 3764attempt to match the named rule at that point in the text being 3765parsed. If the named subrule is not defined when requested the 3766production containing it immediately fails (unless it was "autostubbed" - see 3767L<Autostubbing>). 3768 3769A rule may (recursively) call itself as a subrule, but I<not> as the 3770left-most item in any of its productions (since such recursions are usually 3771non-terminating). 3772 3773The value associated with a subrule is the value associated with its 3774C<$return> variable (see L<"Actions"> below), or with the last successfully 3775matched item in the subrule match. 3776 3777Subrules may also be specified with a trailing repetition specifier, 3778indicating that they are to be (greedily) matched the specified number 3779of times. The available specifiers are: 3780 3781 subrule(?) # Match one-or-zero times 3782 subrule(s) # Match one-or-more times 3783 subrule(s?) # Match zero-or-more times 3784 subrule(N) # Match exactly N times for integer N > 0 3785 subrule(N..M) # Match between N and M times 3786 subrule(..M) # Match between 1 and M times 3787 subrule(N..) # Match at least N times 3788 3789Repeated subrules keep matching until either the subrule fails to 3790match, or it has matched the minimal number of times but fails to 3791consume any of the parsed text (this second condition prevents the 3792subrule matching forever in some cases). 3793 3794Since a repeated subrule may match many instances of the subrule itself, the 3795value associated with it is not a simple scalar, but rather a reference to a 3796list of scalars, each of which is the value associated with one of the 3797individual subrule matches. In other words in the rule: 3798 3799 program: statement(s) 3800 3801the value associated with the repeated subrule "statement(s)" is a reference 3802to an array containing the values matched by each call to the individual 3803subrule "statement". 3804 3805Repetition modifiers may include a separator pattern: 3806 3807 program: statement(s /;/) 3808 3809specifying some sequence of characters to be skipped between each repetition. 3810This is really just a shorthand for the E<lt>leftop:...E<gt> directive 3811(see below). 3812 3813=head2 Tokens 3814 3815If a quote-delimited string or a Perl regex appears in a production, 3816the parser attempts to match that string or pattern at that point in 3817the text. For example: 3818 3819 typedef: "typedef" typename identifier ';' 3820 3821 identifier: /[A-Za-z_][A-Za-z0-9_]*/ 3822 3823As in regular Perl, a single quoted string is uninterpolated, whilst 3824a double-quoted string or a pattern is interpolated (at the time 3825of matching, I<not> when the parser is constructed). Hence, it is 3826possible to define rules in which tokens can be set at run-time: 3827 3828 typedef: "$::typedefkeyword" typename identifier ';' 3829 3830 identifier: /$::identpat/ 3831 3832Note that, since each rule is implemented inside a special namespace 3833belonging to its parser, it is necessary to explicitly quantify 3834variables from the main package. 3835 3836Regex tokens can be specified using just slashes as delimiters 3837or with the explicit C<mE<lt>delimiterE<gt>......E<lt>delimiterE<gt>> syntax: 3838 3839 typedef: "typedef" typename identifier ';' 3840 3841 typename: /[A-Za-z_][A-Za-z0-9_]*/ 3842 3843 identifier: m{[A-Za-z_][A-Za-z0-9_]*} 3844 3845A regex of either type can also have any valid trailing parameter(s) 3846(that is, any of [cgimsox]): 3847 3848 typedef: "typedef" typename identifier ';' 3849 3850 identifier: / [a-z_] # LEADING ALPHA OR UNDERSCORE 3851 [a-z0-9_]* # THEN DIGITS ALSO ALLOWED 3852 /ix # CASE/SPACE/COMMENT INSENSITIVE 3853 3854The value associated with any successfully matched token is a string 3855containing the actual text which was matched by the token. 3856 3857It is important to remember that, since each grammar is specified in a 3858Perl string, all instances of the universal escape character '\' within 3859a grammar must be "doubled", so that they interpolate to single '\'s when 3860the string is compiled. For example, to use the grammar: 3861 3862 word: /\S+/ | backslash 3863 line: prefix word(s) "\n" 3864 backslash: '\\' 3865 3866the following code is required: 3867 3868 $parser = new Parse::RecDescent (q{ 3869 3870 word: /\\S+/ | backslash 3871 line: prefix word(s) "\\n" 3872 backslash: '\\\\' 3873 3874 }); 3875 3876=head2 Anonymous subrules 3877 3878Parentheses introduce a nested scope that is very like a call to an anonymous 3879subrule. Hence they are useful for "in-lining" subroutine calls, and other 3880kinds of grouping behaviour. For example, instead of: 3881 3882 word: /\S+/ | backslash 3883 line: prefix word(s) "\n" 3884 3885you could write: 3886 3887 line: prefix ( /\S+/ | backslash )(s) "\n" 3888 3889and get exactly the same effects. 3890 3891Parentheses are also use for collecting unrepeated alternations within a 3892single production. 3893 3894 secret_identity: "Mr" ("Incredible"|"Fantastic"|"Sheen") ", Esq." 3895 3896 3897=head2 Terminal Separators 3898 3899For the purpose of matching, each terminal in a production is considered 3900to be preceded by a "prefix" - a pattern which must be 3901matched before a token match is attempted. By default, the 3902prefix is optional whitespace (which always matches, at 3903least trivially), but this default may be reset in any production. 3904 3905The variable C<$Parse::RecDescent::skip> stores the universal 3906prefix, which is the default for all terminal matches in all parsers 3907built with C<Parse::RecDescent>. 3908 3909If you want to change the universal prefix using 3910C<$Parse::RecDescent::skip>, be careful to set it I<before> creating 3911the grammar object, because it is applied statically (when a grammar 3912is built) rather than dynamically (when the grammar is used). 3913Alternatively you can provide a global C<E<lt>skip:...E<gt>> directive 3914in your grammar before any rules (described later). 3915 3916The prefix for an individual production can be altered 3917by using the C<E<lt>skip:...E<gt>> directive (described later). 3918Setting this directive in the top-level rule is an alternative approach 3919to setting C<$Parse::RecDescent::skip> before creating the object, but 3920in this case you don't get the intended skipping behaviour if you 3921directly invoke methods different from the top-level rule. 3922 3923 3924=head2 Actions 3925 3926An action is a block of Perl code which is to be executed (as the 3927block of a C<do> statement) when the parser reaches that point in a 3928production. The action executes within a special namespace belonging to 3929the active parser, so care must be taken in correctly qualifying variable 3930names (see also L<Start-up Actions> below). 3931 3932The action is considered to succeed if the final value of the block 3933is defined (that is, if the implied C<do> statement evaluates to a 3934defined value - I<even one which would be treated as "false">). Note 3935that the value associated with a successful action is also the final 3936value in the block. 3937 3938An action will I<fail> if its last evaluated value is C<undef>. This is 3939surprisingly easy to accomplish by accident. For instance, here's an 3940infuriating case of an action that makes its production fail, but only 3941when debugging I<isn't> activated: 3942 3943 description: name rank serial_number 3944 { print "Got $item[2] $item[1] ($item[3])\n" 3945 if $::debugging 3946 } 3947 3948If C<$debugging> is false, no statement in the block is executed, so 3949the final value is C<undef>, and the entire production fails. The solution is: 3950 3951 description: name rank serial_number 3952 { print "Got $item[2] $item[1] ($item[3])\n" 3953 if $::debugging; 3954 1; 3955 } 3956 3957Within an action, a number of useful parse-time variables are 3958available in the special parser namespace (there are other variables 3959also accessible, but meddling with them will probably just break your 3960parser. As a general rule, if you avoid referring to unqualified 3961variables - especially those starting with an underscore - inside an action, 3962things should be okay): 3963 3964=over 4 3965 3966=item C<@item> and C<%item> 3967 3968The array slice C<@item[1..$#item]> stores the value associated with each item 3969(that is, each subrule, token, or action) in the current production. The 3970analogy is to C<$1>, C<$2>, etc. in a I<yacc> grammar. 3971Note that, for obvious reasons, C<@item> only contains the 3972values of items I<before> the current point in the production. 3973 3974The first element (C<$item[0]>) stores the name of the current rule 3975being matched. 3976 3977C<@item> is a standard Perl array, so it can also be indexed with negative 3978numbers, representing the number of items I<back> from the current position in 3979the parse: 3980 3981 stuff: /various/ bits 'and' pieces "then" data 'end' 3982 { print $item[-2] } # PRINTS data 3983 # (EASIER THAN: $item[6]) 3984 3985The C<%item> hash complements the <@item> array, providing named 3986access to the same item values: 3987 3988 stuff: /various/ bits 'and' pieces "then" data 'end' 3989 { print $item{data} # PRINTS data 3990 # (EVEN EASIER THAN USING @item) 3991 3992 3993The results of named subrules are stored in the hash under each 3994subrule's name (including the repetition specifier, if any), 3995whilst all other items are stored under a "named 3996positional" key that indicates their ordinal position within their item 3997type: __STRINGI<n>__, __PATTERNI<n>__, __DIRECTIVEI<n>__, __ACTIONI<n>__: 3998 3999 stuff: /various/ bits 'and' pieces "then" data 'end' { save } 4000 { print $item{__PATTERN1__}, # PRINTS 'various' 4001 $item{__STRING2__}, # PRINTS 'then' 4002 $item{__ACTION1__}, # PRINTS RETURN 4003 # VALUE OF save 4004 } 4005 4006 4007If you want proper I<named> access to patterns or literals, you need to turn 4008them into separate rules: 4009 4010 stuff: various bits 'and' pieces "then" data 'end' 4011 { print $item{various} # PRINTS various 4012 } 4013 4014 various: /various/ 4015 4016 4017The special entry C<$item{__RULE__}> stores the name of the current 4018rule (i.e. the same value as C<$item[0]>. 4019 4020The advantage of using C<%item>, instead of C<@items> is that it 4021removes the need to track items positions that may change as a grammar 4022evolves. For example, adding an interim C<E<lt>skipE<gt>> directive 4023of action can silently ruin a trailing action, by moving an C<@item> 4024element "down" the array one place. In contrast, the named entry 4025of C<%item> is unaffected by such an insertion. 4026 4027A limitation of the C<%item> hash is that it only records the I<last> 4028value of a particular subrule. For example: 4029 4030 range: '(' number '..' number )' 4031 { $return = $item{number} } 4032 4033will return only the value corresponding to the I<second> match of the 4034C<number> subrule. In other words, successive calls to a subrule 4035overwrite the corresponding entry in C<%item>. Once again, the 4036solution is to rename each subrule in its own rule: 4037 4038 range: '(' from_num '..' to_num ')' 4039 { $return = $item{from_num} } 4040 4041 from_num: number 4042 to_num: number 4043 4044 4045 4046=item C<@arg> and C<%arg> 4047 4048The array C<@arg> and the hash C<%arg> store any arguments passed to 4049the rule from some other rule (see L<Subrule argument lists>). Changes 4050to the elements of either variable do not propagate back to the calling 4051rule (data can be passed back from a subrule via the C<$return> 4052variable - see next item). 4053 4054 4055=item C<$return> 4056 4057If a value is assigned to C<$return> within an action, that value is 4058returned if the production containing the action eventually matches 4059successfully. Note that setting C<$return> I<doesn't> cause the current 4060production to succeed. It merely tells it what to return if it I<does> succeed. 4061Hence C<$return> is analogous to C<$$> in a I<yacc> grammar. 4062 4063If C<$return> is not assigned within a production, the value of the 4064last component of the production (namely: C<$item[$#item]>) is 4065returned if the production succeeds. 4066 4067 4068=item C<$commit> 4069 4070The current state of commitment to the current production (see L<"Directives"> 4071below). 4072 4073=item C<$skip> 4074 4075The current terminal prefix (see L<"Directives"> below). 4076 4077=item C<$text> 4078 4079The remaining (unparsed) text. Changes to C<$text> I<do not 4080propagate> out of unsuccessful productions, but I<do> survive 4081successful productions. Hence it is possible to dynamically alter the 4082text being parsed - for example, to provide a C<#include>-like facility: 4083 4084 hash_include: '#include' filename 4085 { $text = ::loadfile($item[2]) . $text } 4086 4087 filename: '<' /[a-z0-9._-]+/i '>' { $return = $item[2] } 4088 | '"' /[a-z0-9._-]+/i '"' { $return = $item[2] } 4089 4090 4091=item C<$thisline> and C<$prevline> 4092 4093C<$thisline> stores the current line number within the current parse 4094(starting from 1). C<$prevline> stores the line number for the last 4095character which was already successfully parsed (this will be different from 4096C<$thisline> at the end of each line). 4097 4098For efficiency, C<$thisline> and C<$prevline> are actually tied 4099hashes, and only recompute the required line number when the variable's 4100value is used. 4101 4102Assignment to C<$thisline> adjusts the line number calculator, so that 4103it believes that the current line number is the value being assigned. Note 4104that this adjustment will be reflected in all subsequent line numbers 4105calculations. 4106 4107Modifying the value of the variable C<$text> (as in the previous 4108C<hash_include> example, for instance) will confuse the line 4109counting mechanism. To prevent this, you should call 4110C<Parse::RecDescent::LineCounter::resync($thisline)> I<immediately> 4111after any assignment to the variable C<$text> (or, at least, before the 4112next attempt to use C<$thisline>). 4113 4114Note that if a production fails after assigning to or 4115resync'ing C<$thisline>, the parser's line counter mechanism will 4116usually be corrupted. 4117 4118Also see the entry for C<@itempos>. 4119 4120The line number can be set to values other than 1, by calling the start 4121rule with a second argument. For example: 4122 4123 $parser = new Parse::RecDescent ($grammar); 4124 4125 $parser->input($text, 10); # START LINE NUMBERS AT 10 4126 4127 4128=item C<$thiscolumn> and C<$prevcolumn> 4129 4130C<$thiscolumn> stores the current column number within the current line 4131being parsed (starting from 1). C<$prevcolumn> stores the column number 4132of the last character which was actually successfully parsed. Usually 4133C<$prevcolumn == $thiscolumn-1>, but not at the end of lines. 4134 4135For efficiency, C<$thiscolumn> and C<$prevcolumn> are 4136actually tied hashes, and only recompute the required column number 4137when the variable's value is used. 4138 4139Assignment to C<$thiscolumn> or C<$prevcolumn> is a fatal error. 4140 4141Modifying the value of the variable C<$text> (as in the previous 4142C<hash_include> example, for instance) may confuse the column 4143counting mechanism. 4144 4145Note that C<$thiscolumn> reports the column number I<before> any 4146whitespace that might be skipped before reading a token. Hence 4147if you wish to know where a token started (and ended) use something like this: 4148 4149 rule: token1 token2 startcol token3 endcol token4 4150 { print "token3: columns $item[3] to $item[5]"; } 4151 4152 startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP 4153 endcol: { $prevcolumn } 4154 4155Also see the entry for C<@itempos>. 4156 4157=item C<$thisoffset> and C<$prevoffset> 4158 4159C<$thisoffset> stores the offset of the current parsing position 4160within the complete text 4161being parsed (starting from 0). C<$prevoffset> stores the offset 4162of the last character which was actually successfully parsed. In all 4163cases C<$prevoffset == $thisoffset-1>. 4164 4165For efficiency, C<$thisoffset> and C<$prevoffset> are 4166actually tied hashes, and only recompute the required offset 4167when the variable's value is used. 4168 4169Assignment to C<$thisoffset> or <$prevoffset> is a fatal error. 4170 4171Modifying the value of the variable C<$text> will I<not> affect the 4172offset counting mechanism. 4173 4174Also see the entry for C<@itempos>. 4175 4176=item C<@itempos> 4177 4178The array C<@itempos> stores a hash reference corresponding to 4179each element of C<@item>. The elements of the hash provide the 4180following: 4181 4182 $itempos[$n]{offset}{from} # VALUE OF $thisoffset BEFORE $item[$n] 4183 $itempos[$n]{offset}{to} # VALUE OF $prevoffset AFTER $item[$n] 4184 $itempos[$n]{line}{from} # VALUE OF $thisline BEFORE $item[$n] 4185 $itempos[$n]{line}{to} # VALUE OF $prevline AFTER $item[$n] 4186 $itempos[$n]{column}{from} # VALUE OF $thiscolumn BEFORE $item[$n] 4187 $itempos[$n]{column}{to} # VALUE OF $prevcolumn AFTER $item[$n] 4188 4189Note that the various C<$itempos[$n]...{from}> values record the 4190appropriate value I<after> any token prefix has been skipped. 4191 4192Hence, instead of the somewhat tedious and error-prone: 4193 4194 rule: startcol token1 endcol 4195 startcol token2 endcol 4196 startcol token3 endcol 4197 { print "token1: columns $item[1] 4198 to $item[3] 4199 token2: columns $item[4] 4200 to $item[6] 4201 token3: columns $item[7] 4202 to $item[9]" } 4203 4204 startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP 4205 endcol: { $prevcolumn } 4206 4207it is possible to write: 4208 4209 rule: token1 token2 token3 4210 { print "token1: columns $itempos[1]{column}{from} 4211 to $itempos[1]{column}{to} 4212 token2: columns $itempos[2]{column}{from} 4213 to $itempos[2]{column}{to} 4214 token3: columns $itempos[3]{column}{from} 4215 to $itempos[3]{column}{to}" } 4216 4217Note however that (in the current implementation) the use of C<@itempos> 4218anywhere in a grammar implies that item positioning information is 4219collected I<everywhere> during the parse. Depending on the grammar 4220and the size of the text to be parsed, this may be prohibitively 4221expensive and the explicit use of C<$thisline>, C<$thiscolumn>, etc. may 4222be a better choice. 4223 4224 4225=item C<$thisparser> 4226 4227A reference to the S<C<Parse::RecDescent>> object through which 4228parsing was initiated. 4229 4230The value of C<$thisparser> propagates down the subrules of a parse 4231but not back up. Hence, you can invoke subrules from another parser 4232for the scope of the current rule as follows: 4233 4234 rule: subrule1 subrule2 4235 | { $thisparser = $::otherparser } <reject> 4236 | subrule3 subrule4 4237 | subrule5 4238 4239The result is that the production calls "subrule1" and "subrule2" of 4240the current parser, and the remaining productions call the named subrules 4241from C<$::otherparser>. Note, however that "Bad Things" will happen if 4242C<::otherparser> isn't a blessed reference and/or doesn't have methods 4243with the same names as the required subrules! 4244 4245=item C<$thisrule> 4246 4247A reference to the S<C<Parse::RecDescent::Rule>> object corresponding to the 4248rule currently being matched. 4249 4250=item C<$thisprod> 4251 4252A reference to the S<C<Parse::RecDescent::Production>> object 4253corresponding to the production currently being matched. 4254 4255=item C<$score> and C<$score_return> 4256 4257$score stores the best production score to date, as specified by 4258an earlier C<E<lt>score:...E<gt>> directive. $score_return stores 4259the corresponding return value for the successful production. 4260 4261See L<Scored productions>. 4262 4263=back 4264 4265B<Warning:> the parser relies on the information in the various C<this...> 4266objects in some non-obvious ways. Tinkering with the other members of 4267these objects will probably cause Bad Things to happen, unless you 4268I<really> know what you're doing. The only exception to this advice is 4269that the use of C<$this...-E<gt>{local}> is always safe. 4270 4271 4272=head2 Start-up Actions 4273 4274Any actions which appear I<before> the first rule definition in a 4275grammar are treated as "start-up" actions. Each such action is 4276stripped of its outermost brackets and then evaluated (in the parser's 4277special namespace) just before the rules of the grammar are first 4278compiled. 4279 4280The main use of start-up actions is to declare local variables within the 4281parser's special namespace: 4282 4283 { my $lastitem = '???'; } 4284 4285 list: item(s) { $return = $lastitem } 4286 4287 item: book { $lastitem = 'book'; } 4288 bell { $lastitem = 'bell'; } 4289 candle { $lastitem = 'candle'; } 4290 4291but start-up actions can be used to execute I<any> valid Perl code 4292within a parser's special namespace. 4293 4294Start-up actions can appear within a grammar extension or replacement 4295(that is, a partial grammar installed via C<Parse::RecDescent::Extend()> or 4296C<Parse::RecDescent::Replace()> - see L<Incremental Parsing>), and will be 4297executed before the new grammar is installed. Note, however, that a 4298particular start-up action is only ever executed once. 4299 4300 4301=head2 Autoactions 4302 4303It is sometimes desirable to be able to specify a default action to be 4304taken at the end of every production (for example, in order to easily 4305build a parse tree). If the variable C<$::RD_AUTOACTION> is defined 4306when C<Parse::RecDescent::new()> is called, the contents of that 4307variable are treated as a specification of an action which is to appended 4308to each production in the corresponding grammar. 4309 4310Alternatively, you can hard-code the autoaction within a grammar, using the 4311C<< <autoaction:...> >> directive. 4312 4313So, for example, to construct a simple parse tree you could write: 4314 4315 $::RD_AUTOACTION = q { [@item] }; 4316 4317 parser = Parse::RecDescent->new(q{ 4318 expression: and_expr '||' expression | and_expr 4319 and_expr: not_expr '&&' and_expr | not_expr 4320 not_expr: '!' brack_expr | brack_expr 4321 brack_expr: '(' expression ')' | identifier 4322 identifier: /[a-z]+/i 4323 }); 4324 4325or: 4326 4327 parser = Parse::RecDescent->new(q{ 4328 <autoaction: { [@item] } > 4329 4330 expression: and_expr '||' expression | and_expr 4331 and_expr: not_expr '&&' and_expr | not_expr 4332 not_expr: '!' brack_expr | brack_expr 4333 brack_expr: '(' expression ')' | identifier 4334 identifier: /[a-z]+/i 4335 }); 4336 4337Either of these is equivalent to: 4338 4339 parser = new Parse::RecDescent (q{ 4340 expression: and_expr '||' expression 4341 { [@item] } 4342 | and_expr 4343 { [@item] } 4344 4345 and_expr: not_expr '&&' and_expr 4346 { [@item] } 4347 | not_expr 4348 { [@item] } 4349 4350 not_expr: '!' brack_expr 4351 { [@item] } 4352 | brack_expr 4353 { [@item] } 4354 4355 brack_expr: '(' expression ')' 4356 { [@item] } 4357 | identifier 4358 { [@item] } 4359 4360 identifier: /[a-z]+/i 4361 { [@item] } 4362 }); 4363 4364Alternatively, we could take an object-oriented approach, use different 4365classes for each node (and also eliminating redundant intermediate nodes): 4366 4367 $::RD_AUTOACTION = q 4368 { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; 4369 4370 parser = Parse::RecDescent->new(q{ 4371 expression: and_expr '||' expression | and_expr 4372 and_expr: not_expr '&&' and_expr | not_expr 4373 not_expr: '!' brack_expr | brack_expr 4374 brack_expr: '(' expression ')' | identifier 4375 identifier: /[a-z]+/i 4376 }); 4377 4378or: 4379 4380 parser = Parse::RecDescent->new(q{ 4381 <autoaction: 4382 $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) 4383 > 4384 4385 expression: and_expr '||' expression | and_expr 4386 and_expr: not_expr '&&' and_expr | not_expr 4387 not_expr: '!' brack_expr | brack_expr 4388 brack_expr: '(' expression ')' | identifier 4389 identifier: /[a-z]+/i 4390 }); 4391 4392which are equivalent to: 4393 4394 parser = Parse::RecDescent->new(q{ 4395 expression: and_expr '||' expression 4396 { "expression_node"->new(@item[1..3]) } 4397 | and_expr 4398 4399 and_expr: not_expr '&&' and_expr 4400 { "and_expr_node"->new(@item[1..3]) } 4401 | not_expr 4402 4403 not_expr: '!' brack_expr 4404 { "not_expr_node"->new(@item[1..2]) } 4405 | brack_expr 4406 4407 brack_expr: '(' expression ')' 4408 { "brack_expr_node"->new(@item[1..3]) } 4409 | identifier 4410 4411 identifier: /[a-z]+/i 4412 { "identifer_node"->new(@item[1]) } 4413 }); 4414 4415Note that, if a production already ends in an action, no autoaction is appended 4416to it. For example, in this version: 4417 4418 $::RD_AUTOACTION = q 4419 { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; 4420 4421 parser = Parse::RecDescent->new(q{ 4422 expression: and_expr '&&' expression | and_expr 4423 and_expr: not_expr '&&' and_expr | not_expr 4424 not_expr: '!' brack_expr | brack_expr 4425 brack_expr: '(' expression ')' | identifier 4426 identifier: /[a-z]+/i 4427 { 'terminal_node'->new($item[1]) } 4428 }); 4429 4430each C<identifier> match produces a C<terminal_node> object, I<not> an 4431C<identifier_node> object. 4432 4433A level 1 warning is issued each time an "autoaction" is added to 4434some production. 4435 4436 4437=head2 Autotrees 4438 4439A commonly needed autoaction is one that builds a parse-tree. It is moderately 4440tricky to set up such an action (which must treat terminals differently from 4441non-terminals), so Parse::RecDescent simplifies the process by providing the 4442C<E<lt>autotreeE<gt>> directive. 4443 4444If this directive appears at the start of grammar, it causes 4445Parse::RecDescent to insert autoactions at the end of any rule except 4446those which already end in an action. The action inserted depends on whether 4447the production is an intermediate rule (two or more items), or a terminal 4448of the grammar (i.e. a single pattern or string item). 4449 4450So, for example, the following grammar: 4451 4452 <autotree> 4453 4454 file : command(s) 4455 command : get | set | vet 4456 get : 'get' ident ';' 4457 set : 'set' ident 'to' value ';' 4458 vet : 'check' ident 'is' value ';' 4459 ident : /\w+/ 4460 value : /\d+/ 4461 4462is equivalent to: 4463 4464 file : command(s) { bless \%item, $item[0] } 4465 command : get { bless \%item, $item[0] } 4466 | set { bless \%item, $item[0] } 4467 | vet { bless \%item, $item[0] } 4468 get : 'get' ident ';' { bless \%item, $item[0] } 4469 set : 'set' ident 'to' value ';' { bless \%item, $item[0] } 4470 vet : 'check' ident 'is' value ';' { bless \%item, $item[0] } 4471 4472 ident : /\w+/ { bless {__VALUE__=>$item[1]}, $item[0] } 4473 value : /\d+/ { bless {__VALUE__=>$item[1]}, $item[0] } 4474 4475Note that each node in the tree is blessed into a class of the same name 4476as the rule itself. This makes it easy to build object-oriented 4477processors for the parse-trees that the grammar produces. Note too that 4478the last two rules produce special objects with the single attribute 4479'__VALUE__'. This is because they consist solely of a single terminal. 4480 4481This autoaction-ed grammar would then produce a parse tree in a data 4482structure like this: 4483 4484 { 4485 file => { 4486 command => { 4487 [ get => { 4488 identifier => { __VALUE__ => 'a' }, 4489 }, 4490 set => { 4491 identifier => { __VALUE__ => 'b' }, 4492 value => { __VALUE__ => '7' }, 4493 }, 4494 vet => { 4495 identifier => { __VALUE__ => 'b' }, 4496 value => { __VALUE__ => '7' }, 4497 }, 4498 ], 4499 }, 4500 } 4501 } 4502 4503(except, of course, that each nested hash would also be blessed into 4504the appropriate class). 4505 4506You can also specify a base class for the C<E<lt>autotreeE<gt>> directive. 4507The supplied prefix will be prepended to the rule names when creating 4508tree nodes. The following are equivalent: 4509 4510 <autotree:MyBase::Class> 4511 <autotree:MyBase::Class::> 4512 4513And will produce a root node blessed into the C<MyBase::Class::file> 4514package in the example above. 4515 4516 4517=head2 Autostubbing 4518 4519Normally, if a subrule appears in some production, but no rule of that 4520name is ever defined in the grammar, the production which refers to the 4521non-existent subrule fails immediately. This typically occurs as a 4522result of misspellings, and is a sufficiently common occurrence that a 4523warning is generated for such situations. 4524 4525However, when prototyping a grammar it is sometimes useful to be 4526able to use subrules before a proper specification of them is 4527really possible. For example, a grammar might include a section like: 4528 4529 function_call: identifier '(' arg(s?) ')' 4530 4531 identifier: /[a-z]\w*/i 4532 4533where the possible format of an argument is sufficiently complex that 4534it is not worth specifying in full until the general function call 4535syntax has been debugged. In this situation it is convenient to leave 4536the real rule C<arg> undefined and just slip in a placeholder (or 4537"stub"): 4538 4539 arg: 'arg' 4540 4541so that the function call syntax can be tested with dummy input such as: 4542 4543 f0() 4544 f1(arg) 4545 f2(arg arg) 4546 f3(arg arg arg) 4547 4548et cetera. 4549 4550Early in prototyping, many such "stubs" may be required, so 4551C<Parse::RecDescent> provides a means of automating their definition. 4552If the variable C<$::RD_AUTOSTUB> is defined when a parser is built, a 4553subrule reference to any non-existent rule (say, C<subrule>), will 4554cause a "stub" rule to be automatically defined in the generated 4555parser. If C<$::RD_AUTOSTUB eq '1'> or is false, a stub rule of the 4556form: 4557 4558 subrule: 'subrule' 4559 4560will be generated. The special-case for a value of C<'1'> is to allow 4561the use of the B<perl -s> with B<-RD_AUTOSTUB> without generating 4562C<subrule: '1'> per below. If C<$::RD_AUTOSTUB> is true, a stub rule 4563of the form: 4564 4565 subrule: $::RD_AUTOSTUB 4566 4567will be generated. C<$::RD_AUTOSTUB> must contain a valid production 4568item, no checking is performed. No lazy evaluation of 4569C<$::RD_AUTOSTUB> is performed, it is evaluated at the time the Parser 4570is generated. 4571 4572Hence, with C<$::RD_AUTOSTUB> defined, it is possible to only 4573partially specify a grammar, and then "fake" matches of the 4574unspecified (sub)rules by just typing in their name, or a literal 4575value that was assigned to C<$::RD_AUTOSTUB>. 4576 4577 4578 4579=head2 Look-ahead 4580 4581If a subrule, token, or action is prefixed by "...", then it is 4582treated as a "look-ahead" request. That means that the current production can 4583(as usual) only succeed if the specified item is matched, but that the matching 4584I<does not consume any of the text being parsed>. This is very similar to the 4585C</(?=...)/> look-ahead construct in Perl patterns. Thus, the rule: 4586 4587 inner_word: word ...word 4588 4589will match whatever the subrule "word" matches, provided that match is followed 4590by some more text which subrule "word" would also match (although this 4591second substring is not actually consumed by "inner_word") 4592 4593Likewise, a "...!" prefix, causes the following item to succeed (without 4594consuming any text) if and only if it would normally fail. Hence, a 4595rule such as: 4596 4597 identifier: ...!keyword ...!'_' /[A-Za-z_]\w*/ 4598 4599matches a string of characters which satisfies the pattern 4600C</[A-Za-z_]\w*/>, but only if the same sequence of characters would 4601not match either subrule "keyword" or the literal token '_'. 4602 4603Sequences of look-ahead prefixes accumulate, multiplying their positive and/or 4604negative senses. Hence: 4605 4606 inner_word: word ...!......!word 4607 4608is exactly equivalent to the original example above (a warning is issued in 4609cases like these, since they often indicate something left out, or 4610misunderstood). 4611 4612Note that actions can also be treated as look-aheads. In such cases, 4613the state of the parser text (in the local variable C<$text>) 4614I<after> the look-ahead action is guaranteed to be identical to its 4615state I<before> the action, regardless of how it's changed I<within> 4616the action (unless you actually undefine C<$text>, in which case you 4617get the disaster you deserve :-). 4618 4619 4620=head2 Directives 4621 4622Directives are special pre-defined actions which may be used to alter 4623the behaviour of the parser. There are currently twenty-three directives: 4624C<E<lt>commitE<gt>>, 4625C<E<lt>uncommitE<gt>>, 4626C<E<lt>rejectE<gt>>, 4627C<E<lt>scoreE<gt>>, 4628C<E<lt>autoscoreE<gt>>, 4629C<E<lt>skipE<gt>>, 4630C<E<lt>resyncE<gt>>, 4631C<E<lt>errorE<gt>>, 4632C<E<lt>warnE<gt>>, 4633C<E<lt>hintE<gt>>, 4634C<E<lt>trace_buildE<gt>>, 4635C<E<lt>trace_parseE<gt>>, 4636C<E<lt>nocheckE<gt>>, 4637C<E<lt>rulevarE<gt>>, 4638C<E<lt>matchruleE<gt>>, 4639C<E<lt>leftopE<gt>>, 4640C<E<lt>rightopE<gt>>, 4641C<E<lt>deferE<gt>>, 4642C<E<lt>nocheckE<gt>>, 4643C<E<lt>perl_quotelikeE<gt>>, 4644C<E<lt>perl_codeblockE<gt>>, 4645C<E<lt>perl_variableE<gt>>, 4646and C<E<lt>tokenE<gt>>. 4647 4648=over 4 4649 4650=item Committing and uncommitting 4651 4652The C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives permit the recursive 4653descent of the parse tree to be pruned (or "cut") for efficiency. 4654Within a rule, a C<E<lt>commitE<gt>> directive instructs the rule to ignore subsequent 4655productions if the current production fails. For example: 4656 4657 command: 'find' <commit> filename 4658 | 'open' <commit> filename 4659 | 'move' filename filename 4660 4661Clearly, if the leading token 'find' is matched in the first production but that 4662production fails for some other reason, then the remaining 4663productions cannot possibly match. The presence of the 4664C<E<lt>commitE<gt>> causes the "command" rule to fail immediately if 4665an invalid "find" command is found, and likewise if an invalid "open" 4666command is encountered. 4667 4668It is also possible to revoke a previous commitment. For example: 4669 4670 if_statement: 'if' <commit> condition 4671 'then' block <uncommit> 4672 'else' block 4673 | 'if' <commit> condition 4674 'then' block 4675 4676In this case, a failure to find an "else" block in the first 4677production shouldn't preclude trying the second production, but a 4678failure to find a "condition" certainly should. 4679 4680As a special case, any production in which the I<first> item is an 4681C<E<lt>uncommitE<gt>> immediately revokes a preceding C<E<lt>commitE<gt>> 4682(even though the production would not otherwise have been tried). For 4683example, in the rule: 4684 4685 request: 'explain' expression 4686 | 'explain' <commit> keyword 4687 | 'save' 4688 | 'quit' 4689 | <uncommit> term '?' 4690 4691if the text being matched was "explain?", and the first two 4692productions failed, then the C<E<lt>commitE<gt>> in production two would cause 4693productions three and four to be skipped, but the leading 4694C<E<lt>uncommitE<gt>> in the production five would allow that production to 4695attempt a match. 4696 4697Note in the preceding example, that the C<E<lt>commitE<gt>> was only placed 4698in production two. If production one had been: 4699 4700 request: 'explain' <commit> expression 4701 4702then production two would be (inappropriately) skipped if a leading 4703"explain..." was encountered. 4704 4705Both C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives always succeed, and their value 4706is always 1. 4707 4708 4709=item Rejecting a production 4710 4711The C<E<lt>rejectE<gt>> directive immediately causes the current production 4712to fail (it is exactly equivalent to, but more obvious than, the 4713action C<{undef}>). A C<E<lt>rejectE<gt>> is useful when it is desirable to get 4714the side effects of the actions in one production, without prejudicing a match 4715by some other production later in the rule. For example, to insert 4716tracing code into the parse: 4717 4718 complex_rule: { print "In complex rule...\n"; } <reject> 4719 4720 complex_rule: simple_rule '+' 'i' '*' simple_rule 4721 | 'i' '*' simple_rule 4722 | simple_rule 4723 4724 4725It is also possible to specify a conditional rejection, using the 4726form C<E<lt>reject:I<condition>E<gt>>, which only rejects if the 4727specified condition is true. This form of rejection is exactly 4728equivalent to the action C<{(I<condition>)?undef:1}E<gt>>. 4729For example: 4730 4731 command: save_command 4732 | restore_command 4733 | <reject: defined $::tolerant> { exit } 4734 | <error: Unknown command. Ignored.> 4735 4736A C<E<lt>rejectE<gt>> directive never succeeds (and hence has no 4737associated value). A conditional rejection may succeed (if its 4738condition is not satisfied), in which case its value is 1. 4739 4740As an extra optimization, C<Parse::RecDescent> ignores any production 4741which I<begins> with an unconditional C<E<lt>rejectE<gt>> directive, 4742since any such production can never successfully match or have any 4743useful side-effects. A level 1 warning is issued in all such cases. 4744 4745Note that productions beginning with conditional 4746C<E<lt>reject:...E<gt>> directives are I<never> "optimized away" in 4747this manner, even if they are always guaranteed to fail (for example: 4748C<E<lt>reject:1E<gt>>) 4749 4750Due to the way grammars are parsed, there is a minor restriction on the 4751condition of a conditional C<E<lt>reject:...E<gt>>: it cannot 4752contain any raw '<' or '>' characters. For example: 4753 4754 line: cmd <reject: $thiscolumn > max> data 4755 4756results in an error when a parser is built from this grammar (since the 4757grammar parser has no way of knowing whether the first > is a "less than" 4758or the end of the C<E<lt>reject:...E<gt>>. 4759 4760To overcome this problem, put the condition inside a do{} block: 4761 4762 line: cmd <reject: do{$thiscolumn > max}> data 4763 4764Note that the same problem may occur in other directives that take 4765arguments. The same solution will work in all cases. 4766 4767 4768=item Skipping between terminals 4769 4770The C<E<lt>skipE<gt>> directive enables the terminal prefix used in 4771a production to be changed. For example: 4772 4773 OneLiner: Command <skip:'[ \t]*'> Arg(s) /;/ 4774 4775causes only blanks and tabs to be skipped before terminals in the 4776C<Arg> subrule (and any of I<its> subrules>, and also before the final 4777C</;/> terminal. Once the production is complete, the previous 4778terminal prefix is reinstated. Note that this implies that distinct 4779productions of a rule must reset their terminal prefixes individually. 4780 4781The C<E<lt>skipE<gt>> directive evaluates to the I<previous> terminal 4782prefix, so it's easy to reinstate a prefix later in a production: 4783 4784 Command: <skip:","> CSV(s) <skip:$item[1]> Modifier 4785 4786The value specified after the colon is interpolated into a pattern, so 4787all of the following are equivalent (though their efficiency increases 4788down the list): 4789 4790 <skip: "$colon|$comma"> # ASSUMING THE VARS HOLD THE OBVIOUS VALUES 4791 4792 <skip: ':|,'> 4793 4794 <skip: q{[:,]}> 4795 4796 <skip: qr/[:,]/> 4797 4798There is no way of directly setting the prefix for 4799an entire rule, except as follows: 4800 4801 Rule: <skip: '[ \t]*'> Prod1 4802 | <skip: '[ \t]*'> Prod2a Prod2b 4803 | <skip: '[ \t]*'> Prod3 4804 4805or, better: 4806 4807 Rule: <skip: '[ \t]*'> 4808 ( 4809 Prod1 4810 | Prod2a Prod2b 4811 | Prod3 4812 ) 4813 4814The skip pattern is passed down to subrules, so setting the skip for 4815the top-level rule as described above actually sets the prefix for the 4816entire grammar (provided that you only call the method corresponding 4817to the top-level rule itself). Alternatively, or if you have more than 4818one top-level rule in your grammar, you can provide a global 4819C<E<lt>skipE<gt>> directive prior to defining any rules in the 4820grammar. These are the preferred alternatives to setting 4821C<$Parse::RecDescent::skip>. 4822 4823Additionally, using C<E<lt>skipE<gt>> actually allows you to have 4824a completely dynamic skipping behaviour. For example: 4825 4826 Rule_with_dynamic_skip: <skip: $::skip_pattern> Rule 4827 4828Then you can set C<$::skip_pattern> before invoking 4829C<Rule_with_dynamic_skip> and have it skip whatever you specified. 4830 4831B<Note: Up to release 1.51 of Parse::RecDescent, an entirely different 4832mechanism was used for specifying terminal prefixes. The current 4833method is not backwards-compatible with that early approach. The 4834current approach is stable and will not change again.> 4835 4836B<Note: the global C<E<lt>skipE<gt>> directive added in 1.967_004 did 4837not interpolate the pattern argument, instead the pattern was placed 4838inside of single quotes and then interpolated. This behavior was 4839changed in 1.967_010 so that all C<E<lt>skipE<gt>> directives behavior 4840similarly.> 4841 4842=item Resynchronization 4843 4844The C<E<lt>resyncE<gt>> directive provides a visually distinctive 4845means of consuming some of the text being parsed, usually to skip an 4846erroneous input. In its simplest form C<E<lt>resyncE<gt>> simply 4847consumes text up to and including the next newline (C<"\n">) 4848character, succeeding only if the newline is found, in which case it 4849causes its surrounding rule to return zero on success. 4850 4851In other words, a C<E<lt>resyncE<gt>> is exactly equivalent to the token 4852C</[^\n]*\n/> followed by the action S<C<{ $return = 0 }>> (except that 4853productions beginning with a C<E<lt>resyncE<gt>> are ignored when generating 4854error messages). A typical use might be: 4855 4856 script : command(s) 4857 4858 command: save_command 4859 | restore_command 4860 | <resync> # TRY NEXT LINE, IF POSSIBLE 4861 4862It is also possible to explicitly specify a resynchronization 4863pattern, using the C<E<lt>resync:I<pattern>E<gt>> variant. This version 4864succeeds only if the specified pattern matches (and consumes) the 4865parsed text. In other words, C<E<lt>resync:I<pattern>E<gt>> is exactly 4866equivalent to the token C</I<pattern>/> (followed by a S<C<{ $return = 0 }>> 4867action). For example, if commands were terminated by newlines or semi-colons: 4868 4869 command: save_command 4870 | restore_command 4871 | <resync:[^;\n]*[;\n]> 4872 4873The value of a successfully matched C<E<lt>resyncE<gt>> directive (of either 4874type) is the text that it consumed. Note, however, that since the 4875directive also sets C<$return>, a production consisting of a lone 4876C<E<lt>resyncE<gt>> succeeds but returns the value zero (which a calling rule 4877may find useful to distinguish between "true" matches and "tolerant" matches). 4878Remember that returning a zero value indicates that the rule I<succeeded> (since 4879only an C<undef> denotes failure within C<Parse::RecDescent> parsers. 4880 4881 4882=item Error handling 4883 4884The C<E<lt>errorE<gt>> directive provides automatic or user-defined 4885generation of error messages during a parse. In its simplest form 4886C<E<lt>errorE<gt>> prepares an error message based on 4887the mismatch between the last item expected and the text which cause 4888it to fail. For example, given the rule: 4889 4890 McCoy: curse ',' name ', I'm a doctor, not a' a_profession '!' 4891 | pronoun 'dead,' name '!' 4892 | <error> 4893 4894the following strings would produce the following messages: 4895 4896=over 4 4897 4898=item "Amen, Jim!" 4899 4900 ERROR (line 1): Invalid McCoy: Expected curse or pronoun 4901 not found 4902 4903=item "Dammit, Jim, I'm a doctor!" 4904 4905 ERROR (line 1): Invalid McCoy: Expected ", I'm a doctor, not a" 4906 but found ", I'm a doctor!" instead 4907 4908=item "He's dead,\n" 4909 4910 ERROR (line 2): Invalid McCoy: Expected name not found 4911 4912=item "He's alive!" 4913 4914 ERROR (line 1): Invalid McCoy: Expected 'dead,' but found 4915 "alive!" instead 4916 4917=item "Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!" 4918 4919 ERROR (line 1): Invalid McCoy: Expected a profession but found 4920 "pointy-eared Vulcan!" instead 4921 4922 4923=back 4924 4925Note that, when autogenerating error messages, all underscores in any 4926rule name used in a message are replaced by single spaces (for example 4927"a_production" becomes "a production"). Judicious choice of rule 4928names can therefore considerably improve the readability of automatic 4929error messages (as well as the maintainability of the original 4930grammar). 4931 4932If the automatically generated error is not sufficient, it is possible to 4933provide an explicit message as part of the error directive. For example: 4934 4935 Spock: "Fascinating ',' (name | 'Captain') '.' 4936 | "Highly illogical, doctor." 4937 | <error: He never said that!> 4938 4939which would result in I<all> failures to parse a "Spock" subrule printing the 4940following message: 4941 4942 ERROR (line <N>): Invalid Spock: He never said that! 4943 4944The error message is treated as a "qq{...}" string and interpolated 4945when the error is generated (I<not> when the directive is specified!). 4946Hence: 4947 4948 <error: Mystical error near "$text"> 4949 4950would correctly insert the ambient text string which caused the error. 4951 4952There are two other forms of error directive: C<E<lt>error?E<gt>> and 4953S<C<E<lt>error?: msgE<gt>>>. These behave just like C<E<lt>errorE<gt>> 4954and S<C<E<lt>error: msgE<gt>>> respectively, except that they are 4955only triggered if the rule is "committed" at the time they are 4956encountered. For example: 4957 4958 Scotty: "Ya kenna change the Laws of Phusics," <commit> name 4959 | name <commit> ',' 'she's goanta blaw!' 4960 | <error?> 4961 4962will only generate an error for a string beginning with "Ya kenna 4963change the Laws o' Phusics," or a valid name, but which still fails to match the 4964corresponding production. That is, C<$parser-E<gt>Scotty("Aye, Cap'ain")> will 4965fail silently (since neither production will "commit" the rule on that 4966input), whereas S<C<$parser-E<gt>Scotty("Mr Spock, ah jest kenna do'ut!")>> 4967will fail with the error message: 4968 4969 ERROR (line 1): Invalid Scotty: expected 'she's goanta blaw!' 4970 but found 'I jest kenna do'ut!' instead. 4971 4972since in that case the second production would commit after matching 4973the leading name. 4974 4975Note that to allow this behaviour, all C<E<lt>errorE<gt>> directives which are 4976the first item in a production automatically uncommit the rule just 4977long enough to allow their production to be attempted (that is, when 4978their production fails, the commitment is reinstated so that 4979subsequent productions are skipped). 4980 4981In order to I<permanently> uncommit the rule before an error message, 4982it is necessary to put an explicit C<E<lt>uncommitE<gt>> before the 4983C<E<lt>errorE<gt>>. For example: 4984 4985 line: 'Kirk:' <commit> Kirk 4986 | 'Spock:' <commit> Spock 4987 | 'McCoy:' <commit> McCoy 4988 | <uncommit> <error?> <reject> 4989 | <resync> 4990 4991 4992Error messages generated by the various C<E<lt>error...E<gt>> directives 4993are not displayed immediately. Instead, they are "queued" in a buffer and 4994are only displayed once parsing ultimately fails. Moreover, 4995C<E<lt>error...E<gt>> directives that cause one production of a rule 4996to fail are automatically removed from the message queue 4997if another production subsequently causes the entire rule to succeed. 4998This means that you can put 4999C<E<lt>error...E<gt>> directives wherever useful diagnosis can be done, 5000and only those associated with actual parser failure will ever be 5001displayed. Also see L<"GOTCHAS">. 5002 5003As a general rule, the most useful diagnostics are usually generated 5004either at the very lowest level within the grammar, or at the very 5005highest. A good rule of thumb is to identify those subrules which 5006consist mainly (or entirely) of terminals, and then put an 5007C<E<lt>error...E<gt>> directive at the end of any other rule which calls 5008one or more of those subrules. 5009 5010There is one other situation in which the output of the various types of 5011error directive is suppressed; namely, when the rule containing them 5012is being parsed as part of a "look-ahead" (see L<"Look-ahead">). In this 5013case, the error directive will still cause the rule to fail, but will do 5014so silently. 5015 5016An unconditional C<E<lt>errorE<gt>> directive always fails (and hence has no 5017associated value). This means that encountering such a directive 5018always causes the production containing it to fail. Hence an 5019C<E<lt>errorE<gt>> directive will inevitably be the last (useful) item of a 5020rule (a level 3 warning is issued if a production contains items after an unconditional 5021C<E<lt>errorE<gt>> directive). 5022 5023An C<E<lt>error?E<gt>> directive will I<succeed> (that is: fail to fail :-), if 5024the current rule is uncommitted when the directive is encountered. In 5025that case the directive's associated value is zero. Hence, this type 5026of error directive I<can> be used before the end of a 5027production. For example: 5028 5029 command: 'do' <commit> something 5030 | 'report' <commit> something 5031 | <error?: Syntax error> <error: Unknown command> 5032 5033 5034B<Warning:> The C<E<lt>error?E<gt>> directive does I<not> mean "always fail (but 5035do so silently unless committed)". It actually means "only fail (and report) if 5036committed, otherwise I<succeed>". To achieve the "fail silently if uncommitted" 5037semantics, it is necessary to use: 5038 5039 rule: item <commit> item(s) 5040 | <error?> <reject> # FAIL SILENTLY UNLESS COMMITTED 5041 5042However, because people seem to expect a lone C<E<lt>error?E<gt>> directive 5043to work like this: 5044 5045 rule: item <commit> item(s) 5046 | <error?: Error message if committed> 5047 | <error: Error message if uncommitted> 5048 5049Parse::RecDescent automatically appends a 5050C<E<lt>rejectE<gt>> directive if the C<E<lt>error?E<gt>> directive 5051is the only item in a production. A level 2 warning (see below) 5052is issued when this happens. 5053 5054The level of error reporting during both parser construction and 5055parsing is controlled by the presence or absence of four global 5056variables: C<$::RD_ERRORS>, C<$::RD_WARN>, C<$::RD_HINT>, and 5057<$::RD_TRACE>. If C<$::RD_ERRORS> is defined (and, by default, it is) 5058then fatal errors are reported. 5059 5060Whenever C<$::RD_WARN> is defined, certain non-fatal problems are also reported. 5061 5062Warnings have an associated "level": 1, 2, or 3. The higher the level, 5063the more serious the warning. The value of the corresponding global 5064variable (C<$::RD_WARN>) determines the I<lowest> level of warning to 5065be displayed. Hence, to see I<all> warnings, set C<$::RD_WARN> to 1. 5066To see only the most serious warnings set C<$::RD_WARN> to 3. 5067By default C<$::RD_WARN> is initialized to 3, ensuring that serious but 5068non-fatal errors are automatically reported. 5069 5070There is also a grammar directive to turn on warnings from within the 5071grammar: C<< <warn> >>. It takes an optional argument, which specifies 5072the warning level: C<< <warn: 2> >>. 5073 5074See F<"DIAGNOSTICS"> for a list of the various error and warning messages 5075that Parse::RecDescent generates when these two variables are defined. 5076 5077Defining any of the remaining variables (which are not defined by 5078default) further increases the amount of information reported. 5079Defining C<$::RD_HINT> causes the parser generator to offer 5080more detailed analyses and hints on both errors and warnings. 5081Note that setting C<$::RD_HINT> at any point automagically 5082sets C<$::RD_WARN> to 1. There is also a C<< <hint> >> directive, which can 5083be hard-coded into a grammar. 5084 5085Defining C<$::RD_TRACE> causes the parser generator and the parser to 5086report their progress to STDERR in excruciating detail (although, without hints 5087unless $::RD_HINT is separately defined). This detail 5088can be moderated in only one respect: if C<$::RD_TRACE> has an 5089integer value (I<N>) greater than 1, only the I<N> characters of 5090the "current parsing context" (that is, where in the input string we 5091are at any point in the parse) is reported at any time. 5092 5093C<$::RD_TRACE> is mainly useful for debugging a grammar that isn't 5094behaving as you expected it to. To this end, if C<$::RD_TRACE> is 5095defined when a parser is built, any actual parser code which is 5096generated is also written to a file named "RD_TRACE" in the local 5097directory. 5098 5099There are two directives associated with the C<$::RD_TRACE> variable. 5100If a grammar contains a C<< <trace_build> >> directive anywhere in its 5101specification, C<$::RD_TRACE> is turned on during the parser construction 5102phase. If a grammar contains a C<< <trace_parse> >> directive anywhere in its 5103specification, C<$::RD_TRACE> is turned on during any parse the parser 5104performs. 5105 5106Note that the four variables belong to the "main" package, which 5107makes them easier to refer to in the code controlling the parser, and 5108also makes it easy to turn them into command line flags ("-RD_ERRORS", 5109"-RD_WARN", "-RD_HINT", "-RD_TRACE") under B<perl -s>. 5110 5111The corresponding directives are useful to "hardwire" the various 5112debugging features into a particular grammar (rather than having to set 5113and reset external variables). 5114 5115=item Redirecting diagnostics 5116 5117The diagnostics provided by the tracing mechanism always go to STDERR. 5118If you need them to go elsewhere, localize and reopen STDERR prior to the 5119parse. 5120 5121For example: 5122 5123 { 5124 local *STDERR = IO::File->new(">$filename") or die $!; 5125 5126 my $result = $parser->startrule($text); 5127 } 5128 5129 5130=item Consistency checks 5131 5132Whenever a parser is build, Parse::RecDescent carries out a number of 5133(potentially expensive) consistency checks. These include: verifying that the 5134grammar is not left-recursive and that no rules have been left undefined. 5135 5136These checks are important safeguards during development, but unnecessary 5137overheads when the grammar is stable and ready to be deployed. So 5138Parse::RecDescent provides a directive to disable them: C<< <nocheck> >>. 5139 5140If a grammar contains a C<< <nocheck> >> directive anywhere in its 5141specification, the extra compile-time checks are by-passed. 5142 5143 5144=item Specifying local variables 5145 5146It is occasionally convenient to specify variables which are local 5147to a single rule. This may be achieved by including a 5148C<E<lt>rulevar:...E<gt>> directive anywhere in the rule. For example: 5149 5150 markup: <rulevar: $tag> 5151 5152 markup: tag {($tag=$item[1]) =~ s/^<|>$//g} body[$tag] 5153 5154The example C<E<lt>rulevar: $tagE<gt>> directive causes a "my" variable named 5155C<$tag> to be declared at the start of the subroutine implementing the 5156C<markup> rule (that is, I<before> the first production, regardless of 5157where in the rule it is specified). 5158 5159Specifically, any directive of the form: 5160C<E<lt>rulevar:I<text>E<gt>> causes a line of the form C<my I<text>;> 5161to be added at the beginning of the rule subroutine, immediately after 5162the definitions of the following local variables: 5163 5164 $thisparser $commit 5165 $thisrule @item 5166 $thisline @arg 5167 $text %arg 5168 5169This means that the following C<E<lt>rulevarE<gt>> directives work 5170as expected: 5171 5172 <rulevar: $count = 0 > 5173 5174 <rulevar: $firstarg = $arg[0] || '' > 5175 5176 <rulevar: $myItems = \@item > 5177 5178 <rulevar: @context = ( $thisline, $text, @arg ) > 5179 5180 <rulevar: ($name,$age) = $arg{"name","age"} > 5181 5182If a variable that is also visible to subrules is required, it needs 5183to be C<local>'d, not C<my>'d. C<rulevar> defaults to C<my>, but if C<local> 5184is explicitly specified: 5185 5186 <rulevar: local $count = 0 > 5187 5188then a C<local>-ized variable is declared instead, and will be available 5189within subrules. 5190 5191Note however that, because all such variables are "my" variables, their 5192values I<do not persist> between match attempts on a given rule. To 5193preserve values between match attempts, values can be stored within the 5194"local" member of the C<$thisrule> object: 5195 5196 countedrule: { $thisrule->{"local"}{"count"}++ } 5197 <reject> 5198 | subrule1 5199 | subrule2 5200 | <reject: $thisrule->{"local"}{"count"} == 1> 5201 subrule3 5202 5203 5204When matching a rule, each C<E<lt>rulevarE<gt>> directive is matched as 5205if it were an unconditional C<E<lt>rejectE<gt>> directive (that is, it 5206causes any production in which it appears to immediately fail to match). 5207For this reason (and to improve readability) it is usual to specify any 5208C<E<lt>rulevarE<gt>> directive in a separate production at the start of 5209the rule (this has the added advantage that it enables 5210C<Parse::RecDescent> to optimize away such productions, just as it does 5211for the C<E<lt>rejectE<gt>> directive). 5212 5213 5214=item Dynamically matched rules 5215 5216Because regexes and double-quoted strings are interpolated, it is relatively 5217easy to specify productions with "context sensitive" tokens. For example: 5218 5219 command: keyword body "end $item[1]" 5220 5221which ensures that a command block is bounded by a 5222"I<E<lt>keywordE<gt>>...end I<E<lt>same keywordE<gt>>" pair. 5223 5224Building productions in which subrules are context sensitive is also possible, 5225via the C<E<lt>matchrule:...E<gt>> directive. This directive behaves 5226identically to a subrule item, except that the rule which is invoked to match 5227it is determined by the string specified after the colon. For example, we could 5228rewrite the C<command> rule like this: 5229 5230 command: keyword <matchrule:body> "end $item[1]" 5231 5232Whatever appears after the colon in the directive is treated as an interpolated 5233string (that is, as if it appeared in C<qq{...}> operator) and the value of 5234that interpolated string is the name of the subrule to be matched. 5235 5236Of course, just putting a constant string like C<body> in a 5237C<E<lt>matchrule:...E<gt>> directive is of little interest or benefit. 5238The power of directive is seen when we use a string that interpolates 5239to something interesting. For example: 5240 5241 command: keyword <matchrule:$item[1]_body> "end $item[1]" 5242 5243 keyword: 'while' | 'if' | 'function' 5244 5245 while_body: condition block 5246 5247 if_body: condition block ('else' block)(?) 5248 5249 function_body: arglist block 5250 5251Now the C<command> rule selects how to proceed on the basis of the keyword 5252that is found. It is as if C<command> were declared: 5253 5254 command: 'while' while_body "end while" 5255 | 'if' if_body "end if" 5256 | 'function' function_body "end function" 5257 5258 5259When a C<E<lt>matchrule:...E<gt>> directive is used as a repeated 5260subrule, the rule name expression is "late-bound". That is, the name of 5261the rule to be called is re-evaluated I<each time> a match attempt is 5262made. Hence, the following grammar: 5263 5264 { $::species = 'dogs' } 5265 5266 pair: 'two' <matchrule:$::species>(s) 5267 5268 dogs: /dogs/ { $::species = 'cats' } 5269 5270 cats: /cats/ 5271 5272will match the string "two dogs cats cats" completely, whereas it will 5273only match the string "two dogs dogs dogs" up to the eighth letter. If 5274the rule name were "early bound" (that is, evaluated only the first 5275time the directive is encountered in a production), the reverse 5276behaviour would be expected. 5277 5278Note that the C<matchrule> directive takes a string that is to be treated 5279as a rule name, I<not> as a rule invocation. That is, 5280it's like a Perl symbolic reference, not an C<eval>. Just as you can say: 5281 5282 $subname = 'foo'; 5283 5284 # and later... 5285 5286 &{$foo}(@args); 5287 5288but not: 5289 5290 $subname = 'foo(@args)'; 5291 5292 # and later... 5293 5294 &{$foo}; 5295 5296likewise you can say: 5297 5298 $rulename = 'foo'; 5299 5300 # and in the grammar... 5301 5302 <matchrule:$rulename>[@args] 5303 5304but not: 5305 5306 $rulename = 'foo[@args]'; 5307 5308 # and in the grammar... 5309 5310 <matchrule:$rulename> 5311 5312 5313=item Deferred actions 5314 5315The C<E<lt>defer:...E<gt>> directive is used to specify an action to be 5316performed when (and only if!) the current production ultimately succeeds. 5317 5318Whenever a C<E<lt>defer:...E<gt>> directive appears, the code it specifies 5319is converted to a closure (an anonymous subroutine reference) which is 5320queued within the active parser object. Note that, 5321because the deferred code is converted to a closure, the values of any 5322"local" variable (such as C<$text>, <@item>, etc.) are preserved 5323until the deferred code is actually executed. 5324 5325If the parse ultimately succeeds 5326I<and> the production in which the C<E<lt>defer:...E<gt>> directive was 5327evaluated formed part of the successful parse, then the deferred code is 5328executed immediately before the parse returns. If however the production 5329which queued a deferred action fails, or one of the higher-level 5330rules which called that production fails, then the deferred action is 5331removed from the queue, and hence is never executed. 5332 5333For example, given the grammar: 5334 5335 sentence: noun trans noun 5336 | noun intrans 5337 5338 noun: 'the dog' 5339 { print "$item[1]\t(noun)\n" } 5340 | 'the meat' 5341 { print "$item[1]\t(noun)\n" } 5342 5343 trans: 'ate' 5344 { print "$item[1]\t(transitive)\n" } 5345 5346 intrans: 'ate' 5347 { print "$item[1]\t(intransitive)\n" } 5348 | 'barked' 5349 { print "$item[1]\t(intransitive)\n" } 5350 5351then parsing the sentence C<"the dog ate"> would produce the output: 5352 5353 the dog (noun) 5354 ate (transitive) 5355 the dog (noun) 5356 ate (intransitive) 5357 5358This is because, even though the first production of C<sentence> 5359ultimately fails, its initial subrules C<noun> and C<trans> do match, 5360and hence they execute their associated actions. 5361Then the second production of C<sentence> succeeds, causing the 5362actions of the subrules C<noun> and C<intrans> to be executed as well. 5363 5364On the other hand, if the actions were replaced by C<E<lt>defer:...E<gt>> 5365directives: 5366 5367 sentence: noun trans noun 5368 | noun intrans 5369 5370 noun: 'the dog' 5371 <defer: print "$item[1]\t(noun)\n" > 5372 | 'the meat' 5373 <defer: print "$item[1]\t(noun)\n" > 5374 5375 trans: 'ate' 5376 <defer: print "$item[1]\t(transitive)\n" > 5377 5378 intrans: 'ate' 5379 <defer: print "$item[1]\t(intransitive)\n" > 5380 | 'barked' 5381 <defer: print "$item[1]\t(intransitive)\n" > 5382 5383the output would be: 5384 5385 the dog (noun) 5386 ate (intransitive) 5387 5388since deferred actions are only executed if they were evaluated in 5389a production which ultimately contributes to the successful parse. 5390 5391In this case, even though the first production of C<sentence> caused 5392the subrules C<noun> and C<trans> to match, that production ultimately 5393failed and so the deferred actions queued by those subrules were subsequently 5394discarded. The second production then succeeded, causing the entire 5395parse to succeed, and so the deferred actions queued by the (second) match of 5396the C<noun> subrule and the subsequent match of C<intrans> I<are> preserved and 5397eventually executed. 5398 5399Deferred actions provide a means of improving the performance of a parser, 5400by only executing those actions which are part of the final parse-tree 5401for the input data. 5402 5403Alternatively, deferred actions can be viewed as a mechanism for building 5404(and executing) a 5405customized subroutine corresponding to the given input data, much in the 5406same way that autoactions (see L<"Autoactions">) can be used to build a 5407customized data structure for specific input. 5408 5409Whether or not the action it specifies is ever executed, 5410a C<E<lt>defer:...E<gt>> directive always succeeds, returning the 5411number of deferred actions currently queued at that point. 5412 5413 5414=item Parsing Perl 5415 5416Parse::RecDescent provides limited support for parsing subsets of Perl, 5417namely: quote-like operators, Perl variables, and complete code blocks. 5418 5419The C<E<lt>perl_quotelikeE<gt>> directive can be used to parse any Perl 5420quote-like operator: C<'a string'>, C<m/a pattern/>, C<tr{ans}{lation}>, 5421etc. It does this by calling Text::Balanced::quotelike(). 5422 5423If a quote-like operator is found, a reference to an array of eight elements 5424is returned. Those elements are identical to the last eight elements returned 5425by Text::Balanced::extract_quotelike() in an array context, namely: 5426 5427=over 4 5428 5429=item [0] 5430 5431the name of the quotelike operator -- 'q', 'qq', 'm', 's', 'tr' -- if the 5432operator was named; otherwise C<undef>, 5433 5434=item [1] 5435 5436the left delimiter of the first block of the operation, 5437 5438=item [2] 5439 5440the text of the first block of the operation 5441(that is, the contents of 5442a quote, the regex of a match, or substitution or the target list of a 5443translation), 5444 5445=item [3] 5446 5447the right delimiter of the first block of the operation, 5448 5449=item [4] 5450 5451the left delimiter of the second block of the operation if there is one 5452(that is, if it is a C<s>, C<tr>, or C<y>); otherwise C<undef>, 5453 5454=item [5] 5455 5456the text of the second block of the operation if there is one 5457(that is, the replacement of a substitution or the translation list 5458of a translation); otherwise C<undef>, 5459 5460=item [6] 5461 5462the right delimiter of the second block of the operation (if any); 5463otherwise C<undef>, 5464 5465=item [7] 5466 5467the trailing modifiers on the operation (if any); otherwise C<undef>. 5468 5469=back 5470 5471If a quote-like expression is not found, the directive fails with the usual 5472C<undef> value. 5473 5474The C<E<lt>perl_variableE<gt>> directive can be used to parse any Perl 5475variable: $scalar, @array, %hash, $ref->{field}[$index], etc. 5476It does this by calling Text::Balanced::extract_variable(). 5477 5478If the directive matches text representing a valid Perl variable 5479specification, it returns that text. Otherwise it fails with the usual 5480C<undef> value. 5481 5482The C<E<lt>perl_codeblockE<gt>> directive can be used to parse curly-brace-delimited block of Perl code, such as: { $a = 1; f() =~ m/pat/; }. 5483It does this by calling Text::Balanced::extract_codeblock(). 5484 5485If the directive matches text representing a valid Perl code block, 5486it returns that text. Otherwise it fails with the usual C<undef> value. 5487 5488You can also tell it what kind of brackets to use as the outermost 5489delimiters. For example: 5490 5491 arglist: <perl_codeblock ()> 5492 5493causes an arglist to match a perl code block whose outermost delimiters 5494are C<(...)> (rather than the default C<{...}>). 5495 5496 5497=item Constructing tokens 5498 5499Eventually, Parse::RecDescent will be able to parse tokenized input, as 5500well as ordinary strings. In preparation for this joyous day, the 5501C<E<lt>token:...E<gt>> directive has been provided. 5502This directive creates a token which will be suitable for 5503input to a Parse::RecDescent parser (when it eventually supports 5504tokenized input). 5505 5506The text of the token is the value of the 5507immediately preceding item in the production. A 5508C<E<lt>token:...E<gt>> directive always succeeds with a return 5509value which is the hash reference that is the new token. It also 5510sets the return value for the production to that hash ref. 5511 5512The C<E<lt>token:...E<gt>> directive makes it easy to build 5513a Parse::RecDescent-compatible lexer in Parse::RecDescent: 5514 5515 my $lexer = new Parse::RecDescent q 5516 { 5517 lex: token(s) 5518 5519 token: /a\b/ <token:INDEF> 5520 | /the\b/ <token:DEF> 5521 | /fly\b/ <token:NOUN,VERB> 5522 | /[a-z]+/i { lc $item[1] } <token:ALPHA> 5523 | <error: Unknown token> 5524 5525 }; 5526 5527which will eventually be able to be used with a regular Parse::RecDescent 5528grammar: 5529 5530 my $parser = new Parse::RecDescent q 5531 { 5532 startrule: subrule1 subrule 2 5533 5534 # ETC... 5535 }; 5536 5537either with a pre-lexing phase: 5538 5539 $parser->startrule( $lexer->lex($data) ); 5540 5541or with a lex-on-demand approach: 5542 5543 $parser->startrule( sub{$lexer->token(\$data)} ); 5544 5545But at present, only the C<E<lt>token:...E<gt>> directive is 5546actually implemented. The rest is vapourware. 5547 5548=item Specifying operations 5549 5550One of the commonest requirements when building a parser is to specify 5551binary operators. Unfortunately, in a normal grammar, the rules for 5552such things are awkward: 5553 5554 disjunction: conjunction ('or' conjunction)(s?) 5555 { $return = [ $item[1], @{$item[2]} ] } 5556 5557 conjunction: atom ('and' atom)(s?) 5558 { $return = [ $item[1], @{$item[2]} ] } 5559 5560or inefficient: 5561 5562 disjunction: conjunction 'or' disjunction 5563 { $return = [ $item[1], @{$item[2]} ] } 5564 | conjunction 5565 { $return = [ $item[1] ] } 5566 5567 conjunction: atom 'and' conjunction 5568 { $return = [ $item[1], @{$item[2]} ] } 5569 | atom 5570 { $return = [ $item[1] ] } 5571 5572and either way is ugly and hard to get right. 5573 5574The C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives provide an 5575easier way of specifying such operations. Using C<E<lt>leftop:...E<gt>> the 5576above examples become: 5577 5578 disjunction: <leftop: conjunction 'or' conjunction> 5579 conjunction: <leftop: atom 'and' atom> 5580 5581The C<E<lt>leftop:...E<gt>> directive specifies a left-associative binary operator. 5582It is specified around three other grammar elements 5583(typically subrules or terminals), which match the left operand, 5584the operator itself, and the right operand respectively. 5585 5586A C<E<lt>leftop:...E<gt>> directive such as: 5587 5588 disjunction: <leftop: conjunction 'or' conjunction> 5589 5590is converted to the following: 5591 5592 disjunction: ( conjunction ('or' conjunction)(s?) 5593 { $return = [ $item[1], @{$item[2]} ] } ) 5594 5595In other words, a C<E<lt>leftop:...E<gt>> directive matches the left operand followed by zero 5596or more repetitions of both the operator and the right operand. It then 5597flattens the matched items into an anonymous array which becomes the 5598(single) value of the entire C<E<lt>leftop:...E<gt>> directive. 5599 5600For example, an C<E<lt>leftop:...E<gt>> directive such as: 5601 5602 output: <leftop: ident '<<' expr > 5603 5604when given a string such as: 5605 5606 cout << var << "str" << 3 5607 5608would match, and C<$item[1]> would be set to: 5609 5610 [ 'cout', 'var', '"str"', '3' ] 5611 5612In other words: 5613 5614 output: <leftop: ident '<<' expr > 5615 5616is equivalent to a left-associative operator: 5617 5618 output: ident { $return = [$item[1]] } 5619 | ident '<<' expr { $return = [@item[1,3]] } 5620 | ident '<<' expr '<<' expr { $return = [@item[1,3,5]] } 5621 | ident '<<' expr '<<' expr '<<' expr { $return = [@item[1,3,5,7]] } 5622 # ...etc... 5623 5624 5625Similarly, the C<E<lt>rightop:...E<gt>> directive takes a left operand, an operator, and a right operand: 5626 5627 assign: <rightop: var '=' expr > 5628 5629and converts them to: 5630 5631 assign: ( (var '=' {$return=$item[1]})(s?) expr 5632 { $return = [ @{$item[1]}, $item[2] ] } ) 5633 5634which is equivalent to a right-associative operator: 5635 5636 assign: expr { $return = [$item[1]] } 5637 | var '=' expr { $return = [@item[1,3]] } 5638 | var '=' var '=' expr { $return = [@item[1,3,5]] } 5639 | var '=' var '=' var '=' expr { $return = [@item[1,3,5,7]] } 5640 # ...etc... 5641 5642 5643Note that for both the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives, the directive does not normally 5644return the operator itself, just a list of the operands involved. This is 5645particularly handy for specifying lists: 5646 5647 list: '(' <leftop: list_item ',' list_item> ')' 5648 { $return = $item[2] } 5649 5650There is, however, a problem: sometimes the operator is itself significant. 5651For example, in a Perl list a comma and a C<=E<gt>> are both 5652valid separators, but the C<=E<gt>> has additional stringification semantics. 5653Hence it's important to know which was used in each case. 5654 5655To solve this problem the 5656C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives 5657I<do> return the operator(s) as well, under two circumstances. 5658The first case is where the operator is specified as a subrule. In that instance, 5659whatever the operator matches is returned (on the assumption that if the operator 5660is important enough to have its own subrule, then it's important enough to return). 5661 5662The second case is where the operator is specified as a regular 5663expression. In that case, if the first bracketed subpattern of the 5664regular expression matches, that matching value is returned (this is analogous to 5665the behaviour of the Perl C<split> function, except that only the first subpattern 5666is returned). 5667 5668In other words, given the input: 5669 5670 ( a=>1, b=>2 ) 5671 5672the specifications: 5673 5674 list: '(' <leftop: list_item separator list_item> ')' 5675 5676 separator: ',' | '=>' 5677 5678or: 5679 5680 list: '(' <leftop: list_item /(,|=>)/ list_item> ')' 5681 5682cause the list separators to be interleaved with the operands in the 5683anonymous array in C<$item[2]>: 5684 5685 [ 'a', '=>', '1', ',', 'b', '=>', '2' ] 5686 5687 5688But the following version: 5689 5690 list: '(' <leftop: list_item /,|=>/ list_item> ')' 5691 5692returns only the operators: 5693 5694 [ 'a', '1', 'b', '2' ] 5695 5696Of course, none of the above specifications handle the case of an empty 5697list, since the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives 5698require at least a single right or left operand to match. To specify 5699that the operator can match "trivially", 5700it's necessary to add a C<(s?)> qualifier to the directive: 5701 5702 list: '(' <leftop: list_item /(,|=>)/ list_item>(s?) ')' 5703 5704Note that in almost all the above examples, the first and third arguments 5705of the C<<leftop:...E<gt>> directive were the same subrule. That is because 5706C<<leftop:...E<gt>>'s are frequently used to specify "separated" lists of the 5707same type of item. To make such lists easier to specify, the following 5708syntax: 5709 5710 list: element(s /,/) 5711 5712is exactly equivalent to: 5713 5714 list: <leftop: element /,/ element> 5715 5716Note that the separator must be specified as a raw pattern (i.e. 5717not a string or subrule). 5718 5719 5720=item Scored productions 5721 5722By default, Parse::RecDescent grammar rules always accept the first 5723production that matches the input. But if two or more productions may 5724potentially match the same input, choosing the first that does so may 5725not be optimal. 5726 5727For example, if you were parsing the sentence "time flies like an arrow", 5728you might use a rule like this: 5729 5730 sentence: verb noun preposition article noun { [@item] } 5731 | adjective noun verb article noun { [@item] } 5732 | noun verb preposition article noun { [@item] } 5733 5734Each of these productions matches the sentence, but the third one 5735is the most likely interpretation. However, if the sentence had been 5736"fruit flies like a banana", then the second production is probably 5737the right match. 5738 5739To cater for such situations, the C<E<lt>score:...E<gt>> can be used. 5740The directive is equivalent to an unconditional C<E<lt>rejectE<gt>>, 5741except that it allows you to specify a "score" for the current 5742production. If that score is numerically greater than the best 5743score of any preceding production, the current production is cached for later 5744consideration. If no later production matches, then the cached 5745production is treated as having matched, and the value of the 5746item immediately before its C<E<lt>score:...E<gt>> directive is returned as the 5747result. 5748 5749In other words, by putting a C<E<lt>score:...E<gt>> directive at the end of 5750each production, you can select which production matches using 5751criteria other than specification order. For example: 5752 5753 sentence: verb noun preposition article noun { [@item] } <score: sensible(@item)> 5754 | adjective noun verb article noun { [@item] } <score: sensible(@item)> 5755 | noun verb preposition article noun { [@item] } <score: sensible(@item)> 5756 5757Now, when each production reaches its respective C<E<lt>score:...E<gt>> 5758directive, the subroutine C<sensible> will be called to evaluate the 5759matched items (somehow). Once all productions have been tried, the 5760one which C<sensible> scored most highly will be the one that is 5761accepted as a match for the rule. 5762 5763The variable $score always holds the current best score of any production, 5764and the variable $score_return holds the corresponding return value. 5765 5766As another example, the following grammar matches lines that may be 5767separated by commas, colons, or semi-colons. This can be tricky if 5768a colon-separated line also contains commas, or vice versa. The grammar 5769resolves the ambiguity by selecting the rule that results in the 5770fewest fields: 5771 5772 line: seplist[sep=>','] <score: -@{$item[1]}> 5773 | seplist[sep=>':'] <score: -@{$item[1]}> 5774 | seplist[sep=>" "] <score: -@{$item[1]}> 5775 5776 seplist: <skip:""> <leftop: /[^$arg{sep}]*/ "$arg{sep}" /[^$arg{sep}]*/> 5777 5778Note the use of negation within the C<E<lt>score:...E<gt>> directive 5779to ensure that the seplist with the most items gets the lowest score. 5780 5781As the above examples indicate, it is often the case that all productions 5782in a rule use exactly the same C<E<lt>score:...E<gt>> directive. It is 5783tedious to have to repeat this identical directive in every production, so 5784Parse::RecDescent also provides the C<E<lt>autoscore:...E<gt>> directive. 5785 5786If an C<E<lt>autoscore:...E<gt>> directive appears in any 5787production of a rule, the code it specifies is used as the scoring 5788code for every production of that rule, except productions that already 5789end with an explicit C<E<lt>score:...E<gt>> directive. Thus the rules above could 5790be rewritten: 5791 5792 line: <autoscore: -@{$item[1]}> 5793 line: seplist[sep=>','] 5794 | seplist[sep=>':'] 5795 | seplist[sep=>" "] 5796 5797 5798 sentence: <autoscore: sensible(@item)> 5799 | verb noun preposition article noun { [@item] } 5800 | adjective noun verb article noun { [@item] } 5801 | noun verb preposition article noun { [@item] } 5802 5803Note that the C<E<lt>autoscore:...E<gt>> directive itself acts as an 5804unconditional C<E<lt>rejectE<gt>>, and (like the C<E<lt>rulevar:...E<gt>> 5805directive) is pruned at compile-time wherever possible. 5806 5807 5808=item Dispensing with grammar checks 5809 5810During the compilation phase of parser construction, Parse::RecDescent performs 5811a small number of checks on the grammar it's given. Specifically it checks that 5812the grammar is not left-recursive, that there are no "insatiable" constructs of 5813the form: 5814 5815 rule: subrule(s) subrule 5816 5817and that there are no rules missing (i.e. referred to, but never defined). 5818 5819These checks are important during development, but can slow down parser 5820construction in stable code. So Parse::RecDescent provides the 5821E<lt>nocheckE<gt> directive to turn them off. The directive can only appear 5822before the first rule definition, and switches off checking throughout the rest 5823of the current grammar. 5824 5825Typically, this directive would be added when a parser has been thoroughly 5826tested and is ready for release. 5827 5828=back 5829 5830 5831=head2 Subrule argument lists 5832 5833It is occasionally useful to pass data to a subrule which is being invoked. For 5834example, consider the following grammar fragment: 5835 5836 classdecl: keyword decl 5837 5838 keyword: 'struct' | 'class'; 5839 5840 decl: # WHATEVER 5841 5842The C<decl> rule might wish to know which of the two keywords was used 5843(since it may affect some aspect of the way the subsequent declaration 5844is interpreted). C<Parse::RecDescent> allows the grammar designer to 5845pass data into a rule, by placing that data in an I<argument list> 5846(that is, in square brackets) immediately after any subrule item in a 5847production. Hence, we could pass the keyword to C<decl> as follows: 5848 5849 classdecl: keyword decl[ $item[1] ] 5850 5851 keyword: 'struct' | 'class'; 5852 5853 decl: # WHATEVER 5854 5855The argument list can consist of any number (including zero!) of comma-separated 5856Perl expressions. In other words, it looks exactly like a Perl anonymous 5857array reference. For example, we could pass the keyword, the name of the 5858surrounding rule, and the literal 'keyword' to C<decl> like so: 5859 5860 classdecl: keyword decl[$item[1],$item[0],'keyword'] 5861 5862 keyword: 'struct' | 'class'; 5863 5864 decl: # WHATEVER 5865 5866Within the rule to which the data is passed (C<decl> in the above examples) 5867that data is available as the elements of a local variable C<@arg>. Hence 5868C<decl> might report its intentions as follows: 5869 5870 classdecl: keyword decl[$item[1],$item[0],'keyword'] 5871 5872 keyword: 'struct' | 'class'; 5873 5874 decl: { print "Declaring $arg[0] (a $arg[2])\n"; 5875 print "(this rule called by $arg[1])" } 5876 5877Subrule argument lists can also be interpreted as hashes, simply by using 5878the local variable C<%arg> instead of C<@arg>. Hence we could rewrite the 5879previous example: 5880 5881 classdecl: keyword decl[keyword => $item[1], 5882 caller => $item[0], 5883 type => 'keyword'] 5884 5885 keyword: 'struct' | 'class'; 5886 5887 decl: { print "Declaring $arg{keyword} (a $arg{type})\n"; 5888 print "(this rule called by $arg{caller})" } 5889 5890Both C<@arg> and C<%arg> are always available, so the grammar designer may 5891choose whichever convention (or combination of conventions) suits best. 5892 5893Subrule argument lists are also useful for creating "rule templates" 5894(especially when used in conjunction with the C<E<lt>matchrule:...E<gt>> 5895directive). For example, the subrule: 5896 5897 list: <matchrule:$arg{rule}> /$arg{sep}/ list[%arg] 5898 { $return = [ $item[1], @{$item[3]} ] } 5899 | <matchrule:$arg{rule}> 5900 { $return = [ $item[1]] } 5901 5902is a handy template for the common problem of matching a separated list. 5903For example: 5904 5905 function: 'func' name '(' list[rule=>'param',sep=>';'] ')' 5906 5907 param: list[rule=>'name',sep=>','] ':' typename 5908 5909 name: /\w+/ 5910 5911 typename: name 5912 5913 5914When a subrule argument list is used with a repeated subrule, the argument list 5915goes I<before> the repetition specifier: 5916 5917 list: /some|many/ thing[ $item[1] ](s) 5918 5919The argument list is "late bound". That is, it is re-evaluated for every 5920repetition of the repeated subrule. 5921This means that each repeated attempt to match the subrule may be 5922passed a completely different set of arguments if the value of the 5923expression in the argument list changes between attempts. So, for 5924example, the grammar: 5925 5926 { $::species = 'dogs' } 5927 5928 pair: 'two' animal[$::species](s) 5929 5930 animal: /$arg[0]/ { $::species = 'cats' } 5931 5932will match the string "two dogs cats cats" completely, whereas 5933it will only match the string "two dogs dogs dogs" up to the 5934eighth letter. If the value of the argument list were "early bound" 5935(that is, evaluated only the first time a repeated subrule match is 5936attempted), one would expect the matching behaviours to be reversed. 5937 5938Of course, it is possible to effectively "early bind" such argument lists 5939by passing them a value which does not change on each repetition. For example: 5940 5941 { $::species = 'dogs' } 5942 5943 pair: 'two' { $::species } animal[$item[2]](s) 5944 5945 animal: /$arg[0]/ { $::species = 'cats' } 5946 5947 5948Arguments can also be passed to the start rule, simply by appending them 5949to the argument list with which the start rule is called (I<after> the 5950"line number" parameter). For example, given: 5951 5952 $parser = new Parse::RecDescent ( $grammar ); 5953 5954 $parser->data($text, 1, "str", 2, \@arr); 5955 5956 # ^^^^^ ^ ^^^^^^^^^^^^^^^ 5957 # | | | 5958 # TEXT TO BE PARSED | | 5959 # STARTING LINE NUMBER | 5960 # ELEMENTS OF @arg WHICH IS PASSED TO RULE data 5961 5962then within the productions of the rule C<data>, the array C<@arg> will contain 5963C<("str", 2, \@arr)>. 5964 5965 5966=head2 Alternations 5967 5968Alternations are implicit (unnamed) rules defined as part of a production. An 5969alternation is defined as a series of '|'-separated productions inside a 5970pair of round brackets. For example: 5971 5972 character: 'the' ( good | bad | ugly ) /dude/ 5973 5974Every alternation implicitly defines a new subrule, whose 5975automatically-generated name indicates its origin: 5976"_alternation_<I>_of_production_<P>_of_rule<R>" for the appropriate 5977values of <I>, <P>, and <R>. A call to this implicit subrule is then 5978inserted in place of the brackets. Hence the above example is merely a 5979convenient short-hand for: 5980 5981 character: 'the' 5982 _alternation_1_of_production_1_of_rule_character 5983 /dude/ 5984 5985 _alternation_1_of_production_1_of_rule_character: 5986 good | bad | ugly 5987 5988Since alternations are parsed by recursively calling the parser generator, 5989any type(s) of item can appear in an alternation. For example: 5990 5991 character: 'the' ( 'high' "plains" # Silent, with poncho 5992 | /no[- ]name/ # Silent, no poncho 5993 | vengeance_seeking # Poncho-optional 5994 | <error> 5995 ) drifter 5996 5997In this case, if an error occurred, the automatically generated 5998message would be: 5999 6000 ERROR (line <N>): Invalid implicit subrule: Expected 6001 'high' or /no[- ]name/ or generic, 6002 but found "pacifist" instead 6003 6004Since every alternation actually has a name, it's even possible 6005to extend or replace them: 6006 6007 parser->Replace( 6008 "_alternation_1_of_production_1_of_rule_character: 6009 'generic Eastwood'" 6010 ); 6011 6012More importantly, since alternations are a form of subrule, they can be given 6013repetition specifiers: 6014 6015 character: 'the' ( good | bad | ugly )(?) /dude/ 6016 6017 6018=head2 Incremental Parsing 6019 6020C<Parse::RecDescent> provides two methods - C<Extend> and C<Replace> - which 6021can be used to alter the grammar matched by a parser. Both methods 6022take the same argument as C<Parse::RecDescent::new>, namely a 6023grammar specification string 6024 6025C<Parse::RecDescent::Extend> interprets the grammar specification and adds any 6026productions it finds to the end of the rules for which they are specified. For 6027example: 6028 6029 $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; 6030 parser->Extend($add); 6031 6032adds two productions to the rule "name" (creating it if necessary) and one 6033production to the rule "desc". 6034 6035C<Parse::RecDescent::Replace> is identical, except that it first resets are 6036rule specified in the additional grammar, removing any existing productions. 6037Hence after: 6038 6039 $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; 6040 parser->Replace($add); 6041 6042there are I<only> valid "name"s and the one possible description. 6043 6044A more interesting use of the C<Extend> and C<Replace> methods is to call them 6045inside the action of an executing parser. For example: 6046 6047 typedef: 'typedef' type_name identifier ';' 6048 { $thisparser->Extend("type_name: '$item[3]'") } 6049 | <error> 6050 6051 identifier: ...!type_name /[A-Za-z_]w*/ 6052 6053which automatically prevents type names from being typedef'd, or: 6054 6055 command: 'map' key_name 'to' abort_key 6056 { $thisparser->Replace("abort_key: '$item[2]'") } 6057 | 'map' key_name 'to' key_name 6058 { map_key($item[2],$item[4]) } 6059 | abort_key 6060 { exit if confirm("abort?") } 6061 6062 abort_key: 'q' 6063 6064 key_name: ...!abort_key /[A-Za-z]/ 6065 6066which allows the user to change the abort key binding, but not to unbind it. 6067 6068The careful use of such constructs makes it possible to reconfigure a 6069a running parser, eliminating the need for semantic feedback by 6070providing syntactic feedback instead. However, as currently implemented, 6071C<Replace()> and C<Extend()> have to regenerate and re-C<eval> the 6072entire parser whenever they are called. This makes them quite slow for 6073large grammars. 6074 6075In such cases, the judicious use of an interpolated regex is likely to 6076be far more efficient: 6077 6078 typedef: 'typedef' type_name/ identifier ';' 6079 { $thisparser->{local}{type_name} .= "|$item[3]" } 6080 | <error> 6081 6082 identifier: ...!type_name /[A-Za-z_]w*/ 6083 6084 type_name: /$thisparser->{local}{type_name}/ 6085 6086 6087=head2 Precompiling parsers 6088 6089Normally Parse::RecDescent builds a parser from a grammar at run-time. 6090That approach simplifies the design and implementation of parsing code, 6091but has the disadvantage that it slows the parsing process down - you 6092have to wait for Parse::RecDescent to build the parser every time the 6093program runs. Long or complex grammars can be particularly slow to 6094build, leading to unacceptable delays at start-up. 6095 6096To overcome this, the module provides a way of "pre-building" a parser 6097object and saving it in a separate module. That module can then be used 6098to create clones of the original parser. 6099 6100A grammar may be precompiled using the C<Precompile> class method. 6101For example, to precompile a grammar stored in the scalar $grammar, 6102and produce a class named PreGrammar in a module file named PreGrammar.pm, 6103you could use: 6104 6105 use Parse::RecDescent; 6106 6107 Parse::RecDescent->Precompile([$options_hashref], $grammar, "PreGrammar", ["RuntimeClass"]); 6108 6109The first required argument is the grammar string, the second is the 6110name of the class to be built. The name of the module file is 6111generated automatically by appending ".pm" to the last element of the 6112class name. Thus 6113 6114 Parse::RecDescent->Precompile($grammar, "My::New::Parser"); 6115 6116would produce a module file named Parser.pm. 6117 6118After the class name, you may specify the name of the runtime_class 6119called by the Precompiled parser. See L</"Precompiled runtimes"> for 6120more details. 6121 6122An optional hash reference may be supplied as the first argument to 6123C<Precompile>. This argument is currently EXPERIMENTAL, and may change 6124in a future release of Parse::RecDescent. The only supported option 6125is currently C<-standalone>, see L</"Standalone precompiled parsers">. 6126 6127It is somewhat tedious to have to write a small Perl program just to 6128generate a precompiled grammar class, so Parse::RecDescent has some special 6129magic that allows you to do the job directly from the command-line. 6130 6131If your grammar is specified in a file named F<grammar>, you can generate 6132a class named Yet::Another::Grammar like so: 6133 6134 > perl -MParse::RecDescent - grammar Yet::Another::Grammar [Runtime::Class] 6135 6136This would produce a file named F<Grammar.pm> containing the full 6137definition of a class called Yet::Another::Grammar. Of course, to use 6138that class, you would need to put the F<Grammar.pm> file in a 6139directory named F<Yet/Another>, somewhere in your Perl include path. 6140 6141Having created the new class, it's very easy to use it to build 6142a parser. You simply C<use> the new module, and then call its 6143C<new> method to create a parser object. For example: 6144 6145 use Yet::Another::Grammar; 6146 my $parser = Yet::Another::Grammar->new(); 6147 6148The effect of these two lines is exactly the same as: 6149 6150 use Parse::RecDescent; 6151 6152 open GRAMMAR_FILE, "grammar" or die; 6153 local $/; 6154 my $grammar = <GRAMMAR_FILE>; 6155 6156 my $parser = Parse::RecDescent->new($grammar); 6157 6158only considerably faster. 6159 6160Note however that the parsers produced by either approach are exactly 6161the same, so whilst precompilation has an effect on I<set-up> speed, 6162it has no effect on I<parsing> speed. RecDescent 2.0 will address that 6163problem. 6164 6165=head3 Standalone precompiled parsers 6166 6167Until version 1.967003 of Parse::RecDescent, parser modules built with 6168C<Precompile> were dependent on Parse::RecDescent. Future 6169Parse::RecDescent releases with different internal implementations 6170would break pre-existing precompiled parsers. 6171 6172Version 1.967_005 added the ability for Parse::RecDescent to include 6173itself in the resulting .pm file if you pass the boolean option 6174C<-standalone> to C<Precompile>: 6175 6176 Parse::RecDescent->Precompile({ -standalone => 1, }, 6177 $grammar, "My::New::Parser"); 6178 6179Parse::RecDescent is included as C<$class::_Runtime> in order to avoid 6180conflicts between an installed version of Parse::RecDescent and other 6181precompiled, standalone parser made with Parse::RecDescent. The name 6182of this class may be changed with the C<-runtime_class> option to 6183Precompile. This renaming is experimental, and is subject to change 6184in future versions. 6185 6186Precompiled parsers remain dependent on Parse::RecDescent by default, 6187as this feature is still considered experimental. In the future, 6188standalone parsers will become the default. 6189 6190=head3 Precompiled runtimes 6191 6192Standalone precompiled parsers each include a copy of 6193Parse::RecDescent. For users who have a family of related precompiled 6194parsers, this is very inefficient. C<Precompile> now supports an 6195experimental C<-runtime_class> option. To build a precompiled parser 6196with a different runtime name, call: 6197 6198 Parse::RecDescent->Precompile({ 6199 -standalone => 1, 6200 -runtime_class => "My::Runtime", 6201 }, 6202 $grammar, "My::New::Parser"); 6203 6204The resulting standalone parser will contain a copy of 6205Parse::RecDescent, renamed to "My::Runtime". 6206 6207To build a set of parsers that C<use> a custom-named runtime, without 6208including that runtime in the output, simply build those parsers with 6209C<-runtime_class> and without C<-standalone>: 6210 6211 Parse::RecDescent->Precompile({ 6212 -runtime_class => "My::Runtime", 6213 }, 6214 $grammar, "My::New::Parser"); 6215 6216The runtime itself must be generated as well, so that it may be 6217C<use>d by My::New::Parser. To generate the runtime file, use one of 6218the two folling calls: 6219 6220 Parse::RecDescent->PrecompiledRuntime("My::Runtime"); 6221 6222 Parse::RecDescent->Precompile({ 6223 -standalone => 1, 6224 -runtime_class => "My::Runtime", 6225 }, 6226 '', # empty grammar 6227 "My::Runtime"); 6228 6229=head1 GOTCHAS 6230 6231This section describes common mistakes that grammar writers seem to 6232make on a regular basis. 6233 6234=head2 1. Expecting an error to always invalidate a parse 6235 6236A common mistake when using error messages is to write the grammar like this: 6237 6238 file: line(s) 6239 6240 line: line_type_1 6241 | line_type_2 6242 | line_type_3 6243 | <error> 6244 6245The expectation seems to be that any line that is not of type 1, 2 or 3 will 6246invoke the C<E<lt>errorE<gt>> directive and thereby cause the parse to fail. 6247 6248Unfortunately, that only happens if the error occurs in the very first line. 6249The first rule states that a C<file> is matched by one or more lines, so if 6250even a single line succeeds, the first rule is completely satisfied and the 6251parse as a whole succeeds. That means that any error messages generated by 6252subsequent failures in the C<line> rule are quietly ignored. 6253 6254Typically what's really needed is this: 6255 6256 file: line(s) eofile { $return = $item[1] } 6257 6258 line: line_type_1 6259 | line_type_2 6260 | line_type_3 6261 | <error> 6262 6263 eofile: /^\Z/ 6264 6265The addition of the C<eofile> subrule to the first production means that 6266a file only matches a series of successful C<line> matches I<that consume the 6267complete input text>. If any input text remains after the lines are matched, 6268there must have been an error in the last C<line>. In that case the C<eofile> 6269rule will fail, causing the entire C<file> rule to fail too. 6270 6271Note too that C<eofile> must match C</^\Z/> (end-of-text), I<not> 6272C</^\cZ/> or C</^\cD/> (end-of-file). 6273 6274And don't forget the action at the end of the production. If you just 6275write: 6276 6277 file: line(s) eofile 6278 6279then the value returned by the C<file> rule will be the value of its 6280last item: C<eofile>. Since C<eofile> always returns an empty string 6281on success, that will cause the C<file> rule to return that empty 6282string. Apart from returning the wrong value, returning an empty string 6283will trip up code such as: 6284 6285 $parser->file($filetext) || die; 6286 6287(since "" is false). 6288 6289Remember that Parse::RecDescent returns undef on failure, 6290so the only safe test for failure is: 6291 6292 defined($parser->file($filetext)) || die; 6293 6294 6295=head2 2. Using a C<return> in an action 6296 6297An action is like a C<do> block inside the subroutine implementing the 6298surrounding rule. So if you put a C<return> statement in an action: 6299 6300 range: '(' start '..' end )' 6301 { return $item{end} } 6302 /\s+/ 6303 6304that subroutine will immediately return, without checking the rest of 6305the items in the current production (e.g. the C</\s+/>) and without 6306setting up the necessary data structures to tell the parser that the 6307rule has succeeded. 6308 6309The correct way to set a return value in an action is to set the C<$return> 6310variable: 6311 6312 range: '(' start '..' end )' 6313 { $return = $item{end} } 6314 /\s+/ 6315 6316 6317=head2 2. Setting C<$Parse::RecDescent::skip> at parse time 6318 6319If you want to change the default skipping behaviour (see 6320L<Terminal Separators> and the C<E<lt>skip:...E<gt>> directive) by setting 6321C<$Parse::RecDescent::skip> you have to remember to set this variable 6322I<before> creating the grammar object. 6323 6324For example, you might want to skip all Perl-like comments with this 6325regular expression: 6326 6327 my $skip_spaces_and_comments = qr/ 6328 (?mxs: 6329 \s+ # either spaces 6330 | \# .*?$ # or a dash and whatever up to the end of line 6331 )* # repeated at will (in whatever order) 6332 /; 6333 6334And then: 6335 6336 my $parser1 = Parse::RecDescent->new($grammar); 6337 6338 $Parse::RecDescent::skip = $skip_spaces_and_comments; 6339 6340 my $parser2 = Parse::RecDescent->new($grammar); 6341 6342 $parser1->parse($text); # this does not cope with comments 6343 $parser2->parse($text); # this skips comments correctly 6344 6345The two parsers behave differently, because any skipping behaviour 6346specified via C<$Parse::RecDescent::skip> is hard-coded when the 6347grammar object is built, not at parse time. 6348 6349 6350=head1 DIAGNOSTICS 6351 6352Diagnostics are intended to be self-explanatory (particularly if you 6353use B<-RD_HINT> (under B<perl -s>) or define C<$::RD_HINT> inside the program). 6354 6355C<Parse::RecDescent> currently diagnoses the following: 6356 6357=over 4 6358 6359=item * 6360 6361Invalid regular expressions used as pattern terminals (fatal error). 6362 6363=item * 6364 6365Invalid Perl code in code blocks (fatal error). 6366 6367=item * 6368 6369Lookahead used in the wrong place or in a nonsensical way (fatal error). 6370 6371=item * 6372 6373"Obvious" cases of left-recursion (fatal error). 6374 6375=item * 6376 6377Missing or extra components in a C<E<lt>leftopE<gt>> or C<E<lt>rightopE<gt>> 6378directive. 6379 6380=item * 6381 6382Unrecognisable components in the grammar specification (fatal error). 6383 6384=item * 6385 6386"Orphaned" rule components specified before the first rule (fatal error) 6387or after an C<E<lt>errorE<gt>> directive (level 3 warning). 6388 6389=item * 6390 6391Missing rule definitions (this only generates a level 3 warning, since you 6392may be providing them later via C<Parse::RecDescent::Extend()>). 6393 6394=item * 6395 6396Instances where greedy repetition behaviour will almost certainly 6397cause the failure of a production (a level 3 warning - see 6398L<"ON-GOING ISSUES AND FUTURE DIRECTIONS"> below). 6399 6400=item * 6401 6402Attempts to define rules named 'Replace' or 'Extend', which cannot be 6403called directly through the parser object because of the predefined 6404meaning of C<Parse::RecDescent::Replace> and 6405C<Parse::RecDescent::Extend>. (Only a level 2 warning is generated, since 6406such rules I<can> still be used as subrules). 6407 6408=item * 6409 6410Productions which consist of a single C<E<lt>error?E<gt>> 6411directive, and which therefore may succeed unexpectedly 6412(a level 2 warning, since this might conceivably be the desired effect). 6413 6414=item * 6415 6416Multiple consecutive lookahead specifiers (a level 1 warning only, since their 6417effects simply accumulate). 6418 6419=item * 6420 6421Productions which start with a C<E<lt>rejectE<gt>> or C<E<lt>rulevar:...E<gt>> 6422directive. Such productions are optimized away (a level 1 warning). 6423 6424=item * 6425 6426Rules which are autogenerated under C<$::AUTOSTUB> (a level 1 warning). 6427 6428=back 6429 6430=head1 AUTHOR 6431 6432Damian Conway (damian@conway.org) 6433Jeremy T. Braun (JTBRAUN@CPAN.org) [current maintainer] 6434 6435=head1 BUGS AND IRRITATIONS 6436 6437There are undoubtedly serious bugs lurking somewhere in this much code :-) 6438Bug reports, test cases and other feedback are most welcome. 6439 6440Ongoing annoyances include: 6441 6442=over 4 6443 6444=item * 6445 6446There's no support for parsing directly from an input stream. 6447If and when the Perl Gods give us regular expressions on streams, 6448this should be trivial (ahem!) to implement. 6449 6450=item * 6451 6452The parser generator can get confused if actions aren't properly 6453closed or if they contain particularly nasty Perl syntax errors 6454(especially unmatched curly brackets). 6455 6456=item * 6457 6458The generator only detects the most obvious form of left recursion 6459(potential recursion on the first subrule in a rule). More subtle 6460forms of left recursion (for example, through the second item in a 6461rule after a "zero" match of a preceding "zero-or-more" repetition, 6462or after a match of a subrule with an empty production) are not found. 6463 6464=item * 6465 6466Instead of complaining about left-recursion, the generator should 6467silently transform the grammar to remove it. Don't expect this 6468feature any time soon as it would require a more sophisticated 6469approach to parser generation than is currently used. 6470 6471=item * 6472 6473The generated parsers don't always run as fast as might be wished. 6474 6475=item * 6476 6477The meta-parser should be bootstrapped using C<Parse::RecDescent> :-) 6478 6479=back 6480 6481=head1 ON-GOING ISSUES AND FUTURE DIRECTIONS 6482 6483=over 4 6484 6485=item 1. 6486 6487Repetitions are "incorrigibly greedy" in that they will eat everything they can 6488and won't backtrack if that behaviour causes a production to fail needlessly. 6489So, for example: 6490 6491 rule: subrule(s) subrule 6492 6493will I<never> succeed, because the repetition will eat all the 6494subrules it finds, leaving none to match the second item. Such 6495constructions are relatively rare (and C<Parse::RecDescent::new> generates a 6496warning whenever they occur) so this may not be a problem, especially 6497since the insatiable behaviour can be overcome "manually" by writing: 6498 6499 rule: penultimate_subrule(s) subrule 6500 6501 penultimate_subrule: subrule ...subrule 6502 6503The issue is that this construction is exactly twice as expensive as the 6504original, whereas backtracking would add only 1/I<N> to the cost (for 6505matching I<N> repetitions of C<subrule>). I would welcome feedback on 6506the need for backtracking; particularly on cases where the lack of it 6507makes parsing performance problematical. 6508 6509=item 2. 6510 6511Having opened that can of worms, it's also necessary to consider whether there 6512is a need for non-greedy repetition specifiers. Again, it's possible (at some 6513cost) to manually provide the required functionality: 6514 6515 rule: nongreedy_subrule(s) othersubrule 6516 6517 nongreedy_subrule: subrule ...!othersubrule 6518 6519Overall, the issue is whether the benefit of this extra functionality 6520outweighs the drawbacks of further complicating the (currently 6521minimalist) grammar specification syntax, and (worse) introducing more overhead 6522into the generated parsers. 6523 6524=item 3. 6525 6526An C<E<lt>autocommitE<gt>> directive would be nice. That is, it would be useful to be 6527able to say: 6528 6529 command: <autocommit> 6530 command: 'find' name 6531 | 'find' address 6532 | 'do' command 'at' time 'if' condition 6533 | 'do' command 'at' time 6534 | 'do' command 6535 | unusual_command 6536 6537and have the generator work out that this should be "pruned" thus: 6538 6539 command: 'find' name 6540 | 'find' <commit> address 6541 | 'do' <commit> command <uncommit> 6542 'at' time 6543 'if' <commit> condition 6544 | 'do' <commit> command <uncommit> 6545 'at' <commit> time 6546 | 'do' <commit> command 6547 | unusual_command 6548 6549There are several issues here. Firstly, should the 6550C<E<lt>autocommitE<gt>> automatically install an C<E<lt>uncommitE<gt>> 6551at the start of the last production (on the grounds that the "command" 6552rule doesn't know whether an "unusual_command" might start with "find" 6553or "do") or should the "unusual_command" subgraph be analysed (to see 6554if it I<might> be viable after a "find" or "do")? 6555 6556The second issue is how regular expressions should be treated. The simplest 6557approach would be simply to uncommit before them (on the grounds that they 6558I<might> match). Better efficiency would be obtained by analyzing all preceding 6559literal tokens to determine whether the pattern would match them. 6560 6561Overall, the issues are: can such automated "pruning" approach a hand-tuned 6562version sufficiently closely to warrant the extra set-up expense, and (more 6563importantly) is the problem important enough to even warrant the non-trivial 6564effort of building an automated solution? 6565 6566=back 6567 6568=head1 SUPPORT 6569 6570=head2 Source Code Repository 6571 6572L<http://github.com/jtbraun/Parse-RecDescent> 6573 6574=head2 Mailing List 6575 6576Visit L<http://www.perlfoundation.org/perl5/index.cgi?parse_recdescent> to sign up for the mailing list. 6577 6578L<http://www.PerlMonks.org> is also a good place to ask 6579questions. Previous posts about Parse::RecDescent can typically be 6580found with this search: 6581L<http://perlmonks.org/index.pl?node=recdescent>. 6582 6583=head2 FAQ 6584 6585Visit L<Parse::RecDescent::FAQ> for answers to frequently (and not so 6586frequently) asked questions about Parse::RecDescent. 6587 6588=head2 View/Report Bugs 6589 6590To view the current bug list or report a new issue visit 6591L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-RecDescent>. 6592 6593=head1 SEE ALSO 6594 6595L<Regexp::Grammars> provides Parse::RecDescent style parsing using native 6596Perl 5.10 regular expressions. 6597 6598 6599=head1 LICENCE AND COPYRIGHT 6600 6601Copyright (c) 1997-2007, Damian Conway C<< <DCONWAY@CPAN.org> >>. All rights 6602reserved. 6603 6604This module is free software; you can redistribute it and/or 6605modify it under the same terms as Perl itself. See L<perlartistic>. 6606 6607 6608=head1 DISCLAIMER OF WARRANTY 6609 6610BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 6611FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 6612OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 6613PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER 6614EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 6615WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 6616ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH 6617YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL 6618NECESSARY SERVICING, REPAIR, OR CORRECTION. 6619 6620IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 6621WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 6622REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE 6623LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, 6624OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE 6625THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 6626RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 6627FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 6628SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 6629SUCH DAMAGES. 6630