1package Pugs::Emitter::Rule::Perl5::Ratchet; 2 3# p6-rule perl5 emitter for ":ratchet" (non-backtracking) 4# see: RuleInline.pl, RuleInline-more.pl for a program prototype 5 6#use Smart::Comments '####'; 7use strict; 8use warnings; 9use Pugs::Emitter::Rule::Perl5::CharClass; 10use Data::Dumper; 11$Data::Dumper::Indent = 1; 12 13our $direction = "+"; # XXX make lexical 14our $sigspace = 0; 15our $capture_count; 16our $capture_to_array; 17our $RegexPos; 18 19our $count; 20sub id { 21 if (!defined $count) { 22 if (defined $::PCR_SEED) { 23 #warn "SET SEED!!!"; 24 srand($::PCR_SEED); 25 } 26 $count = 1000 + int(rand(1000)); 27 } 28 'I' . ($count++) 29} 30 31sub call_subrule { 32 my ( $subrule, $tab, $positionals, @param ) = @_; 33 $subrule = "\$grammar->" . $subrule 34 unless $subrule =~ / :: | \. | -> /x; 35 $subrule =~ s/\./->/; # XXX - source filter 36 37 $positionals = shift @param if $positionals eq '' && @param == 1; 38 39 return 40"$tab $subrule( \$s, { " 41 . "p => \$pos, " 42 . "positionals => [ $positionals ], " 43 . "args => {" . 44 join(", ",@param) . 45 "}, " 46 . "}, undef )"; 47} 48 49sub quote_constant { 50 my $const; 51 if ( $_[0] eq "\\" ) { 52 $const = "chr(".ord("\\").")"; 53 } 54 elsif ( $_[0] eq "'" ) { 55 $const = "chr(".ord("'").")" 56 } 57 else { 58 $const = "'$_[0]'" 59 } 60 return $const; 61} 62 63sub call_constant { 64 return " 1 # null constant\n" 65 unless length($_[0]); 66 my $const = quote_constant( $_[0] ); 67 my $len = length( eval $const ); 68 #print "Const: [$_[0]] $const $len \n"; 69 return 70" 71$_[1] ## <constant> 72$_[1] ## pos: @$RegexPos 73$_[1] ( ( substr( \$s, \$pos, $len ) eq $const ) 74$_[1] ? ( \$pos $direction= $len or 1 ) 75$_[1] : 0 76$_[1] ) 77$_[1] ## </constant>\n"; 78} 79 80sub call_perl5 { 81 my $const = $_[0]; 82 $_[1] = ' ' unless defined $_[1]; 83 #print "CONST: $const - $direction \n"; 84 return 85"$_[1] ## <perl5> 86$_[1] ( ( substr( \$s, \$pos ) =~ m/^($const)/ ) 87$_[1] ? ( \$pos $direction= length( \$1 ) or 1 ) 88$_[1] : 0 89$_[1] ) 90$_[1] ## </perl5>\n"; 91} 92 93sub emit { 94 my ($grammar, $ast, $param) = @_; 95 # runtime parameters: $grammar, $string, $state, $arg_list 96 # rule parameters: see Runtime::Rule.pm 97 local $sigspace = $param->{sigspace} ? 1 : 0; # XXX - $sigspace should be lexical 98 ### ratchet emit sigspace: $sigspace 99 local $capture_count = -1; 100 local $capture_to_array = 0; 101 #print "rule: ", Dumper( $ast ); 102 return 103 "## <global> 104## sigspace: $sigspace 105## ratchet: 1 106do { my \$rule; \$rule = sub { 107 my \$grammar = \$_[0]; 108 my \$s = \$_[1]; 109 \$_[3] = \$_[2] unless defined \$_[3]; # backwards compat 110 no warnings 'substr', 'uninitialized', 'syntax'; 111 my \%pad;\n" . 112 #" my \$pos;\n" . 113 #" print \"match arg_list = \$_[1]\n\";\n" . 114 #" print 'match ', Dumper(\\\@_);\n" . 115 #" print \"match arg_list = \@{[\%{\$_[1]} ]}\n\" if defined \$_[1];\n" . 116 #" warn \"match pos = \", pos(\$_[1]), \"\\n\";\n" . 117" my \$m; 118 my \$bool; 119 my \@pos; 120 # XXX :pos(X) takes the precedence over :continue ? 121 if (defined \$_[3]{p}) { 122 push \@pos, \$_[3]{p} || 0; 123 } elsif (\$_[3]{continue}) { 124 push \@pos, (pos(\$_[1]) || 0) .. length(\$s); 125 } else { 126 push \@pos, 0..length(\$s); 127 } 128 for my \$pos ( \@pos ) { 129 my \%index; 130 my \@match; 131 my \%named; 132 \$bool = 1; 133 \$named{KEY} = \$_[3]{KEY} if exists \$_[3]{KEY}; 134 \$m = Pugs::Runtime::Match->new( { 135 str => \\\$s, from => \\(0+\$pos), to => \\(\$pos), 136 bool => \\\$bool, match => \\\@match, named => \\\%named, capture => undef, 137 } ); 138 { 139 my \$prior = \$::_V6_PRIOR_; 140 local \$::_V6_PRIOR_ = \$prior; 141 \$bool = 0 unless 142" . 143 #" do { TAILCALL: ;\n" . 144 emit_rule( $ast, ' ' ) . "; 145 } 146 if ( \$bool ) { 147 my \$prior = \$::_V6_PRIOR_; 148 \$::_V6_PRIOR_ = sub { 149 local \$main::_V6_PRIOR_ = \$prior; 150 \$rule->(\@_); 151 }; 152 #warn \"pos2 = \", \$pos, \"\\n\"; 153 pos(\$_[1]) = \$pos if \$_[3]{continue}; 154 last; 155 } 156 } # /for 157 \$::_V6_MATCH_ = \$m; 158 return \$m; 159} } 160## </global>\n"; 161} 162 163sub emit_rule { 164 my $n = $_[0]; 165 my $tab = $_[1] . ' '; 166 die "unknown node: ", Dumper( $n ) 167 unless ref( $n ) eq 'HASH'; 168 #print "NODE ", Dumper($n); 169 my @keys = grep { substr($_, 0, 1) ne '_' } keys %$n; 170 ### Node keys: @keys 171 my ($k) = @keys; 172 my $v = $n->{$k}; 173 local $RegexPos = $n->{_pos}; 174 ### $RegexPos 175 if (!defined $RegexPos) { 176 # warn "WARNING: No _pos slot found for AST node '$k'.\n"; 177 # warn Dumper($n); 178 $RegexPos = []; 179 } 180 # XXX - use real references 181 no strict 'refs'; 182 #print "NODE ", Dumper($k), ", ", Dumper($v); 183 my $code = $k->( $v, $tab ); 184 return $code; 185} 186 187#rule nodes 188 189sub non_capturing_group { 190 return emit_rule( $_[0], $_[1] ); 191} 192sub quant { 193 my $term = $_[0]->{'term'}; 194 my $quantifier = $_[0]->{quant} || ''; 195 my $greedy = $_[0]->{greedy} || ''; # + ? 196 die "greediness control not implemented: $greedy" 197 if $greedy; 198 #print "QUANT: ",Dumper($_[0]); 199 my $id = id(); 200 my $tab = ( $quantifier eq '' ) ? $_[1] : $_[1] . " "; 201 my $ws = metasyntax( { metasyntax => 'ws', modifier => '.' }, $tab ); 202 my $ws3 = ( $sigspace && $_[0]->{ws3} ne '' ) ? " &&\n$ws" : ''; 203 204 my $rul; 205 { 206 #print "Term: ", Dumper($term), "\n"; 207 my $cap = $capture_to_array; 208 local $capture_to_array = $cap || ( $quantifier ne '' ); 209 $rul = emit_rule( $term, $tab ); 210 211 # rollback on fail 212 $rul = "$_[1] ( " 213 . " ( \$pad{$id} = \$pos or 1 ) &&\n" 214 . $rul 215 . " ||" 216 . " ( ( \$pos = \$pad{$id} ) && 0 )" 217 . " )"; 218 } 219 220 $rul = "$ws &&\n$rul" if $sigspace && $_[0]->{ws1} ne ''; 221 $rul = "$rul &&\n$ws" if $sigspace && $_[0]->{ws2} ne ''; 222 #print $rul; 223 return " 224$_[1] ## <group> 225$_[1] ## pos: @$RegexPos 226" . $rul . " 227$_[1] ## </group>\n" 228 if $quantifier eq ''; 229 # * + ? 230 # TODO: *? +? ?? 231 # TODO: *+ ++ ?+ 232 # TODO: quantifier + capture creates Array 233 #warn Dumper( $quantifier ); 234 if ( ref( $quantifier ) eq 'HASH' ) 235 { 236 my $code = $quantifier->{closure}; 237 if ( ref( $code ) ) { 238 if ( defined $Pugs::Compiler::Perl6::VERSION ) { 239 #print " perl6 compiler is loaded \n"; 240 $code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' ); 241 } 242 }; 243 my @count = eval $code; 244 #warn "code: $code = [ @count ]"; 245 246 die "quantifier not implemented: " . Dumper( $quantifier ) 247 if @count ne 1 248 || $count[0] == 0; 249 250 return 251 "$_[1] ## <quant>\n" . 252 "$_[1] ## pos: @$RegexPos\n" . 253 "$_[1] (\n" . 254 join( ' && ', ($rul) x $count[0] ) . 255 "\n" . 256 "$_[1] )$ws3\n" . 257 "$_[1] ## </quant>\n"; 258 } 259 return 260 "$_[1] ## <quant>\n" . 261 "$_[1] ## pos: @$RegexPos\n" . 262 "$_[1] (\n$rul\n" . 263 "$_[1] || ( \$bool = 1 )\n" . 264 "$_[1] )$ws3\n" . 265 "$_[1] ## </quant>\n" 266 if $quantifier eq '?'; 267 return 268 "$_[1] ## <quant>\n" . 269 "$_[1] ## pos: @$RegexPos\n" . 270 "$_[1] do { while (\n$rul) {}; \$bool = 1 }$ws3\n" . 271 "$_[1] ## </quant>\n" 272 if $quantifier eq '*'; 273 return 274 "$_[1] ## <quant>\n" . 275 "$_[1] ## pos: @$RegexPos\n" . 276 "$_[1] (\n$rul\n" . 277 "$_[1] && do { while (\n$rul) {}; \$bool = 1 }\n" . 278 "$_[1] )$ws3\n" . 279 "$_[1] ## </quant>\n" 280 if $quantifier eq '+'; 281 die "quantifier not implemented: $quantifier"; 282} 283 284sub alt { 285 my @s; 286 # print 'Alt: '; 287 my $count = $capture_count; 288 my $max = -1; 289 my $id = id(); 290 for ( @{$_[0]} ) { 291 $capture_count = $count; 292 my $tmp = emit_rule( $_, $_[1].' ' ); 293 # print ' ',$capture_count; 294 $max = $capture_count 295 if $capture_count > $max; 296 push @s, $tmp if $tmp; 297 } 298 $capture_count = $max; 299 # print " max = $capture_count\n"; 300 return 301 "$_[1] ## <alt> 302$_[1] ## pos: @$RegexPos 303$_[1] ( 304$_[1] ( \$pad{$id} = \$pos or 1 ) 305$_[1] && ( 306" . join( " 307$_[1] ) 308$_[1] || ( 309$_[1] ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 ) 310$_[1] && ", 311 @s 312 ) . " 313$_[1] ) 314$_[1] ) 315$_[1] ## </alt>\n"; 316} 317sub alt1 { &alt } 318sub conjunctive { 319 my @s; 320 # print 'conjunctive: '; 321 my $count = $capture_count; 322 my $max = -1; 323 my $id = id(); 324 for ( @{$_[0]} ) { 325 $capture_count = $count; 326 my $tmp = emit_rule( $_, $_[1].' ' ); 327 # print ' ',$capture_count; 328 $max = $capture_count 329 if $capture_count > $max; 330 push @s, $tmp if $tmp; 331 } 332 $capture_count = $max; 333 # print " max = $capture_count\n"; 334 return 335 "$_[1] ## <conjunctive> 336$_[1] ## pos: @$RegexPos 337$_[1] ( 338$_[1] ( \$pad{$id} = \$pos or 1 ) 339$_[1] && ( 340" . join( " 341$_[1] ) 342$_[1] && ( 343$_[1] ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 ) 344$_[1] && ", 345 @s 346 ) . " 347$_[1] ) 348$_[1] ) 349$_[1] ## </conjunctive>\n"; 350} 351sub conjunctive1 { &conjunctive } 352sub concat { 353 my @s; 354 355=for optimizing 356 # optimize for the common case of "words" 357 # Note: this optimization has almost no practical effect 358 my $is_constant = 0; 359 for ( @{$_[0]} ) { 360 if ( ! $sigspace && exists $_->{quant} ) { 361 my $was_constant = $is_constant; 362 $is_constant = 363 $_->{quant}->{quant} eq '' 364 && exists $_->{quant}->{term}->{constant}; 365 #print "concat: ", Dumper( $_ ); 366 if ( $is_constant && $was_constant && $direction ne '-' ) { 367 $s[-1]->{quant}->{term}->{constant} .= 368 $_->{quant}->{term}->{constant}; 369 #print "constant: ",$s[-1]->{quant}->{term}->{constant},"\n"; 370 next; 371 } 372 } 373 push @s, $_; 374 } 375 376 for ( @s ) { 377 $_ = emit_rule( $_, $_[1] ); 378 } 379=cut 380 381 # Try to remove non-greedy quantifiers, by inserting a lookahead; 382 # cheat: / .*? b / 383 # into: / [ <!before b> . ]* b / 384 # TODO - make it work for '+' quantifier too 385 for my $i ( 0 .. @{$_[0]} - 1 ) { 386 if ( exists $_[0][$i]{quant} 387 && $_[0][$i]{quant}{quant} eq '*' 388 && $_[0][$i]{quant}{greedy} eq '?' 389 ) { 390 my $tmp = { quant => { 391 %{ $_[0][$i]{quant} }, 392 greedy => '', quant => '' 393 }, 394 _pos => $_[0][$i]{_pos} 395 }; 396 $_[0][$i] = { 397 _pos => $_[0][$i]{_pos}, 398 quant => { 399 greedy => '', 400 quant => $_[0][$i]{quant}{quant}, 401 ws1 => '', 402 ws2 => '', 403 ws3 => '', 404 term => { 405 _pos => $_[0][$i]{_pos}, 406 concat => [ 407 { 408 _pos => $_[0][$i]{_pos}, 409 before => { 410 rule => { 411 _pos => $_[0][$i]{_pos}, 412 concat => [ 413 @{ $_[0] }[$i+1 .. $#{ $_[0] } ] 414 ], 415 }, 416 modifier => '!', 417 } 418 }, 419 $tmp, 420 ], 421 }, 422 }, 423 }; 424 #warn "Quant: ",Dumper($_[0]); 425 } 426 } 427 428 for ( @{$_[0]} ) { 429 my $tmp = emit_rule( $_, $_[1] ); 430 push @s, $tmp if $tmp; 431 } 432 @s = reverse @s if $direction eq '-'; 433 return 434"$_[1] ## <concat> 435$_[1] ## pos: @$RegexPos 436$_[1] (\n" . join( "\n$_[1] &&\n", @s ) . " 437$_[1] ) 438$_[1] ## </concat>\n"; 439} 440 441sub code { 442 return "$_[1] $_[0]\n"; 443} 444 445sub dot { 446 " 447$_[1] ## <dot> 448$_[1] ## pos: @$RegexPos 449$_[1] ( substr( \$s, \$pos$direction$direction, 1 ) ne '' ) 450$_[1] ## </dot>\n" 451} 452 453sub variable { 454 my $name = "$_[0]"; 455 my $value = undef; 456 # XXX - eval $name doesn't look up in user lexical pad 457 # XXX - what &xxx interpolate to? 458 459 #print "VAR: $name \n"; 460 # expand embedded $scalar 461 if ( $name =~ /^\$/ ) { 462 # $^a, $^b 463 if ( $name =~ /^ \$ \^ ([^\s]*) /x ) { 464 my $index = ord($1)-ord('a'); 465 #print "Variable #$index\n"; 466 #return "$_[1] constant( \$_[7][$index] )\n"; 467 468 my $code = 469 " ... sub { 470 #print \"Runtime Variable args[\", join(\",\",\@_) ,\"] \$_[7][$index]\\n\"; 471 return constant( \$_[7][$index] )->(\@_); 472 }"; 473 $code =~ s/^/$_[1]/mg; 474 return "$code\n"; 475 } 476 477 $value = eval $name; 478 } 479 480 # expand embedded @arrays 481 if ( $name =~ /^\@/ ) { 482 my $code = q! 483 join( 484 '|', 485 ! . $name . q! 486 ) 487 !; 488 return 489"$_[1] ## <variable> 490$_[1] ## pos: @$RegexPos 491$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . $code . ')/ ) 492$_[1] ? ( \$pos $direction= length( \$1 ) or 1 ) 493$_[1] : 0 494$_[1] ') ) 495$_[1] ## </variable>\n"; 496 } 497 498 # expand embedded %hash 499 if ( $name =~ /^%/ ) { 500 my $id = '$' . id(); 501 my $preprocess_hash = 'Pugs::Runtime::Regex::preprocess_hash'; 502 my $code = 503" 504 ## <variable> 505 ## pos: @$RegexPos 506 do { 507 our $id; 508 our ${id}_sizes; 509 unless ( $id ) { 510 my \$hash = \\$name; 511 my \%sizes = map { length(\$_) => 1 } keys \%\$hash; 512 ${id}_sizes = [ sort { \$b <=> \$a } keys \%sizes ]; 513 " . #print \"sizes: \@${id}_sizes\\n\"; 514 "$id = \$hash; 515 } 516 " . #print 'keys: ',Dumper( $id ); 517 "my \$match = 0; 518 my \$key; 519 for ( \@". $id ."_sizes ) { 520 \$key = ( \$pos <= length( \$s ) 521 ? substr( \$s, \$pos, \$_ ) 522 : '' ); 523 " . #print \"try ".$name." \$_ = \$key; \$s\\\n\"; 524 "if ( exists ". $id ."->{\$key} ) { 525 #\$named{KEY} = \$key; 526 #\$::_V6_MATCH_ = \$m; 527 #print \"m: \", Dumper( \$::_V6_MATCH_->data ) 528 # if ( \$key eq 'until' ); 529 " . #print \"* ".$name."\{'\$key\'} at \$pos \\\n\"; 530 "\$match = $preprocess_hash( $id, \$key )->( \$s, \$grammar, { p => ( \$pos + \$_ ), positionals => [ ], args => { KEY => \$key } }, undef ); 531 " . #print \"match: \", Dumper( \$match->data ); 532 "last if \$match; 533 } 534 } 535 if ( \$match ) { 536 \$pos = \$match->to; 537 #print \"match: \$key at \$pos = \", Dumper( \$match->data ); 538 \$bool = 1; 539 }; # else { \$bool = 0 } 540 \$match; 541 } 542 ## </variable> 543"; 544 #print $code; 545 return $code; 546 } 547 die "interpolation of $name not implemented" 548 unless defined $value; 549 550 return call_constant( $value, $_[1] ); 551} 552sub special_char { 553 my ($char, $data) = $_[0] =~ /^.(.)(.*)/; 554 555 return call_perl5( '\\N{$data}', $_[1] ) 556 if $char eq 'c'; 557 return call_perl5( '(?!\\N{$data}).', $_[1] ) 558 if $char eq 'C'; 559 560 return call_perl5( '\\x{'.$data.'}', $_[1] ) 561 if $char eq 'x'; 562 return call_perl5( '(?!\\x{'.$data.'}).', $_[1] ) 563 if $char eq 'X'; 564 565 return special_char( sprintf("\\x%X", oct($data) ) ) 566 if $char eq 'o'; 567 return special_char( sprintf("\\X%X", oct($data) ) ) 568 if $char eq 'O'; 569 570 return call_perl5( '(?:\n\r?|\r\n?)', $_[1] ) 571 if $char eq 'n'; 572 return call_perl5( '(?!\n\r?|\r\n?).', $_[1] ) 573 if $char eq 'N'; 574 575 # XXX - Infinite loop in pugs stdrules.t 576 #return metasyntax( '?_horizontal_ws', $_[1] ) 577 return call_perl5( '[\x20\x09]' ) 578 if $char eq 'h'; 579 return call_perl5( '[^\x20\x09]' ) 580 if $char eq 'H'; 581 #return metasyntax( '?_vertical_ws', $_[1] ) 582 return call_perl5( '[\x0A\x0D]' ) 583 if $char eq 'v'; 584 return call_perl5( '[^\x0A\x0D]' ) 585 if $char eq 'V'; 586 587 for ( qw( r n t e f w d s ) ) { 588 return call_perl5( "\\$_", $_[1] ) if $char eq $_; 589 return call_perl5( "[^\\$_]", $_[1] ) if $char eq uc($_); 590 } 591 $char = '\\\\' if $char eq '\\'; 592 ### special char: $char 593 return call_constant( $char, $_[1] ); 594} 595 596sub match_variable { 597 my $name = $_[0]; 598 my $num = substr($name,1); 599 #print "var name: ", $num, "\n"; 600 601 return 602" 603$_[1] ## <match_variable> 604$_[1] ## pos: @$RegexPos 605$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . \$m->{$num} . ')/ ) 606$_[1] ? ( \$pos $direction= length( \$1 ) or 1 ) 607$_[1] : 0 608$_[1] ') ) 609$_[1] ## </match_varaible> 610"; 611} 612 613sub closure { 614 #print "closure: ",Dumper($_[0]); 615 my $code = $_[0]{closure}; 616 my $modifier = $_[0]{modifier}; # 'plain', '', '?', '!' 617 618 die "invalid closure modifier: . " 619 if $modifier eq '.'; 620 621 #die "closure modifier not implemented '$modifier'" 622 # unless $modifier eq 'plain'; 623 624 if ( ref( $code ) 625 && defined $Pugs::Compiler::Perl6::VERSION 626 ) { 627 #print " perl6 compiler is loaded \n"; 628 $code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' ); 629 $code = '{ my $_V6_SELF = shift; ' . $code . '}'; # make it a "method" 630 } 631 else { 632 #print " perl6 compiler is NOT loaded \n"; 633 # XXX XXX XXX - source-filter - temporary hacks to translate p6 to p5 634 # $()<name> 635 $code =~ s/ ([^']) \$ \$ (\d+) /$1\${ \$_[0]->[$2] }/sgx; 636 $code =~ s/ ([^']) \$ (\d+) /$1\$_[0]->[$2]/sgx; 637 $code =~ s/ ([^']) \$ \( \) < (.*?) > /$1\$_[0]->{$2}/sgx; 638 # $<name> 639 $code =~ s/ ([^']) \$ \$ < (.*?) > /$1\${ \$_[0]->{qw($2)} }/sgx; 640 $code =~ s/ ([^']) \$ < (.*?) > /$1\$_[0]->{qw($2)}/sgx; 641 # $() 642 $code =~ s/ ([^']) \$ \( \) /$1\$_[0]->()/sgx; 643 # $/ 644 $code =~ s/ ([^']) \$ \/ ([\{\[]) /$1\$_[0]->$2/sgx; 645 $code =~ s/ ([^']) \$ \/ /$1\$_[0]/sgx; 646 #$code =~ s/ use \s+ v6 \s* ; / # use v6\n/sgx; 647 } 648 #print "Code: $code\n"; 649 # "plain" {...return ...} 650 return 651 "$_[1] ## <closure>\n" 652 . "$_[1] ## pos: @$RegexPos\n" 653 . "$_[1] do {\n" 654 . "$_[1] local \$::_V6_SUCCEED = 1;\n" 655 . "$_[1] \$::_V6_MATCH_ = \$m;\n" 656 . "$_[1] \$m->data->{capture} = \\( sub $code->( \$m ) ); \n" 657 . "$_[1] \$bool = \$::_V6_SUCCEED;\n" 658 . "$_[1] \$::_V6_MATCH_ = \$m if \$bool; \n" 659 . "$_[1] return \$m if \$bool; \n" 660 . "$_[1] }\n" 661 . "$_[1] ## </closure>\n" 662 if $code =~ /return/; 663 664 # "plain" {...} without return 665 return 666 "$_[1] ## <closure>\n" 667 . "$_[1] ## pos: @$RegexPos\n" 668 . "$_[1] do { \n" 669 . "$_[1] local \$::_V6_SUCCEED = 1;\n" 670 . "$_[1] \$::_V6_MATCH_ = \$m;\n" 671 . "$_[1] sub $code->( \$m );\n" 672 . "$_[1] 1;\n" 673 . "$_[1] }\n" 674 . "$_[1] ## </closure>\n" 675 if $modifier eq 'plain'; 676 # "?" <?{...}> 677 return 678 "$_[1] ## <closure>\n" . 679 "$_[1] ## pos: @$RegexPos\n" . 680 "$_[1] do { \n" . 681 "$_[1] local \$::_V6_SUCCEED = 1;\n" . 682 "$_[1] \$::_V6_MATCH_ = \$m;\n" . 683 "$_[1] \$bool = ( sub $code->( \$m ) ) ? 1 : 0; \n" . 684 "$_[1] }" . 685 "$_[1] ## </closure>\n" 686 if $modifier eq '?'; 687 # "!" <!{...}> 688 return 689 "$_[1] ## <closure>\n" . 690 "$_[1] ## pos: @$RegexPos\n" . 691 "$_[1] do { \n" . 692 "$_[1] local \$::_V6_SUCCEED = 1;\n" . 693 "$_[1] \$::_V6_MATCH_ = \$m;\n" . 694 "$_[1] \$bool = ( sub $code->( \$m ) ) ? 0 : 1; \n" . 695 "$_[1] }" . 696 "$_[1] ## </closure>\n" 697 if $modifier eq '!'; 698 699} 700sub capturing_group { 701 my $program = $_[0]; 702 703 $capture_count++; 704 { 705 local $capture_count = -1; 706 local $capture_to_array = 0; 707 $program = emit_rule( $program, $_[1].' ' ) 708 if ref( $program ); 709 } 710 711 return " 712$_[1] ## <capture> 713$_[1] do{ 714$_[1] my \$hash = do { 715$_[1] my \$bool = 1; 716$_[1] my \$from = \$pos; 717$_[1] my \@match; 718$_[1] my \%named; 719$_[1] \$bool = 0 unless 720" . $program . "; 721$_[1] { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef } 722$_[1] }; 723$_[1] my \$bool = \${\$hash->{'bool'}};" . 724 ( $capture_to_array 725 ? " 726$_[1] if ( \$bool ) { 727$_[1] push \@{ \$match[ $capture_count ] }, Pugs::Runtime::Match->new( \$hash ); 728$_[1] }" 729 : " 730$_[1] \$match[ $capture_count ] = Pugs::Runtime::Match->new( \$hash );" 731 ) . " 732$_[1] \$bool; 733$_[1] } 734$_[1] ## </capture>\n"; 735} 736 737sub capture_as_result { 738 my $program = $_[0]; 739 740 $capture_count++; 741 { 742 local $capture_count = -1; 743 local $capture_to_array = 0; 744 $program = emit_rule( $program, $_[1].' ' ) 745 if ref( $program ); 746 } 747 return "$_[1] ## <capture> 748$_[1] ## pos: @$RegexPos 749$_[1] do{ 750$_[1] my \$hash = do { 751$_[1] my \$bool = 1; 752$_[1] my \$from = \$pos; 753$_[1] my \@match; 754$_[1] my \%named; 755$_[1] \$bool = 0 unless 756" . $program . "; 757$_[1] { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef } 758$_[1] }; 759$_[1] my \$bool = \${\$hash->{'bool'}}; 760$_[1] \$m->data->{capture} = \\( \"\" . Pugs::Runtime::Match->new( \$hash ) ); 761$_[1] \$bool; 762$_[1] } 763$_[1] ## </capture>\n"; 764} 765sub named_capture { 766 my $name = $_[0]{ident}; 767 ### $name 768 if (ref($name) eq 'HASH') { 769 $name = $name->{match_variable} || $name->{variable}; 770 } 771 $name =~ s/^[\$\@\%]//; # TODO - change semantics as needed 772 my $program = $_[0]{rule}; 773 #warn "name [$name]\n"; 774 775 if ( exists $program->{metasyntax} ) { 776 #print "aliased subrule\n"; 777 # $/<name> = $/<subrule> 778 779 my $cmd = $program->{metasyntax}{metasyntax}; 780 die "invalid aliased subrule" 781 unless $cmd =~ /^[_[:alnum:]]/; 782 783 # <subrule ( param, param ) > 784 my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd ); 785 $param_list = '' unless defined $param_list; 786 my @param = split( ',', $param_list ); 787 return "$_[1] ## <named_capture> 788$_[1] ## pos: @$RegexPos 789$_[1] do { 790 my \$prior = \$::_V6_PRIOR_; 791 my \$match =\n" . 792 call_subrule( $subrule, $_[1]." ", "", @param ) . "; 793 \$::_V6_PRIOR_ = \$prior; 794 if ( \$match ) {" . 795 ( $capture_to_array 796 ? " push \@{\$named{'$name'}}, \$match;" 797 : " \$named{'$name'} = \$match;" 798 ) . " 799 \$pos = \$match->to; 800 1 801 } 802 else { 0 } 803 } 804$_[1] ## </named_capture>\n"; 805 } 806 elsif ( exists $program->{capturing_group} ) { 807 #print "aliased capturing_group\n"; 808 # $/<name> = $/[0] 809 { 810 local $capture_count = -1; 811 local $capture_to_array = 0; 812 $program = emit_rule( $program, $_[1].' ' ) 813 if ref( $program ); 814 } 815 return "$_[1] ## <named_capture> 816$_[1] ## pos: @$RegexPos 817$_[1] do{ 818 my \$match = Pugs::Runtime::Match->new( do { 819 my \$bool = 1; 820 my \$from = \$pos; 821 my \@match; 822 my \%named; 823 \$bool = 0 unless " . 824 $program . "; 825 { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef } 826 } ); 827 if ( \$match ) {" . 828 ( $capture_to_array 829 ? " push \@{\$named{'$name'}}, \$match;" 830 : " \$named{'$name'} = \$match;" 831 ) . " 832 \$pos = \$match->to; 833 1 834 } 835 else { 0 } 836 } 837$_[1] ## </named_capture>\n"; 838 } 839 else { 840 #print "aliased non_capturing_group\n"; 841 # $/<name> = "$/" 842 #print Dumper( $_[0] ); 843 $program = emit_rule( $program, $_[1].' ' ); 844 return "$_[1] ## <named_capture> 845$_[1] ## pos: @$RegexPos 846$_[1] do{ 847 my \$from = \$pos; 848 my \$bool = $program; 849 my \$match = Pugs::Runtime::Match->new( 850 { str => \\\$s, from => \\\$from, match => [], named => {}, bool => \\1, to => \\(0+\$pos), capture => undef } 851 );" . 852 ( $capture_to_array 853 ? " push \@{\$named{'$name'}}, \$match;" 854 : " \$named{'$name'} = \$match;" 855 ) . " 856 \$bool 857 } 858$_[1] ## </named_capture>\n"; 859 } 860} 861sub negate { 862 my $program = $_[0]; 863 #print "Negate: ", Dumper($_[0]); 864 $program = emit_rule( $program, $_[1].' ' ) 865 if ref( $program ); 866 return "$_[1] ## <negate> 867$_[1] ## pos: @$RegexPos 868$_[1] do{ 869$_[1] my \$pos1 = \$pos; 870$_[1] do { 871$_[1] my \$pos = \$pos1; 872$_[1] my \$from = \$pos; 873$_[1] my \@match; 874$_[1] my \%named; 875$_[1] \$bool = " . $program . " ? 0 : 1; 876$_[1] \$bool; 877$_[1] }; 878$_[1] } 879$_[1] ## </negate>\n"; 880} 881 882sub before { 883 my $mod = delete $_[0]{modifier} || ''; 884 #### before atom: $_[0] 885 return negate( { before => $_[0], _pos => $_[0]{rule}{_pos}, }, $_[1] ) if $mod eq '!'; 886 my $program = $_[0]{rule}; 887 $program = emit_rule( $program, $_[1].' ' ) 888 if ref( $program ); 889 return " 890$_[1] ## <before> 891$_[1] ## pos: @$RegexPos 892$_[1] do{ 893$_[1] my \$pos1 = \$pos; 894$_[1] do { 895$_[1] my \$pos = \$pos1; 896$_[1] my \$from = \$pos; 897$_[1] my \@match; 898$_[1] my \%named; 899$_[1] \$bool = 0 unless 900" . $program . "; 901$_[1] \$bool; 902$_[1] }; 903$_[1] } 904$_[1] ## </before>\n"; 905} 906 907sub after { 908 my $mod = delete $_[0]{modifier}; 909 return negate( { after => $_[0] }, $_[1] ) if $mod eq '!'; 910 local $direction = "-"; 911 my $program = $_[0]{rule}; 912 $program = emit_rule( $program, $_[1].' ' ) 913 if ref( $program ); 914 return "$_[1] ## <after> 915$_[1] ## pos: @$RegexPos 916$_[1] do{ 917$_[1] my \$pos1 = \$pos; 918$_[1] do { 919$_[1] my \$pos = \$pos1 - 1; 920$_[1] my \$from = \$pos; 921$_[1] my \@match; 922$_[1] my \%named; 923$_[1] \$bool = 0 unless 924" . $program . "; 925$_[1] \$bool; 926$_[1] }; 927$_[1] } 928$_[1] ## </after>\n"; 929} 930 931sub colon { 932 my $str = $_[0]; 933 return "$_[1] 1 # : no-op\n" 934 if $str eq ':'; 935 return "$_[1] ( \$pos >= length( \$s ) )\n" 936 if $str eq '$'; 937 return "$_[1] ( \$pos == 0 )\n" 938 if $str eq '^'; 939 940 return "$_[1] ( \$pos >= length( \$s ) || substr( \$s, \$pos ) =~ ".'/^(?:\n\r?|\r\n?)/m'." )\n" 941 if $str eq '$$'; 942 return "$_[1] ( \$pos == 0 || substr( \$s, 0, \$pos ) =~ ".'/(?:\n\r?|\r\n?)$/m'." )\n" 943 if $str eq '^^'; 944 945 return metasyntax( { metasyntax => '_wb_left', modifier => '?' }, $_[1] ) 946 if $str eq '<<'; 947 return metasyntax( { metasyntax => '_wb_right', modifier => '?' }, $_[1] ) 948 if $str eq '>>'; 949 950 die "'$str' not implemented"; 951} 952sub modifier { 953 my $str = $_[0]; 954 die "modifier '$str' not implemented"; 955} 956sub constant { 957 call_constant( @_ ); 958} 959 960sub char_class { 961 my $cmd = Pugs::Emitter::Rule::Perl5::CharClass::emit( $_[0] ); 962 return call_perl5($cmd, $_[1]); 963} 964 965sub call { 966 #die "not implemented: ", Dumper(\@_); 967 my $param = $_[0]{params}; 968 my $name = $_[0]{method}; 969 # capturing subrule 970 # <subrule ( param, param ) > 971 my ($param_list) = $param =~ /\{(.*)\}/; 972 $param_list = '' unless defined $param_list; 973 my @param = split( ',', $param_list ); 974 #print "param: ", Dumper(\@param); 975 976 # TODO 977 978 if ( $name eq 'at' ) { 979 $param_list ||= 0; # XXX compile-time only 980 return "$_[1] ( \$pos == $param_list )\n" 981 } 982 983 return named_capture( 984 { 985 ident => $name, 986 rule => { metasyntax => { metasyntax => $name }, _pos => $_[0]{_pos}, }, 987 }, 988 $_[1], 989 ); 990} 991 992sub metasyntax { 993 # <cmd> 994 #print Dumper(\@_); 995 my $cmd = $_[0]{metasyntax}; 996 my $modifier = delete $_[0]{modifier} || ''; # . ? ! 997 return negate( { metasyntax => $_[0], _pos => $_[0]{_pos} }, $_[1] ) if $modifier eq '!'; 998 999 my $prefix = substr( $cmd, 0, 1 ); 1000 if ( $prefix eq '@' ) { 1001 # XXX - wrap @array items - see end of Pugs::Grammar::Rule 1002 # TODO - param list 1003 my $name = substr( $cmd, 1 ); 1004 return 1005 "$_[1] ## <metasyntax> 1006$_[1] ## pos: @$RegexPos 1007$_[1] do { 1008 my \$match; 1009 for my \$subrule ( $cmd ) { 1010 \$match = \$subrule->match( \$s, \$grammar, { p => ( \$pos ), positionals => [ ], args => {} }, undef ); 1011 last if \$match; 1012 } 1013 if ( \$match ) {" . 1014 ( $capture_to_array 1015 ? " push \@{\$named{'$name'}}, \$match;" 1016 : " \$named{'$name'} = \$match;" 1017 ) . " 1018 \$pos = \$match->to; 1019 1 1020 } 1021 else { 0 } 1022 } 1023$_[1] ## </metasyntax>\n"; 1024 } 1025 1026 if ( $prefix eq '%' ) { 1027 # XXX - runtime or compile-time interpolation? 1028 my $name = substr( $cmd, 1 ); 1029 # print "<$cmd>\n"; 1030 # return variable( $cmd ); 1031 return "$_[1]## <metasyntax> 1032$_[1] ## pos: @$RegexPos 1033$_[1] do{ 1034 my \$match = " . variable( $cmd, $_[1] ) . "; 1035 if ( \$match ) {" . 1036 ( $capture_to_array 1037 ? " push \@{\$named{'$name'}}, \$match;" 1038 : " \$named{'$name'} = \$match;" 1039 ) . " 1040 \$pos = \$match->to; 1041 1 1042 } 1043 else { 0 } 1044 }\n$_[1]## </metasyntax>\n"; 1045 } 1046 1047 if ( $prefix eq '$' ) { 1048 if ( $cmd =~ /::/ ) { 1049 # call method in fully qualified $package::var 1050 # ...->match( $rule, $str, $grammar, $flags, $state ) 1051 # TODO - send $pos to subrule 1052 return 1053 "$_[1] ## <metasyntax>\n" . 1054 "$_[1] ## pos: @$RegexPos\n" . 1055 "$_[1] do {\n" . 1056 "$_[1] push \@match,\n" . 1057 "$_[1] $cmd->match( \$s, \$grammar, {p => \$pos}, undef );\n" . 1058 "$_[1] \$pos = \$match[-1]->to;\n" . 1059 "$_[1] !\$match[-1] != 1;\n" . 1060 "$_[1] }\n" . 1061 "$_[1] ## </metasyntax>\n"; 1062 } 1063 # call method in lexical $var 1064 # TODO - send $pos to subrule 1065 return 1066 "$_[1] ## <metasyntax>\n" . 1067 "$_[1] ## pos: @$RegexPos\n" . 1068 "$_[1] do {\n" . 1069 "$_[1] my \$r = Pugs::Runtime::Regex::get_variable( '$cmd' );\n" . 1070 "$_[1] push \@match,\n" . 1071 "$_[1] \$r->match( \$s, \$grammar, {p => \$pos}, undef );\n" . 1072 "$_[1] \$pos = \$match[-1]->to;\n" . 1073 "$_[1] !\$match[-1] != 1;\n" . 1074 "$_[1] }\n" . 1075 "$_[1] ## </metasyntax>\n"; 1076 } 1077 if ( $prefix eq q(') ) { # single quoted literal ' 1078 $cmd = substr( $cmd, 1, -1 ); 1079 return call_constant( $cmd, $_[1] ); 1080 } 1081 if ( $prefix eq q(") ) { # interpolated literal " 1082 $cmd = substr( $cmd, 1, -1 ); 1083 warn "<\"...\"> not implemented"; 1084 return; 1085 } 1086 if ( 1087 $modifier eq '.' 1088 || $modifier eq '?' # XXX FIXME 1089 ) 1090 { # non_capturing_subrule / code assertion 1091 #$cmd = substr( $cmd, 1 ); 1092 if ( $cmd =~ /^{/ ) { 1093 warn "code assertion not implemented"; 1094 return; 1095 } 1096 my @param; # TODO 1097 my $subrule = $cmd; 1098 return 1099"$_[1] ## <metasyntax> 1100$_[1] ## pos: @$RegexPos 1101$_[1] do { 1102$_[1] my \$prior = \$::_V6_PRIOR_; 1103$_[1] my \$match =\n" . 1104 call_subrule( $subrule, $_[1]." ", "", @param ) . "; 1105$_[1] \$::_V6_PRIOR_ = \$prior; 1106$_[1] my \$bool = (!\$match != 1); 1107$_[1] \$pos = \$match->to if \$bool; 1108$_[1] \$match; 1109$_[1] } 1110$_[1] ## </metasyntax>\n"; 1111 } 1112 if ( $prefix =~ /[_[:alnum:]]/ ) { 1113 if ( $cmd eq 'cut' ) { 1114 warn "<$cmd> not implemented"; 1115 return; 1116 } 1117 if ( $cmd eq 'commit' ) { 1118 warn "<$cmd> not implemented"; 1119 return; 1120 } 1121 if ( $cmd eq 'null' ) { 1122 return "$_[1] 1 # null\n" 1123 } 1124 # <subrule ( param, param ) > 1125 my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd ); 1126 $param_list ||= ''; 1127 1128 if ( $subrule eq 'at' ) { 1129 $param_list ||= 0; # XXX compile-time only 1130 return "$_[1] ( \$pos == $param_list )\n" 1131 } 1132 1133 return named_capture( 1134 { 1135 ident => $subrule, 1136 rule => { metasyntax => { metasyntax => $cmd }, _pos => $_[0]->{_pos} }, 1137 }, 1138 $_[1], 1139 ); 1140 } 1141 #### $prefix 1142 #### $modifier 1143 #if ( $prefix eq '.' ) { 1144 # my ( $method, $param_list ) = split( /[\(\)]/, $cmd ); 1145 # $method =~ s/^\.//; 1146 # $param_list ||= ''; 1147 # return " ( \$s->$method( $param_list ) ? 1 : 0 ) "; 1148 #} 1149 die "<$cmd> not implemented"; 1150} 1151 11521; 1153