1package Pugs::Emitter::Rule::Perl5::Ratchet;
2
3# p6-rule perl5 emitter for ":ratchet" (non-backtracking)
4# see: RuleInline.pl, RuleInline-more.pl for a program prototype
5
6#use Smart::Comments '####';
7use strict;
8use warnings;
9use Pugs::Emitter::Rule::Perl5::CharClass;
10use Data::Dumper;
11$Data::Dumper::Indent = 1;
12
13our $direction = "+";  # XXX make lexical
14our $sigspace = 0;
15our $capture_count;
16our $capture_to_array;
17our $RegexPos;
18
19our $count;
20sub id {
21    if (!defined $count) {
22        if (defined $::PCR_SEED) {
23            #warn "SET SEED!!!";
24            srand($::PCR_SEED);
25        }
26        $count = 1000 + int(rand(1000));
27    }
28    'I' . ($count++)
29}
30
31sub call_subrule {
32    my ( $subrule, $tab, $positionals, @param ) = @_;
33    $subrule = "\$grammar->" . $subrule
34        unless $subrule =~ / :: | \. | -> /x;
35    $subrule =~ s/\./->/;   # XXX - source filter
36
37    $positionals = shift @param if $positionals eq '' && @param == 1;
38
39    return
40"$tab     $subrule( \$s, { "
41        . "p => \$pos, "
42        . "positionals => [ $positionals ], "
43        . "args => {" .
44                 join(", ",@param) .
45             "}, "
46        . "}, undef )";
47}
48
49sub quote_constant {
50    my $const;
51    if ( $_[0] eq "\\" ) {
52        $const = "chr(".ord("\\").")";
53    }
54    elsif ( $_[0] eq "'" ) {
55        $const = "chr(".ord("'").")"
56    }
57    else {
58        $const = "'$_[0]'"
59    }
60    return $const;
61}
62
63sub call_constant {
64    return " 1 # null constant\n"
65        unless length($_[0]);
66    my $const = quote_constant( $_[0] );
67    my $len = length( eval $const );
68    #print "Const: [$_[0]] $const $len \n";
69    return
70"
71$_[1] ## <constant>
72$_[1] ## pos: @$RegexPos
73$_[1] ( ( substr( \$s, \$pos, $len ) eq $const )
74$_[1]     ? ( \$pos $direction= $len or 1 )
75$_[1]     : 0
76$_[1] )
77$_[1] ## </constant>\n";
78}
79
80sub call_perl5 {
81    my $const = $_[0];
82    $_[1] = '  ' unless defined $_[1];
83    #print "CONST: $const - $direction \n";
84    return
85"$_[1] ## <perl5>
86$_[1] ( ( substr( \$s, \$pos ) =~ m/^($const)/ )
87$_[1]     ? ( \$pos $direction= length( \$1 ) or 1 )
88$_[1]     : 0
89$_[1] )
90$_[1] ## </perl5>\n";
91}
92
93sub emit {
94    my ($grammar, $ast, $param) = @_;
95    # runtime parameters: $grammar, $string, $state, $arg_list
96    # rule parameters: see Runtime::Rule.pm
97    local $sigspace = $param->{sigspace} ? 1 : 0;   # XXX - $sigspace should be lexical
98    ### ratchet emit sigspace: $sigspace
99    local $capture_count = -1;
100    local $capture_to_array = 0;
101    #print "rule: ", Dumper( $ast );
102    return
103        "## <global>
104## sigspace: $sigspace
105## ratchet: 1
106do { my \$rule; \$rule = sub {
107  my \$grammar = \$_[0];
108  my \$s = \$_[1];
109  \$_[3] = \$_[2] unless defined \$_[3]; # backwards compat
110  no warnings 'substr', 'uninitialized', 'syntax';
111  my \%pad;\n" .
112        #"  my \$pos;\n" .
113        #"  print \"match arg_list = \$_[1]\n\";\n" .
114        #"  print 'match ', Dumper(\\\@_);\n" .
115        #"  print \"match arg_list = \@{[\%{\$_[1]} ]}\n\" if defined \$_[1];\n" .
116        #"  warn \"match pos = \", pos(\$_[1]), \"\\n\";\n" .
117"  my \$m;
118  my \$bool;
119  my \@pos;
120  # XXX :pos(X) takes the precedence over :continue ?
121  if (defined \$_[3]{p}) {
122    push \@pos, \$_[3]{p} || 0;
123  } elsif (\$_[3]{continue}) {
124    push \@pos, (pos(\$_[1]) || 0) .. length(\$s);
125  } else {
126    push \@pos, 0..length(\$s);
127  }
128  for my \$pos ( \@pos ) {
129    my \%index;
130    my \@match;
131    my \%named;
132    \$bool = 1;
133    \$named{KEY} = \$_[3]{KEY} if exists \$_[3]{KEY};
134    \$m = Pugs::Runtime::Match->new( {
135      str => \\\$s, from => \\(0+\$pos), to => \\(\$pos),
136      bool => \\\$bool, match => \\\@match, named => \\\%named, capture => undef,
137    } );
138    {
139      my \$prior = \$::_V6_PRIOR_;
140      local \$::_V6_PRIOR_ = \$prior;
141      \$bool = 0 unless
142" .
143        #"      do { TAILCALL: ;\n" .
144        emit_rule( $ast, '    ' ) . ";
145    }
146    if ( \$bool ) {
147      my \$prior = \$::_V6_PRIOR_;
148      \$::_V6_PRIOR_ = sub {
149        local \$main::_V6_PRIOR_ = \$prior;
150        \$rule->(\@_);
151      };
152      #warn \"pos2 = \", \$pos, \"\\n\";
153      pos(\$_[1]) = \$pos if \$_[3]{continue};
154      last;
155    }
156  } # /for
157  \$::_V6_MATCH_ = \$m;
158  return \$m;
159} }
160## </global>\n";
161}
162
163sub emit_rule {
164    my $n = $_[0];
165    my $tab = $_[1] . '  ';
166    die "unknown node: ", Dumper( $n )
167        unless ref( $n ) eq 'HASH';
168    #print "NODE ", Dumper($n);
169    my @keys = grep { substr($_, 0, 1) ne '_' } keys %$n;
170    ### Node keys: @keys
171    my ($k) = @keys;
172    my $v = $n->{$k};
173    local $RegexPos = $n->{_pos};
174    ### $RegexPos
175    if (!defined $RegexPos) {
176    #    warn "WARNING: No _pos slot found for AST node '$k'.\n";
177    #    warn Dumper($n);
178        $RegexPos = [];
179    }
180    # XXX - use real references
181    no strict 'refs';
182    #print "NODE ", Dumper($k), ", ", Dumper($v);
183    my $code = $k->( $v, $tab );
184    return $code;
185}
186
187#rule nodes
188
189sub non_capturing_group {
190    return emit_rule( $_[0], $_[1] );
191}
192sub quant {
193    my $term = $_[0]->{'term'};
194    my $quantifier = $_[0]->{quant}  || '';
195    my $greedy     = $_[0]->{greedy} || '';   # + ?
196    die "greediness control not implemented: $greedy"
197        if $greedy;
198    #print "QUANT: ",Dumper($_[0]);
199    my $id = id();
200    my $tab = ( $quantifier eq '' ) ? $_[1] : $_[1] . "  ";
201    my $ws = metasyntax( { metasyntax => 'ws', modifier => '.' }, $tab );
202    my $ws3 = ( $sigspace && $_[0]->{ws3} ne '' ) ? " &&\n$ws" : '';
203
204    my $rul;
205    {
206        #print "Term: ", Dumper($term), "\n";
207        my $cap = $capture_to_array;
208        local $capture_to_array = $cap || ( $quantifier ne '' );
209        $rul = emit_rule( $term, $tab );
210
211        # rollback on fail
212        $rul = "$_[1]  ( "
213            .  "  ( \$pad{$id} = \$pos or 1 ) &&\n"
214            .     $rul
215            .  " ||"
216            .  "    ( ( \$pos = \$pad{$id} ) && 0 )"
217            .  " )";
218    }
219
220    $rul = "$ws &&\n$rul" if $sigspace && $_[0]->{ws1} ne '';
221    $rul = "$rul &&\n$ws" if $sigspace && $_[0]->{ws2} ne '';
222    #print $rul;
223    return "
224$_[1] ## <group>
225$_[1] ## pos: @$RegexPos
226" . $rul . "
227$_[1] ## </group>\n"
228        if $quantifier eq '';
229    # *  +  ?
230    # TODO: *? +? ??
231    # TODO: *+ ++ ?+
232    # TODO: quantifier + capture creates Array
233    #warn Dumper( $quantifier );
234    if ( ref( $quantifier ) eq 'HASH' )
235    {
236        my $code = $quantifier->{closure};
237        if ( ref( $code ) ) {
238            if ( defined $Pugs::Compiler::Perl6::VERSION ) {
239                #print " perl6 compiler is loaded \n";
240                $code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' );
241            }
242        };
243        my @count = eval $code;
244        #warn "code: $code = [ @count ]";
245
246        die "quantifier not implemented: " . Dumper( $quantifier )
247            if @count ne 1
248            || $count[0] == 0;
249
250        return
251            "$_[1] ## <quant>\n" .
252            "$_[1] ## pos: @$RegexPos\n" .
253            "$_[1] (\n" .
254            join( ' && ', ($rul) x $count[0] ) .
255            "\n" .
256            "$_[1] )$ws3\n" .
257            "$_[1] ## </quant>\n";
258    }
259    return
260        "$_[1] ## <quant>\n" .
261	"$_[1] ## pos: @$RegexPos\n" .
262        "$_[1] (\n$rul\n" .
263        "$_[1] || ( \$bool = 1 )\n" .
264        "$_[1] )$ws3\n" .
265        "$_[1] ## </quant>\n"
266        if $quantifier eq '?';
267    return
268        "$_[1] ## <quant>\n" .
269        "$_[1] ## pos: @$RegexPos\n" .
270        "$_[1] do { while (\n$rul) {}; \$bool = 1 }$ws3\n" .
271        "$_[1] ## </quant>\n"
272        if $quantifier eq '*';
273    return
274        "$_[1] ## <quant>\n" .
275        "$_[1] ## pos: @$RegexPos\n" .
276        "$_[1] (\n$rul\n" .
277        "$_[1] && do { while (\n$rul) {}; \$bool = 1 }\n" .
278        "$_[1] )$ws3\n" .
279        "$_[1] ## </quant>\n"
280        if $quantifier eq '+';
281    die "quantifier not implemented: $quantifier";
282}
283
284sub alt {
285    my @s;
286    # print 'Alt: ';
287    my $count = $capture_count;
288    my $max = -1;
289    my $id = id();
290    for ( @{$_[0]} ) {
291        $capture_count = $count;
292        my $tmp = emit_rule( $_, $_[1].'  ' );
293        # print ' ',$capture_count;
294        $max = $capture_count
295            if $capture_count > $max;
296        push @s, $tmp if $tmp;
297    }
298    $capture_count = $max;
299    # print " max = $capture_count\n";
300    return
301        "$_[1] ## <alt>
302$_[1] ## pos: @$RegexPos
303$_[1] (
304$_[1]     ( \$pad{$id} = \$pos or 1 )
305$_[1]     && (
306" . join( "
307$_[1]     )
308$_[1]   || (
309$_[1]     ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 )
310$_[1]     && ",
311          @s
312    ) . "
313$_[1]   )
314$_[1] )
315$_[1] ## </alt>\n";
316}
317sub alt1 { &alt }
318sub conjunctive {
319    my @s;
320    # print 'conjunctive: ';
321    my $count = $capture_count;
322    my $max = -1;
323    my $id = id();
324    for ( @{$_[0]} ) {
325        $capture_count = $count;
326        my $tmp = emit_rule( $_, $_[1].'  ' );
327        # print ' ',$capture_count;
328        $max = $capture_count
329            if $capture_count > $max;
330        push @s, $tmp if $tmp;
331    }
332    $capture_count = $max;
333    # print " max = $capture_count\n";
334    return
335        "$_[1] ## <conjunctive>
336$_[1] ## pos: @$RegexPos
337$_[1] (
338$_[1]     ( \$pad{$id} = \$pos or 1 )
339$_[1]     && (
340" . join( "
341$_[1]     )
342$_[1]   && (
343$_[1]     ( ( \$bool = 1 ) && ( \$pos = \$pad{$id} ) or 1 )
344$_[1]     && ",
345          @s
346    ) . "
347$_[1]   )
348$_[1] )
349$_[1] ## </conjunctive>\n";
350}
351sub conjunctive1 { &conjunctive }
352sub concat {
353    my @s;
354
355=for optimizing
356    # optimize for the common case of "words"
357    # Note: this optimization has almost no practical effect
358    my $is_constant = 0;
359    for ( @{$_[0]} ) {
360        if ( ! $sigspace && exists $_->{quant} ) {
361            my $was_constant = $is_constant;
362            $is_constant =
363                   $_->{quant}->{quant} eq ''
364                && exists $_->{quant}->{term}->{constant};
365            #print "concat: ", Dumper( $_ );
366            if ( $is_constant && $was_constant && $direction ne '-' ) {
367                $s[-1]->{quant}->{term}->{constant} .=
368                    $_->{quant}->{term}->{constant};
369                #print "constant: ",$s[-1]->{quant}->{term}->{constant},"\n";
370                next;
371            }
372        }
373        push @s, $_;
374    }
375
376    for ( @s ) {
377        $_ = emit_rule( $_, $_[1] );
378    }
379=cut
380
381    # Try to remove non-greedy quantifiers, by inserting a lookahead;
382    # cheat: / .*? b /
383    # into:  / [ <!before b> . ]* b /
384    # TODO - make it work for '+' quantifier too
385    for my $i ( 0 .. @{$_[0]} - 1 ) {
386        if (   exists $_[0][$i]{quant}
387            && $_[0][$i]{quant}{quant}  eq '*'
388            && $_[0][$i]{quant}{greedy} eq '?'
389        ) {
390            my $tmp = { quant => {
391                    %{ $_[0][$i]{quant} },
392                    greedy => '', quant => ''
393                },
394                _pos => $_[0][$i]{_pos}
395            };
396            $_[0][$i] = {
397                _pos => $_[0][$i]{_pos},
398                quant => {
399                    greedy => '',
400                    quant  => $_[0][$i]{quant}{quant},
401                    ws1    => '',
402                    ws2    => '',
403                    ws3    => '',
404                    term   => {
405                        _pos => $_[0][$i]{_pos},
406                        concat => [
407                            {
408                                _pos => $_[0][$i]{_pos},
409                                before => {
410                                    rule     => {
411                                        _pos => $_[0][$i]{_pos},
412                                        concat => [
413                                            @{ $_[0] }[$i+1 .. $#{ $_[0] } ]
414                                        ],
415                                    },
416                                    modifier => '!',
417                                }
418                            },
419                            $tmp,
420                        ],
421                    },
422                },
423            };
424            #warn "Quant: ",Dumper($_[0]);
425        }
426    }
427
428    for ( @{$_[0]} ) {
429        my $tmp = emit_rule( $_, $_[1] );
430        push @s, $tmp if $tmp;
431    }
432    @s = reverse @s if $direction eq '-';
433    return
434"$_[1] ## <concat>
435$_[1] ## pos: @$RegexPos
436$_[1] (\n" . join( "\n$_[1] &&\n", @s ) . "
437$_[1] )
438$_[1] ## </concat>\n";
439}
440
441sub code {
442    return "$_[1] $_[0]\n";
443}
444
445sub dot {
446    "
447$_[1] ## <dot>
448$_[1] ## pos: @$RegexPos
449$_[1] ( substr( \$s, \$pos$direction$direction, 1 ) ne '' )
450$_[1] ## </dot>\n"
451}
452
453sub variable {
454    my $name = "$_[0]";
455    my $value = undef;
456    # XXX - eval $name doesn't look up in user lexical pad
457    # XXX - what &xxx interpolate to?
458
459    #print "VAR: $name \n";
460    # expand embedded $scalar
461    if ( $name =~ /^\$/ ) {
462        # $^a, $^b
463        if ( $name =~ /^ \$ \^ ([^\s]*) /x ) {
464            my $index = ord($1)-ord('a');
465            #print "Variable #$index\n";
466            #return "$_[1] constant( \$_[7][$index] )\n";
467
468            my $code =
469            "    ... sub {
470                #print \"Runtime Variable args[\", join(\",\",\@_) ,\"] \$_[7][$index]\\n\";
471                return constant( \$_[7][$index] )->(\@_);
472            }";
473            $code =~ s/^/$_[1]/mg;
474            return "$code\n";
475        }
476
477            $value = eval $name;
478    }
479
480    # expand embedded @arrays
481    if ( $name =~ /^\@/ ) {
482      my $code = q!
483          join(
484            '|',
485            ! . $name . q!
486          )
487      !;
488    return
489"$_[1] ## <variable>
490$_[1] ## pos: @$RegexPos
491$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . $code . ')/ )
492$_[1]     ? ( \$pos $direction= length( \$1 ) or 1 )
493$_[1]     : 0
494$_[1]    ') )
495$_[1] ## </variable>\n";
496    }
497
498    # expand embedded %hash
499    if ( $name =~ /^%/ ) {
500        my $id = '$' . id();
501        my $preprocess_hash = 'Pugs::Runtime::Regex::preprocess_hash';
502        my $code =
503"
504          ## <variable>
505          ## pos: @$RegexPos
506          do {
507            our $id;
508            our ${id}_sizes;
509            unless ( $id ) {
510                my \$hash = \\$name;
511                my \%sizes = map { length(\$_) => 1 } keys \%\$hash;
512                ${id}_sizes = [ sort { \$b <=> \$a } keys \%sizes ];
513                " . #print \"sizes: \@${id}_sizes\\n\";
514                "$id = \$hash;
515            }
516            " . #print 'keys: ',Dumper( $id );
517            "my \$match = 0;
518            my \$key;
519            for ( \@". $id ."_sizes ) {
520                \$key = ( \$pos <= length( \$s )
521                            ? substr( \$s, \$pos, \$_ )
522                            : '' );
523                " . #print \"try ".$name." \$_ = \$key; \$s\\\n\";
524                "if ( exists ". $id ."->{\$key} ) {
525                    #\$named{KEY} = \$key;
526                    #\$::_V6_MATCH_ = \$m;
527                    #print \"m: \", Dumper( \$::_V6_MATCH_->data )
528                    #    if ( \$key eq 'until' );
529                    " . #print \"* ".$name."\{'\$key\'} at \$pos \\\n\";
530                    "\$match = $preprocess_hash( $id, \$key )->( \$s, \$grammar, { p => ( \$pos + \$_ ), positionals => [ ], args => { KEY => \$key } }, undef );
531                    " . #print \"match: \", Dumper( \$match->data );
532                    "last if \$match;
533                }
534            }
535            if ( \$match ) {
536                \$pos = \$match->to;
537                #print \"match: \$key at \$pos = \", Dumper( \$match->data );
538                \$bool = 1;
539            }; # else { \$bool = 0 }
540            \$match;
541          }
542          ## </variable>
543";
544        #print $code;
545        return $code;
546    }
547    die "interpolation of $name not implemented"
548        unless defined $value;
549
550    return call_constant( $value, $_[1] );
551}
552sub special_char {
553    my ($char, $data) = $_[0] =~ /^.(.)(.*)/;
554
555    return call_perl5( '\\N{$data}', $_[1] )
556        if $char eq 'c';
557    return call_perl5( '(?!\\N{$data}).', $_[1] )
558        if $char eq 'C';
559
560    return call_perl5( '\\x{'.$data.'}', $_[1] )
561        if $char eq 'x';
562    return call_perl5( '(?!\\x{'.$data.'}).', $_[1] )
563        if $char eq 'X';
564
565    return special_char( sprintf("\\x%X", oct($data) ) )
566        if $char eq 'o';
567    return special_char( sprintf("\\X%X", oct($data) ) )
568        if $char eq 'O';
569
570    return  call_perl5( '(?:\n\r?|\r\n?)', $_[1] )
571        if $char eq 'n';
572    return  call_perl5( '(?!\n\r?|\r\n?).', $_[1] )
573        if $char eq 'N';
574
575    # XXX - Infinite loop in pugs stdrules.t
576    #return metasyntax( '?_horizontal_ws', $_[1] )
577    return call_perl5( '[\x20\x09]' )
578        if $char eq 'h';
579    return call_perl5( '[^\x20\x09]' )
580        if $char eq 'H';
581    #return metasyntax( '?_vertical_ws', $_[1] )
582    return call_perl5( '[\x0A\x0D]' )
583        if $char eq 'v';
584    return call_perl5( '[^\x0A\x0D]' )
585        if $char eq 'V';
586
587    for ( qw( r n t e f w d s ) ) {
588        return call_perl5(   "\\$_",  $_[1] ) if $char eq $_;
589        return call_perl5( "[^\\$_]", $_[1] ) if $char eq uc($_);
590    }
591    $char = '\\\\' if $char eq '\\';
592    ### special char: $char
593    return call_constant( $char, $_[1] );
594}
595
596sub match_variable {
597    my $name = $_[0];
598    my $num = substr($name,1);
599    #print "var name: ", $num, "\n";
600
601    return
602"
603$_[1] ## <match_variable>
604$_[1] ## pos: @$RegexPos
605$_[1] ( eval( '( substr( \$s, \$pos ) =~ m/^(' . \$m->{$num} . ')/ )
606$_[1]     ? ( \$pos $direction= length( \$1 ) or 1 )
607$_[1]     : 0
608$_[1]    ') )
609$_[1] ## </match_varaible>
610";
611}
612
613sub closure {
614    #print "closure: ",Dumper($_[0]);
615    my $code     = $_[0]{closure};
616    my $modifier = $_[0]{modifier};  # 'plain', '', '?', '!'
617
618    die "invalid closure modifier: . "
619        if $modifier eq '.';
620
621    #die "closure modifier not implemented '$modifier'"
622    #    unless $modifier eq 'plain';
623
624    if (   ref( $code )
625        && defined $Pugs::Compiler::Perl6::VERSION
626    ) {
627        #print " perl6 compiler is loaded \n";
628        $code = Pugs::Emitter::Perl6::Perl5::emit( 'grammar', $code, 'self' );
629        $code = '{ my $_V6_SELF = shift; ' . $code . '}';  # make it a "method"
630    }
631    else {
632        #print " perl6 compiler is NOT loaded \n";
633        # XXX XXX XXX - source-filter - temporary hacks to translate p6 to p5
634        # $()<name>
635        $code =~ s/ ([^']) \$ \$ (\d+) /$1\${ \$_[0]->[$2] }/sgx;
636        $code =~ s/ ([^']) \$ (\d+) /$1\$_[0]->[$2]/sgx;
637        $code =~ s/ ([^']) \$ \( \) < (.*?) > /$1\$_[0]->{$2}/sgx;
638        # $<name>
639        $code =~ s/ ([^']) \$ \$ < (.*?) > /$1\${ \$_[0]->{qw($2)} }/sgx;
640        $code =~ s/ ([^']) \$ < (.*?) > /$1\$_[0]->{qw($2)}/sgx;
641        # $()
642        $code =~ s/ ([^']) \$ \( \) /$1\$_[0]->()/sgx;
643        # $/
644        $code =~ s/ ([^']) \$ \/ ([\{\[]) /$1\$_[0]->$2/sgx;
645        $code =~ s/ ([^']) \$ \/ /$1\$_[0]/sgx;
646        #$code =~ s/ use \s+ v6 \s* ; / # use v6\n/sgx;
647    }
648    #print "Code: $code\n";
649    # "plain" {...return ...}
650    return
651          "$_[1] ## <closure>\n"
652        . "$_[1] ## pos: @$RegexPos\n"
653        . "$_[1] do {\n"
654        . "$_[1]   local \$::_V6_SUCCEED = 1;\n"
655        . "$_[1]   \$::_V6_MATCH_ = \$m;\n"
656        . "$_[1]   \$m->data->{capture} = \\( sub $code->( \$m ) ); \n"
657        . "$_[1]   \$bool = \$::_V6_SUCCEED;\n"
658        . "$_[1]   \$::_V6_MATCH_ = \$m if \$bool; \n"
659        . "$_[1]   return \$m if \$bool; \n"
660        . "$_[1] }\n"
661        . "$_[1] ## </closure>\n"
662        if $code =~ /return/;
663
664    # "plain" {...} without return
665    return
666          "$_[1] ## <closure>\n"
667        . "$_[1] ## pos: @$RegexPos\n"
668        . "$_[1] do { \n"
669        . "$_[1]   local \$::_V6_SUCCEED = 1;\n"
670        . "$_[1]   \$::_V6_MATCH_ = \$m;\n"
671        . "$_[1]   sub $code->( \$m );\n"
672        . "$_[1]   1;\n"
673        . "$_[1] }\n"
674        . "$_[1] ## </closure>\n"
675        if $modifier eq 'plain';
676    # "?" <?{...}>
677    return
678        "$_[1] ## <closure>\n" .
679        "$_[1] ## pos: @$RegexPos\n" .
680        "$_[1] do { \n" .
681        "$_[1]   local \$::_V6_SUCCEED = 1;\n" .
682        "$_[1]   \$::_V6_MATCH_ = \$m;\n" .
683        "$_[1]   \$bool = ( sub $code->( \$m ) ) ? 1 : 0; \n" .
684        "$_[1] }" .
685        "$_[1] ## </closure>\n"
686        if $modifier eq '?';
687    # "!" <!{...}>
688    return
689        "$_[1] ## <closure>\n" .
690        "$_[1] ## pos: @$RegexPos\n" .
691        "$_[1] do { \n" .
692        "$_[1]   local \$::_V6_SUCCEED = 1;\n" .
693        "$_[1]   \$::_V6_MATCH_ = \$m;\n" .
694        "$_[1]   \$bool = ( sub $code->( \$m ) ) ? 0 : 1; \n" .
695        "$_[1] }" .
696        "$_[1] ## </closure>\n"
697        if $modifier eq '!';
698
699}
700sub capturing_group {
701    my $program = $_[0];
702
703    $capture_count++;
704    {
705        local $capture_count = -1;
706        local $capture_to_array = 0;
707        $program = emit_rule( $program, $_[1].'      ' )
708            if ref( $program );
709    }
710
711    return "
712$_[1] ## <capture>
713$_[1] do{
714$_[1]     my \$hash = do {
715$_[1]       my \$bool = 1;
716$_[1]       my \$from = \$pos;
717$_[1]       my \@match;
718$_[1]       my \%named;
719$_[1]       \$bool = 0 unless
720" .             $program . ";
721$_[1]       { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
722$_[1]     };
723$_[1]     my \$bool = \${\$hash->{'bool'}};" .
724        ( $capture_to_array
725        ? "
726$_[1]     if ( \$bool ) {
727$_[1]         push \@{ \$match[ $capture_count ] }, Pugs::Runtime::Match->new( \$hash );
728$_[1]     }"
729        : "
730$_[1]     \$match[ $capture_count ] = Pugs::Runtime::Match->new( \$hash );"
731        ) . "
732$_[1]     \$bool;
733$_[1] }
734$_[1] ## </capture>\n";
735}
736
737sub capture_as_result {
738    my $program = $_[0];
739
740    $capture_count++;
741    {
742        local $capture_count = -1;
743        local $capture_to_array = 0;
744        $program = emit_rule( $program, $_[1].'      ' )
745            if ref( $program );
746    }
747    return "$_[1] ## <capture>
748$_[1] ## pos: @$RegexPos
749$_[1] do{
750$_[1]     my \$hash = do {
751$_[1]       my \$bool = 1;
752$_[1]       my \$from = \$pos;
753$_[1]       my \@match;
754$_[1]       my \%named;
755$_[1]       \$bool = 0 unless
756" .             $program . ";
757$_[1]       { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
758$_[1]     };
759$_[1]     my \$bool = \${\$hash->{'bool'}};
760$_[1]     \$m->data->{capture} = \\( \"\" . Pugs::Runtime::Match->new( \$hash ) );
761$_[1]     \$bool;
762$_[1] }
763$_[1] ## </capture>\n";
764}
765sub named_capture {
766    my $name    = $_[0]{ident};
767    ### $name
768    if (ref($name) eq 'HASH') {
769        $name = $name->{match_variable} || $name->{variable};
770    }
771    $name =~ s/^[\$\@\%]//;  # TODO - change semantics as needed
772    my $program = $_[0]{rule};
773    #warn "name [$name]\n";
774
775    if ( exists $program->{metasyntax} ) {
776        #print "aliased subrule\n";
777        # $/<name> = $/<subrule>
778
779        my $cmd = $program->{metasyntax}{metasyntax};
780        die "invalid aliased subrule"
781            unless $cmd =~ /^[_[:alnum:]]/;
782
783        # <subrule ( param, param ) >
784        my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd );
785        $param_list = '' unless defined $param_list;
786        my @param = split( ',', $param_list );
787        return "$_[1] ## <named_capture>
788$_[1] ## pos: @$RegexPos
789$_[1] do {
790                my \$prior = \$::_V6_PRIOR_;
791                my \$match =\n" .
792                    call_subrule( $subrule, $_[1]."        ", "", @param ) . ";
793                \$::_V6_PRIOR_ = \$prior;
794                if ( \$match ) {" .
795                    ( $capture_to_array
796                    ? " push \@{\$named{'$name'}}, \$match;"
797                    : " \$named{'$name'} = \$match;"
798                    ) . "
799                    \$pos = \$match->to;
800                    1
801                }
802                else { 0 }
803            }
804$_[1] ## </named_capture>\n";
805    }
806    elsif ( exists $program->{capturing_group} ) {
807        #print "aliased capturing_group\n";
808        # $/<name> = $/[0]
809        {
810            local $capture_count = -1;
811            local $capture_to_array = 0;
812            $program = emit_rule( $program, $_[1].'      ' )
813                if ref( $program );
814        }
815        return "$_[1] ## <named_capture>
816$_[1] ## pos: @$RegexPos
817$_[1] do{
818                my \$match = Pugs::Runtime::Match->new( do {
819                    my \$bool = 1;
820                    my \$from = \$pos;
821                    my \@match;
822                    my \%named;
823                    \$bool = 0 unless " .
824                    $program . ";
825                    { str => \\\$s, from => \\\$from, match => \\\@match, named => \\\%named, bool => \\\$bool, to => \\(0+\$pos), capture => undef }
826                } );
827                if ( \$match ) {" .
828                    ( $capture_to_array
829                    ? " push \@{\$named{'$name'}}, \$match;"
830                    : " \$named{'$name'} = \$match;"
831                    ) . "
832                    \$pos = \$match->to;
833                    1
834                }
835                else { 0 }
836            }
837$_[1] ## </named_capture>\n";
838    }
839    else {
840        #print "aliased non_capturing_group\n";
841        # $/<name> = "$/"
842        #print Dumper( $_[0] );
843        $program = emit_rule( $program, $_[1].'      ' );
844        return "$_[1] ## <named_capture>
845$_[1] ## pos: @$RegexPos
846$_[1] do{
847                my \$from = \$pos;
848                my \$bool = $program;
849                my \$match = Pugs::Runtime::Match->new(
850                    { str => \\\$s, from => \\\$from, match => [], named => {}, bool => \\1, to => \\(0+\$pos), capture => undef }
851                );" .
852                ( $capture_to_array
853                ? " push \@{\$named{'$name'}}, \$match;"
854                : " \$named{'$name'} = \$match;"
855                ) . "
856                \$bool
857            }
858$_[1] ## </named_capture>\n";
859    }
860}
861sub negate {
862    my $program = $_[0];
863    #print "Negate: ", Dumper($_[0]);
864    $program = emit_rule( $program, $_[1].'        ' )
865        if ref( $program );
866    return "$_[1] ## <negate>
867$_[1] ## pos: @$RegexPos
868$_[1] do{
869$_[1]     my \$pos1 = \$pos;
870$_[1]     do {
871$_[1]       my \$pos = \$pos1;
872$_[1]       my \$from = \$pos;
873$_[1]       my \@match;
874$_[1]       my \%named;
875$_[1]       \$bool = " . $program . " ? 0 : 1;
876$_[1]       \$bool;
877$_[1]     };
878$_[1] }
879$_[1] ## </negate>\n";
880}
881
882sub before {
883    my $mod = delete $_[0]{modifier} || '';
884    #### before atom: $_[0]
885    return negate( { before => $_[0], _pos => $_[0]{rule}{_pos}, }, $_[1] ) if $mod eq '!';
886    my $program = $_[0]{rule};
887    $program = emit_rule( $program, $_[1].'        ' )
888        if ref( $program );
889    return "
890$_[1] ## <before>
891$_[1] ## pos: @$RegexPos
892$_[1] do{
893$_[1]     my \$pos1 = \$pos;
894$_[1]     do {
895$_[1]       my \$pos = \$pos1;
896$_[1]       my \$from = \$pos;
897$_[1]       my \@match;
898$_[1]       my \%named;
899$_[1]       \$bool = 0 unless
900" .             $program . ";
901$_[1]       \$bool;
902$_[1]     };
903$_[1] }
904$_[1] ## </before>\n";
905}
906
907sub after {
908    my $mod = delete $_[0]{modifier};
909    return negate( { after => $_[0] }, $_[1] ) if $mod eq '!';
910    local $direction = "-";
911    my $program = $_[0]{rule};
912    $program = emit_rule( $program, $_[1].'        ' )
913        if ref( $program );
914    return "$_[1] ## <after>
915$_[1] ## pos: @$RegexPos
916$_[1] do{
917$_[1]     my \$pos1 = \$pos;
918$_[1]     do {
919$_[1]       my \$pos = \$pos1 - 1;
920$_[1]       my \$from = \$pos;
921$_[1]       my \@match;
922$_[1]       my \%named;
923$_[1]       \$bool = 0 unless
924" .             $program . ";
925$_[1]       \$bool;
926$_[1]     };
927$_[1] }
928$_[1] ## </after>\n";
929}
930
931sub colon {
932    my $str = $_[0];
933    return "$_[1] 1 # : no-op\n"
934        if $str eq ':';
935    return "$_[1] ( \$pos >= length( \$s ) )\n"
936        if $str eq '$';
937    return "$_[1] ( \$pos == 0 )\n"
938        if $str eq '^';
939
940    return "$_[1] ( \$pos >= length( \$s ) || substr( \$s, \$pos ) =~ ".'/^(?:\n\r?|\r\n?)/m'." )\n"
941        if $str eq '$$';
942    return "$_[1] ( \$pos == 0 || substr( \$s, 0, \$pos ) =~ ".'/(?:\n\r?|\r\n?)$/m'." )\n"
943        if $str eq '^^';
944
945    return metasyntax( { metasyntax => '_wb_left', modifier => '?' }, $_[1] )
946        if $str eq '<<';
947    return metasyntax( { metasyntax => '_wb_right', modifier => '?' }, $_[1] )
948        if $str eq '>>';
949
950    die "'$str' not implemented";
951}
952sub modifier {
953    my $str = $_[0];
954    die "modifier '$str' not implemented";
955}
956sub constant {
957    call_constant( @_ );
958}
959
960sub char_class {
961    my $cmd = Pugs::Emitter::Rule::Perl5::CharClass::emit( $_[0] );
962    return call_perl5($cmd, $_[1]);
963}
964
965sub call {
966    #die "not implemented: ", Dumper(\@_);
967    my $param = $_[0]{params};
968    my $name = $_[0]{method};
969        # capturing subrule
970        # <subrule ( param, param ) >
971        my ($param_list) = $param =~ /\{(.*)\}/;
972        $param_list = '' unless defined $param_list;
973        my @param = split( ',', $param_list );
974        #print "param: ", Dumper(\@param);
975
976        # TODO
977
978        if ( $name eq 'at' ) {
979            $param_list ||= 0;   # XXX compile-time only
980            return "$_[1] ( \$pos == $param_list )\n"
981        }
982
983        return named_capture(
984            {
985                ident => $name,
986                rule => { metasyntax => { metasyntax => $name }, _pos => $_[0]{_pos}, },
987            },
988            $_[1],
989        );
990}
991
992sub metasyntax {
993    # <cmd>
994    #print Dumper(\@_);
995    my $cmd = $_[0]{metasyntax};
996    my $modifier = delete $_[0]{modifier} || '';   # . ? !
997    return negate( { metasyntax => $_[0], _pos => $_[0]{_pos} }, $_[1] ) if $modifier eq '!';
998
999    my $prefix = substr( $cmd, 0, 1 );
1000    if ( $prefix eq '@' ) {
1001        # XXX - wrap @array items - see end of Pugs::Grammar::Rule
1002        # TODO - param list
1003        my $name = substr( $cmd, 1 );
1004        return
1005            "$_[1] ## <metasyntax>
1006$_[1] ## pos: @$RegexPos
1007$_[1] do {
1008                my \$match;
1009                for my \$subrule ( $cmd ) {
1010                    \$match = \$subrule->match( \$s, \$grammar, { p => ( \$pos ), positionals => [ ], args => {} }, undef );
1011                    last if \$match;
1012                }
1013                if ( \$match ) {" .
1014                    ( $capture_to_array
1015                    ? " push \@{\$named{'$name'}}, \$match;"
1016                    : " \$named{'$name'} = \$match;"
1017                    ) . "
1018                    \$pos = \$match->to;
1019                    1
1020                }
1021                else { 0 }
1022            }
1023$_[1] ## </metasyntax>\n";
1024    }
1025
1026    if ( $prefix eq '%' ) {
1027        # XXX - runtime or compile-time interpolation?
1028        my $name = substr( $cmd, 1 );
1029        # print "<$cmd>\n";
1030        # return variable( $cmd );
1031        return "$_[1]## <metasyntax>
1032$_[1] ## pos: @$RegexPos
1033$_[1] do{
1034                my \$match = " . variable( $cmd, $_[1] ) . ";
1035                if ( \$match ) {" .
1036                    ( $capture_to_array
1037                    ? " push \@{\$named{'$name'}}, \$match;"
1038                    : " \$named{'$name'} = \$match;"
1039                    ) . "
1040                    \$pos = \$match->to;
1041                    1
1042                }
1043                else { 0 }
1044            }\n$_[1]## </metasyntax>\n";
1045    }
1046
1047    if ( $prefix eq '$' ) {
1048        if ( $cmd =~ /::/ ) {
1049            # call method in fully qualified $package::var
1050            # ...->match( $rule, $str, $grammar, $flags, $state )
1051            # TODO - send $pos to subrule
1052            return
1053                "$_[1]         ## <metasyntax>\n" .
1054                "$_[1]         ## pos: @$RegexPos\n" .
1055                "$_[1]         do {\n" .
1056                "$_[1]           push \@match,\n" .
1057                "$_[1]             $cmd->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
1058                "$_[1]           \$pos = \$match[-1]->to;\n" .
1059                "$_[1]           !\$match[-1] != 1;\n" .
1060                "$_[1]         }\n" .
1061                "$_[1]         ## </metasyntax>\n";
1062        }
1063        # call method in lexical $var
1064        # TODO - send $pos to subrule
1065        return
1066                "$_[1]         ## <metasyntax>\n" .
1067                "$_[1]         ## pos: @$RegexPos\n" .
1068                "$_[1]         do {\n" .
1069                "$_[1]           my \$r = Pugs::Runtime::Regex::get_variable( '$cmd' );\n" .
1070                "$_[1]           push \@match,\n" .
1071                "$_[1]             \$r->match( \$s, \$grammar, {p => \$pos}, undef );\n" .
1072                "$_[1]           \$pos = \$match[-1]->to;\n" .
1073                "$_[1]           !\$match[-1] != 1;\n" .
1074                "$_[1]         }\n" .
1075                "$_[1]         ## </metasyntax>\n";
1076    }
1077    if ( $prefix eq q(') ) {   # single quoted literal '
1078        $cmd = substr( $cmd, 1, -1 );
1079        return call_constant( $cmd, $_[1] );
1080    }
1081    if ( $prefix eq q(") ) {   # interpolated literal "
1082        $cmd = substr( $cmd, 1, -1 );
1083        warn "<\"...\"> not implemented";
1084        return;
1085    }
1086    if  (
1087           $modifier eq '.'
1088        || $modifier eq '?'   # XXX FIXME
1089        )
1090    {   # non_capturing_subrule / code assertion
1091        #$cmd = substr( $cmd, 1 );
1092        if ( $cmd =~ /^{/ ) {
1093            warn "code assertion not implemented";
1094            return;
1095        }
1096        my @param; # TODO
1097        my $subrule = $cmd;
1098        return
1099"$_[1] ## <metasyntax>
1100$_[1] ## pos: @$RegexPos
1101$_[1] do {
1102$_[1]      my \$prior = \$::_V6_PRIOR_;
1103$_[1]      my \$match =\n" .
1104               call_subrule( $subrule, $_[1]."        ", "", @param ) . ";
1105$_[1]      \$::_V6_PRIOR_ = \$prior;
1106$_[1]      my \$bool = (!\$match != 1);
1107$_[1]      \$pos = \$match->to if \$bool;
1108$_[1]      \$match;
1109$_[1] }
1110$_[1] ## </metasyntax>\n";
1111    }
1112    if ( $prefix =~ /[_[:alnum:]]/ ) {
1113        if ( $cmd eq 'cut' ) {
1114            warn "<$cmd> not implemented";
1115            return;
1116        }
1117        if ( $cmd eq 'commit' ) {
1118            warn "<$cmd> not implemented";
1119            return;
1120        }
1121        if ( $cmd eq 'null' ) {
1122            return "$_[1] 1 # null\n"
1123        }
1124        # <subrule ( param, param ) >
1125        my ( $subrule, $param_list ) = split( /[\(\)]/, $cmd );
1126        $param_list ||= '';
1127
1128        if ( $subrule eq 'at' ) {
1129            $param_list ||= 0;   # XXX compile-time only
1130            return "$_[1] ( \$pos == $param_list )\n"
1131        }
1132
1133        return named_capture(
1134            {
1135                ident => $subrule,
1136                rule => { metasyntax => { metasyntax => $cmd }, _pos => $_[0]->{_pos} },
1137            },
1138            $_[1],
1139        );
1140    }
1141    #### $prefix
1142    #### $modifier
1143    #if ( $prefix eq '.' ) {
1144    #    my ( $method, $param_list ) = split( /[\(\)]/, $cmd );
1145    #    $method =~ s/^\.//;
1146    #    $param_list ||= '';
1147    #    return " ( \$s->$method( $param_list ) ? 1 : 0 ) ";
1148    #}
1149    die "<$cmd> not implemented";
1150}
1151
11521;
1153