1# Copyright (C) 2004-2012, Parrot Foundation.
2
3package Parrot::Pmc2c::Parser;
4
5use strict;
6use warnings;
7use base qw( Exporter );
8our @EXPORT_OK = qw( parse_pmc extract_balanced );
9use Parrot::Pmc2c::PMC ();
10use Parrot::Pmc2c::Method ();
11use Parrot::Pmc2c::Emitter ();
12use Parrot::Pmc2c::PCCMETHOD ();
13use Parrot::Pmc2c::UtilFunctions qw(count_newlines filename slurp);
14use File::Basename qw(basename);
15
16=head1 NAME
17
18Parrot::Pmc2c::Parser - PMC Parser
19
20=head1 SYNOPSIS
21
22    use Parrot::Pmc2c::Parser qw(
23        parse_pmc
24        extract_balanced
25    );
26
27=head1 DESCRIPTION
28
29Parrot::Pmc2c::Parser parses a pseudo-C syntax into a perl hash that is then dumped.
30
31=head1 SUBROUTINES
32
33This package exports two subroutines on request only.
34
35=head2 C<parse_pmc()>
36
37    $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename);
38
39B<Purpose:>  Parse PMC code and return a hash ref of pmc attributes.
40
41B<Arguments:>  List of two arguments:
42
43=over 4
44
45=item *
46
47The pmc2cMain object
48
49=item *
50
51Filename of the pmc to parse.
52
53=back
54
55B<Return Values:>  Reference to a Parrot::Pmc2c::PMC object
56
57B<Comments:>  Called by C<Parrot::Pmc2c::Dumper::dump_pmc()>.
58
59=cut
60
61sub parse_pmc {
62    my ( $pmc2cMain, $filename ) = @_;
63
64    #slurp file contents
65    $filename = $pmc2cMain->find_file( filename( $filename, '.pmc' ), 1 );
66    my $code  = slurp($filename);
67
68    my ( $preamble, $hdr_preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) =
69        parse_top_level($code);
70
71    my $filebase = basename($filename);
72    $filebase =~ s/\.pmc$//;
73    die("PMC filename $filebase.pmc does not match pmclass name $pmcname!\n")
74        unless lc($filebase) eq lc($pmcname);
75    my $pmc = Parrot::Pmc2c::PMC->create($pmcname);
76    $pmc->preamble( Parrot::Pmc2c::Emitter->text( $preamble, $filename, 1 ) );
77    $pmc->hdr_preamble($hdr_preamble);
78    $pmc->name($pmcname);
79    $pmc->set_filename($filename);
80    $pmc->set_flags($flags);
81    $pmc->set_parents($parents);
82
83    # the +1 puts us on the current line
84    my $lineno = count_newlines($preamble) + $chewed_lines + 1;
85    my $class_init;
86
87    ($lineno, $pmcbody)    = find_attrs(  $pmc, $pmcbody, $lineno, $filename, $pmc2cMain);
88    ($lineno, $class_init) = find_methods($pmc, $pmcbody, $lineno, $filename);
89
90    $pmc->postamble( Parrot::Pmc2c::Emitter->text( $post, $filename, $lineno ) );
91
92    # ensure class_init is the last method in the method list
93    $pmc->add_method($class_init) if $class_init;
94    $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") );
95    $pmc->pre_method_gen();
96    $pmc->post_method_gen();
97
98    return $pmc;
99}
100
101sub find_attrs {
102    my ($pmc, $pmcbody, $lineno, $filename, $pmc2cMain) = @_;
103
104    #prepend parent ATTRs to this PMC's ATTR list, if possible
105    my $got_attrs_from = '';
106    foreach my $parent ( @{ $pmc->{parents} } ) {
107
108        my $parent_dump = $pmc2cMain->read_dump( lc($parent) . '.dump' );
109
110        if ( $got_attrs_from ne '' && $parent_dump->{has_attribute} ) {
111            die "$filename is trying to extend $got_attrs_from and $parent, ".
112                "but both these PMCs have ATTRs.";
113        }
114
115        if ( $parent_dump->{has_attribute} ) {
116            $got_attrs_from = $parent;
117            foreach my $parent_attrs ( @{ $parent_dump->{attributes} } ) {
118                $pmc->add_attribute($parent_attrs);
119            }
120        }
121    }
122
123    # backreferences here are all +1 because below the qr is wrapped in quotes
124    my $attr_re = qr{
125        ^
126        (?:
127          [;\n\s]*            # blank spaces and spurious semicolons
128          (?:/\*.*?\*/)?      # C comments
129        )*
130
131        # attribute marker
132        ATTR
133
134        # type
135        \s+
136        (   U?INTVAL
137          | FLOATVAL
138          | STRING\s+\*
139          | PMC\s+\*
140          | (?:struct\s+)?\w+\s+\*+
141          | (?:unsigned\s+)?char\s+\*+
142          | \w*
143        )
144
145        # name
146        \s*
147        (
148            \w+
149          | \(\*\w*\)\(.*?\)
150        )
151
152        # Array size
153        (\[\d+\])?
154
155        # modifiers
156        \s*
157        ((?::\w+\s*)*)
158
159        # declaration terminator
160        ;
161
162    # optional comment
163    \s*
164    (/\*.*?\*/)?
165    }sx;
166
167    while ($pmcbody =~ s/($attr_re)//o) {
168        my ($type, $name, $array_size, @modifiers, $comment);
169        $type = $2 || '';
170        $name = $3 || '';
171        $array_size = $4 || '';
172        @modifiers = ();
173        @modifiers = split /\s/, $5;
174        $comment = $6;
175
176        $lineno += count_newlines($1);
177
178        $pmc->add_attribute( {
179                name       => $name,
180                type       => $type,
181                array_size => $array_size,
182                modifiers  => \@modifiers,
183        } );
184    }
185
186    return ($lineno, $pmcbody);
187}
188
189sub find_methods {
190    my ($pmc, $pmcbody, $lineno, $filename) = @_;
191    my $class_init                          = 0;
192
193    # backreferences here are all +1 because below the qr is wrapped in quotes
194    my $signature_re = qr{
195        ^
196        (?:
197          [;\n\s]*            # blank spaces and spurious semicolons
198          (?:/\*.*?\*/)?      # C comments
199        )*
200
201        ((?:PARROT_\w+\s+)+)? # decorators
202
203        # vtable, method, or multi marker
204        (?:(VTABLE|METHOD|MULTI)\s+)?
205
206        ((?:\w+\s*?\**\s*)?\w+) # method name (includes return type)
207        \s*
208        \((                     # parameters
209            (?:\w+\s*\*?        # type (pointer optional)
210            \s*
211            \w+                 # argument name
212            (?:
213              \s+:\w+           # attribute
214              (?:\("[^\)]+"\))? # with optional parameter
215            )*
216            ,?\s*               # probably a comma and whitespace
217            )*                  # zero or more of these bad boys
218        )\)
219        \s*
220        ((?::(\w+)\s*)*)        # method attrs
221        \s*
222    }sx;
223
224    while ( $pmcbody =~ s/($signature_re)//o ) {
225        my ( $decorators, $marker, $methodname, $parameters, $rawattrs ) =
226            ( $2, $3, $4, $5, $6 );
227        my $attrs = defined $rawattrs ? parse_method_attrs($rawattrs) : {};
228        $lineno  += count_newlines($1);
229
230        my $returntype = '';
231
232        if ($methodname =~ /(.*\s+\*?)(\w+)/) {
233            ($returntype, $methodname) = ($1, $2);
234        }
235
236        ( my $methodblock, $pmcbody ) = extract_balanced($pmcbody);
237        my $block_lines = count_newlines($methodblock);
238
239        $methodblock = strip_outer_brackets($methodblock);
240
241        # remove pmclass 4 space indent
242        $methodblock =~ s/^[ ]{4}//mg;
243
244        # trim trailing ws from last line
245        $methodblock =~ s/\n[\t ]+$/\n/g;
246
247        # detect manual_wb via PARROT_GC_WRITE_BARRIER automatically
248        if ($methodblock =~ m|^\s*(/* no )?PARROT_GC_WRITE_BARRIER|m) {
249            $attrs->{manual_wb} = 1;
250        }
251
252        $decorators ||= '';
253        $decorators   =~ s/^\s*(.*?)\s*$/$1/s;
254        $decorators   = [ split /\s+/ => $decorators ];
255
256        $returntype = 'void' if (defined $marker && $marker eq 'METHOD');
257
258        my $method = Parrot::Pmc2c::Method->new(
259            {
260                name        => $methodname,
261                parent_name => $pmc->name,
262                body        => Parrot::Pmc2c::Emitter->text( $methodblock, $filename, $lineno ),
263                return_type => $returntype,
264                parameters  => $parameters,
265                attrs       => $attrs,
266                decorators  => $decorators,
267                type        => $marker && $marker =~ /MULTI/  ? Parrot::Pmc2c::Method::MULTI      :
268                               $marker && $marker !~ /VTABLE/ ? Parrot::Pmc2c::Method::NON_VTABLE :
269                                                                Parrot::Pmc2c::Method::VTABLE
270            }
271        );
272
273        # METHOD needs FixedIntegerArray header
274        if ( $method->type eq Parrot::Pmc2c::Method::NON_VTABLE ) {
275            # rewrite_pccmethod() modifies $method in-place
276            Parrot::Pmc2c::PCCMETHOD::rewrite_pccmethod( $method, $pmc );
277            $pmc->set_flag('need_fia_header');
278        }
279        elsif ( $method->type eq Parrot::Pmc2c::Method::MULTI ) {
280            # rewrite_multi_sub() modifies $method in-place
281            Parrot::Pmc2c::PCCMETHOD::rewrite_multi_sub( $method, $pmc );
282        }
283
284        if ( $method->type eq Parrot::Pmc2c::Method::NON_VTABLE
285        ||   $method->type eq Parrot::Pmc2c::Method::MULTI ) {
286            # Name-mangle NCI and multi methods to avoid conflict with vtables
287            # mangle_name() modifies $method in-place
288            Parrot::Pmc2c::PCCMETHOD::mangle_name( $method );
289        }
290
291        # PCCINVOKE needs FixedIntegerArray header
292        $pmc->set_flag('need_fia_header') if $methodblock =~ /PCCINVOKE/;
293
294        # the class_init method is added last after all other methods
295        if ( $methodname eq 'class_init' ) {
296            $class_init = $method;
297        }
298        else {
299            $pmc->add_method($method);
300        }
301
302        $lineno += $block_lines;
303    }
304
305    # include the remainder in the line count, minus the last one
306    # (the last one is included in the postamble directly)
307    chomp $pmcbody;
308    $lineno += count_newlines($pmcbody);
309
310    return ($lineno, $class_init);
311}
312
313sub strip_outer_brackets {
314    my ($method_body) = @_;
315    die "First character in $method_body is not a {"
316        unless substr( $method_body, 0,  1 ) eq '{';
317
318    die "Last character in $method_body is not a }"
319        unless substr( $method_body, -1, 1 ) eq '}';
320
321    return substr $method_body, 1, -1;
322}
323
324=head2 C<parse_top_level()>
325
326    my ($preamble, $hdr_preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines)
327        = parse_top_level(\$code);
328
329B<Purpose:>  Extract a pmc signature from the code ref.
330
331B<Argument:>  PMC file contents slurped by C<parse_pmc()>.
332
333B<Return Values:>  List of eight elements:
334
335=over 4
336
337=item *
338
339the code found before the pmc signature;
340
341=item *
342
343the code declared to be the header preamble. will be included at the start of the header.
344
345=item *
346
347the name of the pmc
348
349=item *
350
351a hash ref containing the flags associated with the pmc (such as
352C<extends> and C<provides>).
353
354=item *
355
356the list of parents this pmc extends
357
358=item *
359
360the body of the pmc
361
362=item *
363
364the code found after the pmc body
365
366=item *
367
368number of newlines in the pmc signature that need to be added to the
369running total of lines in the file
370
371=back
372
373B<Comments:>  Called internally by C<parse_pmc()>.
374
375=cut
376
377sub parse_top_level {
378    my $code = shift;
379
380    my $top_level_re = qr{
381        ^                 # beginning of line
382        (?:
383            (.*?)         # preamble 1
384            ^ BEGIN_PMC_HEADER_PREAMBLE \s*
385            ^ (.*?)       # header preamble
386            ^ END_PMC_HEADER_PREAMBLE \s*
387            ^ (.*?)       # preamble 2
388        |   (.*?)         # preamble 3
389        )
390
391        ^
392        (
393            \s*
394            pmclass       # pmclass keyword
395            \s+           # whitespace
396            ([\w]*)       # pmc name
397            ((?:\s+\w+)*)   # pmc attributes
398            \s*           # whitespace
399        )
400        \{                # pmc body beginning marker
401    }smx;
402    $code =~ s[$top_level_re][{]smx or die "No pmclass found\n";
403    my ( $hdr_preamble, $pmc_signature, $pmcname, $attributes ) = ( $2, $5, $6, $7 );
404    my $preamble = do {
405        no warnings 'uninitialized';
406        $1 . $3 . $4;
407    };
408
409    my $chewed_lines         = count_newlines($pmc_signature);
410    my ( $flags, $parents )  = parse_flags( $attributes, $pmcname );
411    my ( $body, $postamble ) = extract_balanced($code);
412
413    # trim out the { }
414    $body = strip_outer_brackets($body);
415
416    return ( $preamble, $hdr_preamble, $pmcname, $flags, $parents,
417            $body, $postamble, $chewed_lines );
418}
419
420our %has_value  = map { $_ => 1 } qw(does group hll);
421our %has_values = map { $_ => 1 } qw(provides extends maps lib);
422
423=head2 C<parse_flags()>
424
425    my ($flags, $parents) = parse_flags($attributes, $pmcname);
426
427B<Purpose:>  Extract a pmc signature from the code ref.
428
429B<Argument:>  PMC file contents slurped by C<parse_pmc()>.
430
431B<Return Values:>  List of two elements:
432
433=over 4
434
435=item *
436
437a hash ref containing the flags associated with the pmc (such as
438C<extends> and C<provides>).
439
440=item *
441
442the list of parents this pmc extends
443
444=back
445
446B<Comments:>  Called internally by C<parse_top_level()>.
447
448=cut
449
450sub parse_flags {
451    my ( $data, $pmcname ) = @_;
452
453    my ( $flags, @parents );
454
455    my @words = $data =~ /(\w+)/g;
456
457    while ( @words ) {
458        my $name = shift @words;
459        if ( $has_value{$name} || $has_values{$name} ) {
460            my $value = shift @words;
461            die "Parser error: no value for '$name'" unless $value;
462
463            if ( $name eq 'extends' ) {
464                push @parents, $value;
465            }
466            elsif ( $has_values{$name} ) {
467                $flags->{$name}{$value} = 1;
468            }
469            else {
470                $flags->{$name} = $value;
471            }
472        }
473        else {
474            $flags->{$name} = 1;
475        }
476    }
477
478    # setup some defaults
479    if ( $pmcname ne 'default' ) {
480        push @parents, 'default' unless @parents;
481        $flags->{provides}{scalar} = 1 unless $flags->{provides};
482    }
483
484    return ( $flags, \@parents );
485}
486
487=head2 C<extract_balanced()>
488
489    ($pmcbody, $post) = extract_balanced($code);
490
491B<Purpose:>  Remove a balanced C<{}> construct from the beginning of C<$code>.
492Return it and the remaining code.
493
494B<Argument:>  The code ref which was the first argument provided to
495C<parse_pmc()>.
496
497B<Return Values:>  List of two elements:
498
499=over 4
500
501=item *
502
503String beginning with C<{> and ending with C<}>.  In between is found C code
504where the comments hold strings of Perl comments written in POD.
505
506=item *
507
508String holding the balance of the code.  Same style as first element, but
509without the braces.
510
511=back
512
513B<Comments:>  Called twice within C<parse_pmc()>.  Will die with error message
514C<Badly balanced> if not balanced.
515
516=cut
517
518sub extract_balanced {
519    my $code       = shift;
520    my $unbalanced = 0;
521
522    die 'Unexpected whitespace, expecting' if $code =~ /^\s+/;
523    die 'bad block open: ', substr( $code, 0, 40 ), '...' unless $code =~ /^\{/;
524
525    # create a copy and remove strings and comments so that
526    # unbalanced {} can be used in them in PMCs, being careful to
527    # preserve string length.
528    local $_ = $code;
529    s[
530        ( ' (?: \\. | [^'] )* '     # remove ' strings
531        | " (?: \\. | [^"] )* "     # remove " strings
532        | /\* .*? \*/ )             # remove C comments
533    ]
534    [ "-" x length $1 ]sexg;
535
536    while (/ (\{) | (\}) /gx) {
537        if ($1) {
538            $unbalanced++;
539        }
540        else {    # $2
541            $unbalanced--;
542            return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced;
543        }
544    }
545
546    die "Badly balanced PMC source\n" if $unbalanced;
547    return;
548}
549
550=head2 C<parse_method_attrs()>
551
552    $attrs = parse_method_attrs($method_attributes);
553
554B<Purpose:>  Parse a list of method attributes and return a hash ref of them.
555
556B<Arguments:>  String captured from regular expression.
557
558B<Return Values:>  Reference to hash of attribute values.
559
560B<Comments:>  Called within C<parse_pmc()>.
561
562=cut
563
564sub parse_method_attrs {
565    my $flags = shift;
566
567    my %result;
568    ++$result{$1} while $flags =~ /:(\w+)/g;
569    $result{manual_wb}++ if $result{no_wb};
570
571    return \%result;
572}
573
5741;
575
576# Local Variables:
577#   mode: cperl
578#   cperl-indent-level: 4
579#   fill-column: 100
580# End:
581# vim: expandtab shiftwidth=4:
582