1# Copyright (C) 2004-2012, Parrot Foundation. 2 3package Parrot::Pmc2c::Parser; 4 5use strict; 6use warnings; 7use base qw( Exporter ); 8our @EXPORT_OK = qw( parse_pmc extract_balanced ); 9use Parrot::Pmc2c::PMC (); 10use Parrot::Pmc2c::Method (); 11use Parrot::Pmc2c::Emitter (); 12use Parrot::Pmc2c::PCCMETHOD (); 13use Parrot::Pmc2c::UtilFunctions qw(count_newlines filename slurp); 14use File::Basename qw(basename); 15 16=head1 NAME 17 18Parrot::Pmc2c::Parser - PMC Parser 19 20=head1 SYNOPSIS 21 22 use Parrot::Pmc2c::Parser qw( 23 parse_pmc 24 extract_balanced 25 ); 26 27=head1 DESCRIPTION 28 29Parrot::Pmc2c::Parser parses a pseudo-C syntax into a perl hash that is then dumped. 30 31=head1 SUBROUTINES 32 33This package exports two subroutines on request only. 34 35=head2 C<parse_pmc()> 36 37 $parsed_pmc_hash = parse_pmc($pmc2cMain, $filename); 38 39B<Purpose:> Parse PMC code and return a hash ref of pmc attributes. 40 41B<Arguments:> List of two arguments: 42 43=over 4 44 45=item * 46 47The pmc2cMain object 48 49=item * 50 51Filename of the pmc to parse. 52 53=back 54 55B<Return Values:> Reference to a Parrot::Pmc2c::PMC object 56 57B<Comments:> Called by C<Parrot::Pmc2c::Dumper::dump_pmc()>. 58 59=cut 60 61sub parse_pmc { 62 my ( $pmc2cMain, $filename ) = @_; 63 64 #slurp file contents 65 $filename = $pmc2cMain->find_file( filename( $filename, '.pmc' ), 1 ); 66 my $code = slurp($filename); 67 68 my ( $preamble, $hdr_preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines ) = 69 parse_top_level($code); 70 71 my $filebase = basename($filename); 72 $filebase =~ s/\.pmc$//; 73 die("PMC filename $filebase.pmc does not match pmclass name $pmcname!\n") 74 unless lc($filebase) eq lc($pmcname); 75 my $pmc = Parrot::Pmc2c::PMC->create($pmcname); 76 $pmc->preamble( Parrot::Pmc2c::Emitter->text( $preamble, $filename, 1 ) ); 77 $pmc->hdr_preamble($hdr_preamble); 78 $pmc->name($pmcname); 79 $pmc->set_filename($filename); 80 $pmc->set_flags($flags); 81 $pmc->set_parents($parents); 82 83 # the +1 puts us on the current line 84 my $lineno = count_newlines($preamble) + $chewed_lines + 1; 85 my $class_init; 86 87 ($lineno, $pmcbody) = find_attrs( $pmc, $pmcbody, $lineno, $filename, $pmc2cMain); 88 ($lineno, $class_init) = find_methods($pmc, $pmcbody, $lineno, $filename); 89 90 $pmc->postamble( Parrot::Pmc2c::Emitter->text( $post, $filename, $lineno ) ); 91 92 # ensure class_init is the last method in the method list 93 $pmc->add_method($class_init) if $class_init; 94 $pmc->vtable( $pmc2cMain->read_dump("vtable.pmc") ); 95 $pmc->pre_method_gen(); 96 $pmc->post_method_gen(); 97 98 return $pmc; 99} 100 101sub find_attrs { 102 my ($pmc, $pmcbody, $lineno, $filename, $pmc2cMain) = @_; 103 104 #prepend parent ATTRs to this PMC's ATTR list, if possible 105 my $got_attrs_from = ''; 106 foreach my $parent ( @{ $pmc->{parents} } ) { 107 108 my $parent_dump = $pmc2cMain->read_dump( lc($parent) . '.dump' ); 109 110 if ( $got_attrs_from ne '' && $parent_dump->{has_attribute} ) { 111 die "$filename is trying to extend $got_attrs_from and $parent, ". 112 "but both these PMCs have ATTRs."; 113 } 114 115 if ( $parent_dump->{has_attribute} ) { 116 $got_attrs_from = $parent; 117 foreach my $parent_attrs ( @{ $parent_dump->{attributes} } ) { 118 $pmc->add_attribute($parent_attrs); 119 } 120 } 121 } 122 123 # backreferences here are all +1 because below the qr is wrapped in quotes 124 my $attr_re = qr{ 125 ^ 126 (?: 127 [;\n\s]* # blank spaces and spurious semicolons 128 (?:/\*.*?\*/)? # C comments 129 )* 130 131 # attribute marker 132 ATTR 133 134 # type 135 \s+ 136 ( U?INTVAL 137 | FLOATVAL 138 | STRING\s+\* 139 | PMC\s+\* 140 | (?:struct\s+)?\w+\s+\*+ 141 | (?:unsigned\s+)?char\s+\*+ 142 | \w* 143 ) 144 145 # name 146 \s* 147 ( 148 \w+ 149 | \(\*\w*\)\(.*?\) 150 ) 151 152 # Array size 153 (\[\d+\])? 154 155 # modifiers 156 \s* 157 ((?::\w+\s*)*) 158 159 # declaration terminator 160 ; 161 162 # optional comment 163 \s* 164 (/\*.*?\*/)? 165 }sx; 166 167 while ($pmcbody =~ s/($attr_re)//o) { 168 my ($type, $name, $array_size, @modifiers, $comment); 169 $type = $2 || ''; 170 $name = $3 || ''; 171 $array_size = $4 || ''; 172 @modifiers = (); 173 @modifiers = split /\s/, $5; 174 $comment = $6; 175 176 $lineno += count_newlines($1); 177 178 $pmc->add_attribute( { 179 name => $name, 180 type => $type, 181 array_size => $array_size, 182 modifiers => \@modifiers, 183 } ); 184 } 185 186 return ($lineno, $pmcbody); 187} 188 189sub find_methods { 190 my ($pmc, $pmcbody, $lineno, $filename) = @_; 191 my $class_init = 0; 192 193 # backreferences here are all +1 because below the qr is wrapped in quotes 194 my $signature_re = qr{ 195 ^ 196 (?: 197 [;\n\s]* # blank spaces and spurious semicolons 198 (?:/\*.*?\*/)? # C comments 199 )* 200 201 ((?:PARROT_\w+\s+)+)? # decorators 202 203 # vtable, method, or multi marker 204 (?:(VTABLE|METHOD|MULTI)\s+)? 205 206 ((?:\w+\s*?\**\s*)?\w+) # method name (includes return type) 207 \s* 208 \(( # parameters 209 (?:\w+\s*\*? # type (pointer optional) 210 \s* 211 \w+ # argument name 212 (?: 213 \s+:\w+ # attribute 214 (?:\("[^\)]+"\))? # with optional parameter 215 )* 216 ,?\s* # probably a comma and whitespace 217 )* # zero or more of these bad boys 218 )\) 219 \s* 220 ((?::(\w+)\s*)*) # method attrs 221 \s* 222 }sx; 223 224 while ( $pmcbody =~ s/($signature_re)//o ) { 225 my ( $decorators, $marker, $methodname, $parameters, $rawattrs ) = 226 ( $2, $3, $4, $5, $6 ); 227 my $attrs = defined $rawattrs ? parse_method_attrs($rawattrs) : {}; 228 $lineno += count_newlines($1); 229 230 my $returntype = ''; 231 232 if ($methodname =~ /(.*\s+\*?)(\w+)/) { 233 ($returntype, $methodname) = ($1, $2); 234 } 235 236 ( my $methodblock, $pmcbody ) = extract_balanced($pmcbody); 237 my $block_lines = count_newlines($methodblock); 238 239 $methodblock = strip_outer_brackets($methodblock); 240 241 # remove pmclass 4 space indent 242 $methodblock =~ s/^[ ]{4}//mg; 243 244 # trim trailing ws from last line 245 $methodblock =~ s/\n[\t ]+$/\n/g; 246 247 # detect manual_wb via PARROT_GC_WRITE_BARRIER automatically 248 if ($methodblock =~ m|^\s*(/* no )?PARROT_GC_WRITE_BARRIER|m) { 249 $attrs->{manual_wb} = 1; 250 } 251 252 $decorators ||= ''; 253 $decorators =~ s/^\s*(.*?)\s*$/$1/s; 254 $decorators = [ split /\s+/ => $decorators ]; 255 256 $returntype = 'void' if (defined $marker && $marker eq 'METHOD'); 257 258 my $method = Parrot::Pmc2c::Method->new( 259 { 260 name => $methodname, 261 parent_name => $pmc->name, 262 body => Parrot::Pmc2c::Emitter->text( $methodblock, $filename, $lineno ), 263 return_type => $returntype, 264 parameters => $parameters, 265 attrs => $attrs, 266 decorators => $decorators, 267 type => $marker && $marker =~ /MULTI/ ? Parrot::Pmc2c::Method::MULTI : 268 $marker && $marker !~ /VTABLE/ ? Parrot::Pmc2c::Method::NON_VTABLE : 269 Parrot::Pmc2c::Method::VTABLE 270 } 271 ); 272 273 # METHOD needs FixedIntegerArray header 274 if ( $method->type eq Parrot::Pmc2c::Method::NON_VTABLE ) { 275 # rewrite_pccmethod() modifies $method in-place 276 Parrot::Pmc2c::PCCMETHOD::rewrite_pccmethod( $method, $pmc ); 277 $pmc->set_flag('need_fia_header'); 278 } 279 elsif ( $method->type eq Parrot::Pmc2c::Method::MULTI ) { 280 # rewrite_multi_sub() modifies $method in-place 281 Parrot::Pmc2c::PCCMETHOD::rewrite_multi_sub( $method, $pmc ); 282 } 283 284 if ( $method->type eq Parrot::Pmc2c::Method::NON_VTABLE 285 || $method->type eq Parrot::Pmc2c::Method::MULTI ) { 286 # Name-mangle NCI and multi methods to avoid conflict with vtables 287 # mangle_name() modifies $method in-place 288 Parrot::Pmc2c::PCCMETHOD::mangle_name( $method ); 289 } 290 291 # PCCINVOKE needs FixedIntegerArray header 292 $pmc->set_flag('need_fia_header') if $methodblock =~ /PCCINVOKE/; 293 294 # the class_init method is added last after all other methods 295 if ( $methodname eq 'class_init' ) { 296 $class_init = $method; 297 } 298 else { 299 $pmc->add_method($method); 300 } 301 302 $lineno += $block_lines; 303 } 304 305 # include the remainder in the line count, minus the last one 306 # (the last one is included in the postamble directly) 307 chomp $pmcbody; 308 $lineno += count_newlines($pmcbody); 309 310 return ($lineno, $class_init); 311} 312 313sub strip_outer_brackets { 314 my ($method_body) = @_; 315 die "First character in $method_body is not a {" 316 unless substr( $method_body, 0, 1 ) eq '{'; 317 318 die "Last character in $method_body is not a }" 319 unless substr( $method_body, -1, 1 ) eq '}'; 320 321 return substr $method_body, 1, -1; 322} 323 324=head2 C<parse_top_level()> 325 326 my ($preamble, $hdr_preamble, $pmcname, $flags, $parents, $pmcbody, $post, $chewed_lines) 327 = parse_top_level(\$code); 328 329B<Purpose:> Extract a pmc signature from the code ref. 330 331B<Argument:> PMC file contents slurped by C<parse_pmc()>. 332 333B<Return Values:> List of eight elements: 334 335=over 4 336 337=item * 338 339the code found before the pmc signature; 340 341=item * 342 343the code declared to be the header preamble. will be included at the start of the header. 344 345=item * 346 347the name of the pmc 348 349=item * 350 351a hash ref containing the flags associated with the pmc (such as 352C<extends> and C<provides>). 353 354=item * 355 356the list of parents this pmc extends 357 358=item * 359 360the body of the pmc 361 362=item * 363 364the code found after the pmc body 365 366=item * 367 368number of newlines in the pmc signature that need to be added to the 369running total of lines in the file 370 371=back 372 373B<Comments:> Called internally by C<parse_pmc()>. 374 375=cut 376 377sub parse_top_level { 378 my $code = shift; 379 380 my $top_level_re = qr{ 381 ^ # beginning of line 382 (?: 383 (.*?) # preamble 1 384 ^ BEGIN_PMC_HEADER_PREAMBLE \s* 385 ^ (.*?) # header preamble 386 ^ END_PMC_HEADER_PREAMBLE \s* 387 ^ (.*?) # preamble 2 388 | (.*?) # preamble 3 389 ) 390 391 ^ 392 ( 393 \s* 394 pmclass # pmclass keyword 395 \s+ # whitespace 396 ([\w]*) # pmc name 397 ((?:\s+\w+)*) # pmc attributes 398 \s* # whitespace 399 ) 400 \{ # pmc body beginning marker 401 }smx; 402 $code =~ s[$top_level_re][{]smx or die "No pmclass found\n"; 403 my ( $hdr_preamble, $pmc_signature, $pmcname, $attributes ) = ( $2, $5, $6, $7 ); 404 my $preamble = do { 405 no warnings 'uninitialized'; 406 $1 . $3 . $4; 407 }; 408 409 my $chewed_lines = count_newlines($pmc_signature); 410 my ( $flags, $parents ) = parse_flags( $attributes, $pmcname ); 411 my ( $body, $postamble ) = extract_balanced($code); 412 413 # trim out the { } 414 $body = strip_outer_brackets($body); 415 416 return ( $preamble, $hdr_preamble, $pmcname, $flags, $parents, 417 $body, $postamble, $chewed_lines ); 418} 419 420our %has_value = map { $_ => 1 } qw(does group hll); 421our %has_values = map { $_ => 1 } qw(provides extends maps lib); 422 423=head2 C<parse_flags()> 424 425 my ($flags, $parents) = parse_flags($attributes, $pmcname); 426 427B<Purpose:> Extract a pmc signature from the code ref. 428 429B<Argument:> PMC file contents slurped by C<parse_pmc()>. 430 431B<Return Values:> List of two elements: 432 433=over 4 434 435=item * 436 437a hash ref containing the flags associated with the pmc (such as 438C<extends> and C<provides>). 439 440=item * 441 442the list of parents this pmc extends 443 444=back 445 446B<Comments:> Called internally by C<parse_top_level()>. 447 448=cut 449 450sub parse_flags { 451 my ( $data, $pmcname ) = @_; 452 453 my ( $flags, @parents ); 454 455 my @words = $data =~ /(\w+)/g; 456 457 while ( @words ) { 458 my $name = shift @words; 459 if ( $has_value{$name} || $has_values{$name} ) { 460 my $value = shift @words; 461 die "Parser error: no value for '$name'" unless $value; 462 463 if ( $name eq 'extends' ) { 464 push @parents, $value; 465 } 466 elsif ( $has_values{$name} ) { 467 $flags->{$name}{$value} = 1; 468 } 469 else { 470 $flags->{$name} = $value; 471 } 472 } 473 else { 474 $flags->{$name} = 1; 475 } 476 } 477 478 # setup some defaults 479 if ( $pmcname ne 'default' ) { 480 push @parents, 'default' unless @parents; 481 $flags->{provides}{scalar} = 1 unless $flags->{provides}; 482 } 483 484 return ( $flags, \@parents ); 485} 486 487=head2 C<extract_balanced()> 488 489 ($pmcbody, $post) = extract_balanced($code); 490 491B<Purpose:> Remove a balanced C<{}> construct from the beginning of C<$code>. 492Return it and the remaining code. 493 494B<Argument:> The code ref which was the first argument provided to 495C<parse_pmc()>. 496 497B<Return Values:> List of two elements: 498 499=over 4 500 501=item * 502 503String beginning with C<{> and ending with C<}>. In between is found C code 504where the comments hold strings of Perl comments written in POD. 505 506=item * 507 508String holding the balance of the code. Same style as first element, but 509without the braces. 510 511=back 512 513B<Comments:> Called twice within C<parse_pmc()>. Will die with error message 514C<Badly balanced> if not balanced. 515 516=cut 517 518sub extract_balanced { 519 my $code = shift; 520 my $unbalanced = 0; 521 522 die 'Unexpected whitespace, expecting' if $code =~ /^\s+/; 523 die 'bad block open: ', substr( $code, 0, 40 ), '...' unless $code =~ /^\{/; 524 525 # create a copy and remove strings and comments so that 526 # unbalanced {} can be used in them in PMCs, being careful to 527 # preserve string length. 528 local $_ = $code; 529 s[ 530 ( ' (?: \\. | [^'] )* ' # remove ' strings 531 | " (?: \\. | [^"] )* " # remove " strings 532 | /\* .*? \*/ ) # remove C comments 533 ] 534 [ "-" x length $1 ]sexg; 535 536 while (/ (\{) | (\}) /gx) { 537 if ($1) { 538 $unbalanced++; 539 } 540 else { # $2 541 $unbalanced--; 542 return ( substr( $code, 0, pos, "" ), $code ) if not $unbalanced; 543 } 544 } 545 546 die "Badly balanced PMC source\n" if $unbalanced; 547 return; 548} 549 550=head2 C<parse_method_attrs()> 551 552 $attrs = parse_method_attrs($method_attributes); 553 554B<Purpose:> Parse a list of method attributes and return a hash ref of them. 555 556B<Arguments:> String captured from regular expression. 557 558B<Return Values:> Reference to hash of attribute values. 559 560B<Comments:> Called within C<parse_pmc()>. 561 562=cut 563 564sub parse_method_attrs { 565 my $flags = shift; 566 567 my %result; 568 ++$result{$1} while $flags =~ /:(\w+)/g; 569 $result{manual_wb}++ if $result{no_wb}; 570 571 return \%result; 572} 573 5741; 575 576# Local Variables: 577# mode: cperl 578# cperl-indent-level: 4 579# fill-column: 100 580# End: 581# vim: expandtab shiftwidth=4: 582