1use strict; use warnings; 2package Inline::CPP::Parser::RecDescent; 3 4# Dev versions will have a _0xx suffix. 5# We eval the $VERSION to accommodate dev version numbering as described in 6# perldoc perlmodstyle 7our $VERSION = '0.80'; 8#$VERSION = eval $VERSION; ## no critic (eval) 9 10use Carp; 11 12sub register { 13 { 14 extends => [qw(CPP)], 15 overrides => [qw(get_parser)], 16 } 17} 18 19sub get_parser { 20 my $o = shift; 21 return Inline::CPP::Parser::RecDescent::get_parser_recdescent($o); 22} 23 24sub get_parser_recdescent { 25 my $o = shift; 26 eval { require Parse::RecDescent }; 27 croak <<END if $@; 28This invocation of Inline requires the Parse::RecDescent module. 29$@ 30END 31 no warnings qw/ once /; ## no critic (warnings) 32 $::RD_HINT = 1; # Turns on Parse::RecDescent's warnings/diagnostics. 33 my $parser = Parse::RecDescent->new(grammar()); 34 $parser->{data}{typeconv} = $o->{ILSM}{typeconv}; 35 $parser->{ILSM} = $o->{ILSM}; # give parser access to config options 36 return $parser; 37} 38 39use vars qw($TYPEMAP_KIND $fixkey); 40 41# Parse::RecDescent 1.90 and later have an incompatible change 42# 'The key of an %item entry for a repeated subrule now includes 43# the repetition specifier.' 44# Hence various hash keys may or may not need trailing '(s?)' depending on 45# the version of Parse::RecDescent we are using. 46 47require Parse::RecDescent; 48 49# Deal with Parse::RecDescent's version numbers for development 50# releases (eg, '1.96_000') resulting in a warning about non-numeric in > 51# comparison. 52{ # Lexical scope. 53 # Eval away the underscore. "1.96_000" => "1.96000". 54 # Use that "stable release" version number as the basis for our numeric 55 # comparison. 56 my $stable_version = eval $Parse::RecDescent::VERSION; ## no critic (eval) 57 $fixkey = ($stable_version > 1.89) 58 ? sub{ $_[0] } : sub{ local $_=shift; s/\(.*\)$//; $_ }; 59} # End lexical scope. 60 61 62#============================================================================ 63# Regular expressions to match code blocks, numbers, strings, parenthesized 64# expressions, function calls, and macros. The more complex regexes are only 65# implemented in 5.6.0 and above, so they're in eval-blocks. 66# 67# These are all adapted from the output of Damian Conway's excellent 68# Regexp::Common module. In future, Inline::CPP may depend directly on it, 69# but for now I'll just duplicate the code. 70use vars qw( $code_block $string $number $parens $funccall ); 71 72#============================================================================ 73 74# $RE{balanced}{-parens=>q|{}()[]"'|} 75eval <<'END'; ## no critic (eval) 76$code_block = qr'(?-xism:(?-xism:(?:[{](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[}]))|(?-xism:(?-xism:(?:[(](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[)]))|(?-xism:(?-xism:(?:[[](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[]]))|(?-xism:(?!)))))'; 77END 78$code_block = qr'{[^}]*}' if $@; # For the stragglers: here's a lame regexp. 79 80# $RE{balanced}{-parens=>q|()"'|} 81eval <<'END'; ## no critic (eval) 82$parens = qr'(?-xism:(?-xism:(?:[(](?:(?>[^)(]+)|(??{$Inline::CPP::Parser::RecDescent::parens}))*[)]))|(?-xism:(?!)))'; 83END 84$parens = qr'\([^)]*\)' if $@; # For the stragglers: here's another 85 86# $RE{quoted} 87$string 88 = qr'(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\')|(?:\`)(?:[^\\\`]*(?:\\.[^\\\`]*)*)(?:\`))'; 89 90# $RE{num}{real}|$RE{num}{real}{-base=>16}|$RE{num}{int} 91$number 92 = qr'(?:(?i)(?:[+-]?)(?:(?=[0123456789]|[.])(?:[0123456789]*)(?:(?:[.])(?:[0123456789]{0,}))?)(?:(?:[E])(?:(?:[+-]?)(?:[0123456789]+))|))|(?:(?i)(?:[+-]?)(?:(?=[0123456789ABCDEF]|[.])(?:[0123456789ABCDEF]*)(?:(?:[.])(?:[0123456789ABCDEF]{0,}))?)(?:(?:[G])(?:(?:[+-]?)(?:[0123456789ABCDEF]+))|))|(?:(?:[+-]?)(?:\d+))'; 93$funccall 94 = qr/(?:[_a-zA-Z][_a-zA-Z0-9]*::)*[_a-zA-Z][_a-zA-Z0-9]*(?:$Inline::CPP::Parser::RecDescent::parens)?/; 95 96#============================================================================ 97# Inline::CPP's grammar 98#============================================================================ 99sub grammar { 100 return <<'END'; 101 102{ use Data::Dumper; } 103 104{ 105 sub fixkey { &$Inline::CPP::Parser::RecDescent::fixkey } 106} 107 108{ 109 sub handle_args { 110 my ($args) = @_; 111 my %argsdef; 112 $argsdef{arg_names} = [ map $_->{name}, @$args ]; 113 $argsdef{arg_types} = [ map $_->{type}, @$args ]; 114 $argsdef{arg_offsets} = [ map $_->{offset}, @$args ]; 115 $argsdef{arg_optional} = [ map $_->{optional}, @$args ]; 116 \%argsdef; 117 } 118 sub handle_class_def { 119 my ($thisparser, $def) = @_; 120# print "Found a class: $def->[0]\n"; 121 my $class = $def->[0]; 122 my @parts; 123 for my $part (@{$def->[1]}) { push @parts, @$_ for @$part } 124 push @{$thisparser->{data}{classes}}, $class 125 unless defined $thisparser->{data}{class}{$class}; 126 $thisparser->{data}{class}{$class} = \@parts; 127# print "Class $class:\n", Dumper \@parts; 128 Inline::CPP::Parser::RecDescent::typemap($thisparser, $class); 129 [$class, \@parts]; 130 } 131 sub handle_typedef { 132 my ($thisparser, $t) = @_; 133 my ($name, $type) = @{$t}{qw(name type)}; 134# print "found a typedef: $name => $type\n"; 135 136 # XXX: this doesn't handle non-class typedefs that we could handle, 137 # e.g. "typedef int my_int_t" 138 139 if ($thisparser->{data}{class}{$type} 140 && !exists($thisparser->{data}{class}{$name})) { 141 push @{$thisparser->{data}{classes}}, $name; 142 $thisparser->{data}{class}{$name} = $thisparser->{data}{class}{$type}; 143 Inline::CPP::Parser::RecDescent::typemap($thisparser, $name); 144 } 145 $t; 146 } 147 sub handle_enum { 148 my ($thisparser, $t) = @_; 149 $t; 150 } 151} 152 153code: part(s) {1} 154 155part: comment 156 | typedef 157 { 158 handle_typedef($thisparser, $item[1]); 159 1; 160 } 161 | enum 162 { 163 my $t = handle_enum($thisparser, $item[1]); 164 push @{$thisparser->{data}{enums}}, $t; 165 1; 166 } 167 | class_def 168 { 169 handle_class_def($thisparser, $item[1]); 170 1; 171 } 172 | function_def 173 { 174# print "found a function: $item[1]->{name}\n"; 175 my $name = $item[1]->{name}; 176 my $i=0; 177 for my $arg (@{$item[1]->{args}}) { 178 $arg->{name} = 'dummy' . ++$i unless defined $arg->{name}; 179 } 180 Inline::CPP::Parser::RecDescent::strip_ellipsis($thisparser, 181 $item[1]->{args}); 182 push @{$thisparser->{data}{functions}}, $name 183 unless defined $thisparser->{data}{function}{$name}; 184 my %funcdef = %{ $item[1] }; 185 %funcdef = (%funcdef, %{ handle_args(delete $funcdef{args}) }); 186 $thisparser->{data}{function}{$name} = \%funcdef; 187# print Dumper $item[1]; 188 1; 189 } 190 | all 191 192typedef: 'typedef' class IDENTIFIER(?) '{' <commit> class_part(s?) '}' IDENTIFIER ';' 193 { 194 my ($class, $parts); 195 $class = $item[3][0] || 'anon_class'.($thisparser->{data}{anonclass}++); 196 ($class, $parts)= handle_class_def($thisparser, [$class, $item{fixkey('class_part(s?)')}]); 197 { thing => 'typedef', name => $item[8], type => $class, body => $parts } 198 } 199 | 'typedef' IDENTIFIER IDENTIFIER ';' 200 { { thing => 'typedef', name => $item[3], type => $item[2] } } 201 | 'typedef' /[^;]*/ ';' 202 { 203# dprint "Typedef $item{__DIRECTIVE1__} is too heinous\n"; 204 { thing => 'comment'} 205 } 206 207enum: 'enum' IDENTIFIER(?) '{' <leftop: enum_item ',' enum_item> '}' ';' 208 { 209 { thing => 'enum', name => $item{fixkey('IDENTIFIER(?)')}[0], 210 body => $item{__DIRECTIVE1__} } 211 } 212 213enum_item: IDENTIFIER '=' <commit> /[0-9]+/ 214 { [$item{IDENTIFIER}, $item{__PATTERN1__}] } 215 | IDENTIFIER 216 { [$item{IDENTIFIER}, undef] } 217 218class_def: class IDENTIFIER '{' <commit> class_part(s?) '}' ';' 219 { 220 [@item{'IDENTIFIER',fixkey('class_part(s?)')}] 221 } 222 | class IDENTIFIER ':' <commit> <leftop: inherit ',' inherit> 223 '{' class_part(s?) '}' ';' 224 { 225 push @{$item{fixkey('class_part(s?)')}}, [$item{__DIRECTIVE2__}]; 226 [@item{'IDENTIFIER',fixkey('class_part(s?)')}] 227 } 228 229inherit: scope IDENTIFIER 230 { {thing => 'inherits', name => $item[2], scope => $item[1]} } 231 232class_part: comment { [ {thing => 'comment'} ] } 233 | scope ':' <commit> class_decl(s?) 234 { 235 for my $part (@{$item{fixkey('class_decl(s?)')}}) { 236 $_->{scope} = $item[1] for @$part; 237 } 238 $item{fixkey('class_decl(s?)')} 239 } 240 | class_decl(s) 241 { 242 for my $part (@{$item[1]}) { 243 $_->{scope} = $thisparser->{data}{defaultscope} 244 for @$part; 245 } 246 $item[1] 247 } 248 249class_decl: comment { [{thing => 'comment'}] } 250 | typedef { [ handle_typedef($thisparser, $item[1]) ] } 251 | enum { [ handle_enum($thisparser, $item[1]) ] } 252 | class_def 253 { 254 my ($class, $parts) = handle_class_def($thisparser, $item[1]); 255 [{ thing => 'class', name => $class, body => $parts }]; 256 } 257 | method_def 258 { 259 $item[1]->{thing} = 'method'; 260# print "class_decl found a method: $item[1]->{name}\n"; 261 my $i=0; 262 for my $arg (@{$item[1]->{args}}) { 263 $arg->{name} = 'dummy' . ++$i unless defined $arg->{name}; 264 } 265 Inline::CPP::Parser::RecDescent::strip_ellipsis($thisparser, 266 $item[1]->{args}); 267 my %funcdef = %{ $item[1] }; 268 %funcdef = (%funcdef, %{ handle_args(delete $funcdef{args}) }); 269 [\%funcdef]; 270 } 271 | member_def 272 { 273# print "class_decl found one or more members:\n", Dumper(\@item); 274 $_->{thing} = 'member' for @{$item[1]}; 275 $item[1]; 276 } 277 278function_def: operator <commit> ';' 279 { 280 $item[1] 281 } 282 | operator <commit> smod(?) code_block 283 { 284 $item[1] 285 } 286 | IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' smod(?) code_block 287 { 288 {name => $item{IDENTIFIER}, args => $item{__DIRECTIVE2__}, return_type => '' } 289 } 290 | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' ';' 291 { 292 {return_type => $item[1], name => $item[2], args => $item{__DIRECTIVE1__} } 293 } 294 | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' smod(?) code_block 295 { 296 {return_type => $item{return_type}, name => $item[2], args => $item{__DIRECTIVE1__} } 297 } 298 299method_def: operator <commit> method_imp 300 { 301# print "method operator:\n", Dumper $item[1]; 302 $item[1]; 303 } 304 305 | IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' method_imp 306 { 307# print "con-/de-structor found: $item[1]\n"; 308 {name => $item[1], args => $item{__DIRECTIVE2__}, abstract => ${$item{method_imp}} }; 309 } 310 | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' method_imp 311 { 312# print "method found: $item[2]\n"; 313 $return = 314 {name => $item[2], return_type => $item[1], args => $item[4], 315 abstract => ${$item[6]}, 316 rconst => $thisparser->{data}{smod}{const}, 317 }; 318 $thisparser->{data}{smod}{const} = 0; 319 } 320 321operator: return_type(?) 'operator' /\(\)|[^()]+/ '(' <leftop: arg ',' arg>(s?) ')' 322 { 323# print "Found operator: $item[1][0] operator $item[3]\n"; 324 {name=> "operator $item[3]", args => $item[5], ret => $item[1][0]} 325 } 326 327# By adding smod, we allow 'const' member functions. This would also bind to 328# incorrect C++ with the word 'static' after the argument list, but we don't 329# care at all because such code would never be compiled successfully. 330 331# By adding init, we allow constructors to initialize references. Again, we'll 332# allow them anywhere, but our goal is not to enforce c++ standards -- that's 333# the compiler's job. 334method_imp: smod(?) ';' { \0 } 335 | smod(?) '=' <commit> '0' ';' { \1 } 336 | smod(?) initlist(?) code_block { \0 } 337 | smod(?) '=' '0' code_block { \0 } 338 339initlist: ':' <leftop: subexpr ',' subexpr> 340 341member_def: anytype <leftop: var ',' var> ';' 342 { 343 my @retval; 344 for my $def (@{$item[2]}) { 345 my $type = join '', $item[1], @{$def->[0]}; 346 my $name = $def->[1]; 347# print "member found: type=$type, name=$name\n"; 348 push @retval, { name => $name, type => $type }; 349 } 350 \@retval; 351 } 352 353var: star(s?) IDENTIFIER '=' expr { [@item[1,2]] } 354 | star(s?) IDENTIFIER '[' expr ']' { [@item[1,2]] } 355 | star(s?) IDENTIFIER { [@item[1,2]] } 356 357arg: type IDENTIFIER '=' expr 358 { 359# print "argument $item{IDENTIFIER} found\n"; 360# print "expression: $item{expr}\n"; 361 {type => $item[1], name => $item{IDENTIFIER}, optional => 1, 362 offset => $thisoffset} 363 } 364 | type IDENTIFIER 365 { 366# print "argument $item{IDENTIFIER} found\n"; 367 {type => $item[1], name => $item{IDENTIFIER}, offset => $thisoffset} 368 } 369 | type { {type => $item[1]} } 370 | '...' 371 { {name => '...', type => '...', offset => $thisoffset} } 372 373ident_part: /[~_a-z]\w*/i '<' <commit> <leftop: IDENTIFIER ',' IDENTIFIER>(s?) '>' 374 { 375 $item[1].'<'.join('', @{$item[4]}).'>' 376 } 377 378 | /[~_a-z]\w*/i 379 { 380 $item[1] 381 } 382 383IDENTIFIER: <leftop: ident_part '::' ident_part> 384 { 385 my $x = join '::', @{$item[1]}; 386# print "IDENTIFIER: $x\n"; 387 $x 388 } 389 390# Parse::RecDescent is retarded in this one case: if a subrule fails, it 391# gives up the entire rule. This is a stupid way to get around that. 392return_type: rtype2 | rtype1 393rtype1: TYPE star(s?) 394 { 395 $return = $item[1]; 396 $return .= join '',' ',@{$item[2]} if @{$item[2]}; 397# print "rtype1: $return\n"; 398# return undef 399# unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return}); 400 } 401rtype2: modifier(s) TYPE star(s?) 402 { 403 $return = $item[2]; 404 $return = join ' ',grep{$_}@{$item[1]},$return 405 if @{$item[1]}; 406 $return .= join '',' ',@{$item[3]} if @{$item[3]}; 407# print "rtype2: $return\n"; 408# return undef 409# unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return}); 410 $return = 'static ' . $return 411 if $thisparser->{data}{smod}{static}; 412 $thisparser->{data}{smod}{static} = 0; 413 } 414 415type: type2 | type1 416type1: TYPE star(s?) 417 { 418 $return = $item[1]; 419 $return .= join '',' ',@{$item{fixkey('star(s?)')}} if @{$item{fixkey('star(s?)')}}; 420# print "type1: $return\n"; 421# return undef 422# unless(defined$thisparser->{data}{typeconv}{valid_types}{$return}); 423 } 424type2: modifier(s) TYPE star(s?) 425 { 426 $return = $item{TYPE}; 427 $return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]}; 428 $return .= join '',' ',@{$item{fixkey('star(s?)')}} if @{$item{fixkey('star(s?)')}}; 429# print "type2: $return\n"; 430# return undef 431# unless(defined$thisparser->{data}{typeconv}{valid_types}{$return}); 432 } 433 434anytype: anytype2 | anytype1 435anytype1: TYPE star(s?) 436 { 437 $return = $item[1]; 438 $return .= join '',' ',@{$item[2]} if @{$item[2]}; 439 } 440anytype2: modifier(s) TYPE star(s?) 441 { 442 $return = $item[2]; 443 $return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]}; 444 $return .= join '',' ',@{$item[3]} if @{$item[3]}; 445 } 446 447comment: m{\s* // [^\n]* \n }x 448 | m{\s* /\* (?:[^*]+|\*(?!/))* \*/ ([ \t]*)? }x 449 450# long and short aren't recognized as modifiers because they break when used 451# as regular types. Another Parse::RecDescent problem is greedy matching; I 452# need tmodifier to "give back" long or short in cases where keeping them would 453# cause the modifier rule to fail. One side-effect is 'long long' can never 454# be parsed correctly here. 455modifier: tmod 456 | smod { ++$thisparser->{data}{smod}{$item[1]}; ''} 457 | nmod { '' } 458tmod: 'unsigned' # | 'long' | 'short' 459smod: 'const' | 'static' 460nmod: 'extern' | 'virtual' | 'mutable' | 'volatile' | 'inline' 461 462scope: 'public' | 'private' | 'protected' 463 464class: 'class' { $thisparser->{data}{defaultscope} = 'private'; $item[1] } 465 | 'struct' { $thisparser->{data}{defaultscope} = 'public'; $item[1] } 466 467star: '*' | '&' 468 469code_block: /$Inline::CPP::Parser::RecDescent::code_block/ 470 471# Consume expressions 472expr: <leftop: subexpr OP subexpr> { 473 my $o = join '', @{$item[1]}; 474# print "expr: $o\n"; 475 $o; 476} 477subexpr: /$Inline::CPP::Parser::RecDescent::funccall/ # Matches a macro, too 478 | /$Inline::CPP::Parser::RecDescent::string/ 479 | /$Inline::CPP::Parser::RecDescent::number/ 480 | UOP subexpr 481OP: '+' | '-' | '*' | '/' | '^' | '&' | '|' | '%' | '||' | '&&' 482UOP: '~' | '!' | '-' | '*' | '&' 483 484TYPE: IDENTIFIER 485 486all: /.*/ 487 488END 489} 490 491#============================================================================ 492# Generate typemap code for the classes and structs we bind to. This allows 493# functions declared after a class to return or accept class objects as 494# parameters. 495#============================================================================ 496$TYPEMAP_KIND = 'O_Inline_CPP_Class'; 497 498sub typemap { 499 my ($parser, $typename) = @_; 500 501# print "Inline::CPP::Parser::RecDescent::typemap(): typename=$typename\n"; 502 503 my ($TYPEMAP, $INPUT, $OUTPUT); 504 $TYPEMAP = "$typename *\t\t$TYPEMAP_KIND\n"; 505 $INPUT = <<"END"; 506 if (sv_isobject(\$arg) && (SvTYPE(SvRV(\$arg)) == SVt_PVMG)) { 507 \$var = (\$type)SvIV((SV*)SvRV( \$arg )); 508 } 509 else { 510 warn ( \\"\${Package}::\$func_name() -- \$var is not a blessed reference\\" ); 511 XSRETURN_UNDEF; 512 } 513END 514 $OUTPUT = <<"END"; 515 sv_setref_pv( \$arg, CLASS, (void*)\$var ); 516END 517 518 my $ctypename = $typename . ' *'; 519 $parser->{data}{typeconv}{input_expr}{$TYPEMAP_KIND} ||= $INPUT; 520 $parser->{data}{typeconv}{output_expr}{$TYPEMAP_KIND} ||= $OUTPUT; 521 $parser->{data}{typeconv}{type_kind}{$ctypename} = $TYPEMAP_KIND; 522 $parser->{data}{typeconv}{valid_types}{$ctypename}++; 523 $parser->{data}{typeconv}{valid_rtypes}{$ctypename}++; 524 return; 525} 526 527#============================================================================ 528# Default action is to strip ellipses from the C++ code. This allows having 529# _only_ a '...' in the code, just like XS. It is the default. 530#============================================================================ 531sub strip_ellipsis { 532 my ($parser, $args) = @_; 533 return if $parser->{ILSM}{PRESERVE_ELLIPSIS}; 534 for (my $i = 0; $i < @$args; $i++) { 535 next unless $args->[$i]{name} eq '...'; 536 537 # if it's the first one, just strip it 538 if ($i == 0) { 539 substr($parser->{ILSM}{code}, $args->[$i]{offset} - 3, 3, ' '); 540 } 541 else { 542 my $prev = $i - 1; 543 my $prev_offset = $args->[$prev]{offset}; 544 my $length = $args->[$i]{offset} - $prev_offset; 545 substr($parser->{ILSM}{code}, $prev_offset, $length) =~ s/\S/ /g; 546 } 547 } 548 return; 549} 550 551my $hack = sub { # Appease -w using Inline::Files 552 print Parse::RecDescent::IN ''; 553 print Parse::RecDescent::IN ''; 554 print Parse::RecDescent::TRACE_FILE ''; 555 print Parse::RecDescent::TRACE_FILE ''; 556}; 557 5581; 559 560=head1 Inline::CPP::Parser::RecDescent 561 562All functions are internal. No documentation necessary. 563 564=cut 565