1use strict; use warnings;
2package Inline::CPP::Parser::RecDescent;
3
4# Dev versions will have a _0xx suffix.
5# We eval the $VERSION to accommodate dev version numbering as described in
6# perldoc perlmodstyle
7our $VERSION = '0.80';
8#$VERSION = eval $VERSION;  ## no critic (eval)
9
10use Carp;
11
12sub register {
13    {
14     extends => [qw(CPP)],
15     overrides => [qw(get_parser)],
16    }
17}
18
19sub get_parser {
20    my $o = shift;
21    return Inline::CPP::Parser::RecDescent::get_parser_recdescent($o);
22}
23
24sub get_parser_recdescent {
25    my $o = shift;
26    eval { require Parse::RecDescent };
27    croak <<END if $@;
28This invocation of Inline requires the Parse::RecDescent module.
29$@
30END
31    no warnings qw/ once /;    ## no critic (warnings)
32    $::RD_HINT = 1;    # Turns on Parse::RecDescent's warnings/diagnostics.
33    my $parser = Parse::RecDescent->new(grammar());
34    $parser->{data}{typeconv} = $o->{ILSM}{typeconv};
35    $parser->{ILSM} = $o->{ILSM};    # give parser access to config options
36    return $parser;
37}
38
39use vars qw($TYPEMAP_KIND $fixkey);
40
41# Parse::RecDescent 1.90 and later have an incompatible change
42# 'The key of an %item entry for a repeated subrule now includes
43# the repetition specifier.'
44# Hence various hash keys may or may not need trailing '(s?)' depending on
45# the version of Parse::RecDescent we are using.
46
47require Parse::RecDescent;
48
49# Deal with Parse::RecDescent's version numbers for development
50# releases (eg, '1.96_000') resulting in a warning about non-numeric in >
51# comparison.
52{    # Lexical scope.
53      # Eval away the underscore.  "1.96_000" => "1.96000".
54      # Use that "stable release" version number as the basis for our numeric
55      # comparison.
56  my $stable_version = eval $Parse::RecDescent::VERSION;    ## no critic (eval)
57  $fixkey = ($stable_version > 1.89)
58    ? sub{ $_[0] } : sub{ local $_=shift; s/\(.*\)$//; $_ };
59}    # End lexical scope.
60
61
62#============================================================================
63# Regular expressions to match code blocks, numbers, strings, parenthesized
64# expressions, function calls, and macros. The more complex regexes are only
65# implemented in 5.6.0 and above, so they're in eval-blocks.
66#
67# These are all adapted from the output of Damian Conway's excellent
68# Regexp::Common module. In future, Inline::CPP may depend directly on it,
69# but for now I'll just duplicate the code.
70use vars qw( $code_block $string $number $parens $funccall );
71
72#============================================================================
73
74# $RE{balanced}{-parens=>q|{}()[]"'|}
75eval <<'END';    ## no critic (eval)
76$code_block = qr'(?-xism:(?-xism:(?:[{](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[}]))|(?-xism:(?-xism:(?:[(](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[)]))|(?-xism:(?-xism:(?:[[](?:(?>[^][)(}{]+)|(??{$Inline::CPP::Parser::RecDescent::code_block}))*[]]))|(?-xism:(?!)))))';
77END
78$code_block = qr'{[^}]*}' if $@;    # For the stragglers: here's a lame regexp.
79
80# $RE{balanced}{-parens=>q|()"'|}
81eval <<'END';                       ## no critic (eval)
82$parens = qr'(?-xism:(?-xism:(?:[(](?:(?>[^)(]+)|(??{$Inline::CPP::Parser::RecDescent::parens}))*[)]))|(?-xism:(?!)))';
83END
84$parens = qr'\([^)]*\)' if $@;      # For the stragglers: here's another
85
86# $RE{quoted}
87$string
88  = qr'(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\')|(?:\`)(?:[^\\\`]*(?:\\.[^\\\`]*)*)(?:\`))';
89
90# $RE{num}{real}|$RE{num}{real}{-base=>16}|$RE{num}{int}
91$number
92  = qr'(?:(?i)(?:[+-]?)(?:(?=[0123456789]|[.])(?:[0123456789]*)(?:(?:[.])(?:[0123456789]{0,}))?)(?:(?:[E])(?:(?:[+-]?)(?:[0123456789]+))|))|(?:(?i)(?:[+-]?)(?:(?=[0123456789ABCDEF]|[.])(?:[0123456789ABCDEF]*)(?:(?:[.])(?:[0123456789ABCDEF]{0,}))?)(?:(?:[G])(?:(?:[+-]?)(?:[0123456789ABCDEF]+))|))|(?:(?:[+-]?)(?:\d+))';
93$funccall
94  = qr/(?:[_a-zA-Z][_a-zA-Z0-9]*::)*[_a-zA-Z][_a-zA-Z0-9]*(?:$Inline::CPP::Parser::RecDescent::parens)?/;
95
96#============================================================================
97# Inline::CPP's grammar
98#============================================================================
99sub grammar {
100  return <<'END';
101
102{ use Data::Dumper; }
103
104{
105  sub fixkey { &$Inline::CPP::Parser::RecDescent::fixkey }
106}
107
108{
109    sub handle_args {
110        my ($args) = @_;
111        my %argsdef;
112        $argsdef{arg_names} = [ map $_->{name}, @$args ];
113        $argsdef{arg_types} = [ map $_->{type}, @$args ];
114        $argsdef{arg_offsets} = [ map $_->{offset}, @$args ];
115        $argsdef{arg_optional} = [ map $_->{optional}, @$args ];
116        \%argsdef;
117    }
118    sub handle_class_def {
119        my ($thisparser, $def) = @_;
120#         print "Found a class: $def->[0]\n";
121        my $class = $def->[0];
122        my @parts;
123        for my $part (@{$def->[1]}) { push @parts, @$_ for @$part }
124        push @{$thisparser->{data}{classes}}, $class
125            unless defined $thisparser->{data}{class}{$class};
126        $thisparser->{data}{class}{$class} = \@parts;
127#   print "Class $class:\n", Dumper \@parts;
128        Inline::CPP::Parser::RecDescent::typemap($thisparser, $class);
129        [$class, \@parts];
130    }
131    sub handle_typedef {
132        my ($thisparser, $t) = @_;
133        my ($name, $type) = @{$t}{qw(name type)};
134#   print "found a typedef: $name => $type\n";
135
136        # XXX: this doesn't handle non-class typedefs that we could handle,
137        # e.g. "typedef int my_int_t"
138
139        if ($thisparser->{data}{class}{$type}
140            && !exists($thisparser->{data}{class}{$name})) {
141            push @{$thisparser->{data}{classes}}, $name;
142            $thisparser->{data}{class}{$name} = $thisparser->{data}{class}{$type};
143            Inline::CPP::Parser::RecDescent::typemap($thisparser, $name);
144        }
145        $t;
146    }
147    sub handle_enum {
148        my ($thisparser, $t) = @_;
149        $t;
150    }
151}
152
153code: part(s) {1}
154
155part: comment
156    | typedef
157      {
158        handle_typedef($thisparser, $item[1]);
159        1;
160      }
161    | enum
162      {
163        my $t = handle_enum($thisparser, $item[1]);
164        push @{$thisparser->{data}{enums}}, $t;
165        1;
166      }
167    | class_def
168      {
169         handle_class_def($thisparser, $item[1]);
170     1;
171      }
172    | function_def
173      {
174#         print "found a function: $item[1]->{name}\n";
175         my $name = $item[1]->{name};
176     my $i=0;
177     for my $arg (@{$item[1]->{args}}) {
178        $arg->{name} = 'dummy' . ++$i unless defined $arg->{name};
179     }
180     Inline::CPP::Parser::RecDescent::strip_ellipsis($thisparser,
181                          $item[1]->{args});
182     push @{$thisparser->{data}{functions}}, $name
183           unless defined $thisparser->{data}{function}{$name};
184     my %funcdef = %{ $item[1] };
185     %funcdef = (%funcdef, %{ handle_args(delete $funcdef{args}) });
186     $thisparser->{data}{function}{$name} = \%funcdef;
187#    print Dumper $item[1];
188     1;
189      }
190    | all
191
192typedef: 'typedef' class IDENTIFIER(?) '{' <commit> class_part(s?) '}' IDENTIFIER ';'
193       {
194     my ($class, $parts);
195         $class = $item[3][0] || 'anon_class'.($thisparser->{data}{anonclass}++);
196         ($class, $parts)= handle_class_def($thisparser, [$class, $item{fixkey('class_part(s?)')}]);
197     { thing => 'typedef', name => $item[8], type => $class, body => $parts }
198       }
199       | 'typedef' IDENTIFIER IDENTIFIER ';'
200       { { thing => 'typedef', name => $item[3], type => $item[2] } }
201       | 'typedef' /[^;]*/ ';'
202       {
203#         dprint "Typedef $item{__DIRECTIVE1__} is too heinous\n";
204         { thing => 'comment'}
205       }
206
207enum: 'enum' IDENTIFIER(?) '{' <leftop: enum_item ',' enum_item> '}' ';'
208       {
209    { thing => 'enum', name => $item{fixkey('IDENTIFIER(?)')}[0],
210          body => $item{__DIRECTIVE1__} }
211       }
212
213enum_item: IDENTIFIER '=' <commit> /[0-9]+/
214         { [$item{IDENTIFIER}, $item{__PATTERN1__}] }
215         | IDENTIFIER
216         { [$item{IDENTIFIER}, undef] }
217
218class_def: class IDENTIFIER '{' <commit> class_part(s?) '}' ';'
219           {
220              [@item{'IDENTIFIER',fixkey('class_part(s?)')}]
221           }
222     | class IDENTIFIER ':' <commit> <leftop: inherit ',' inherit>
223            '{' class_part(s?) '}' ';'
224       {
225          push @{$item{fixkey('class_part(s?)')}}, [$item{__DIRECTIVE2__}];
226          [@item{'IDENTIFIER',fixkey('class_part(s?)')}]
227       }
228
229inherit: scope IDENTIFIER
230    { {thing => 'inherits', name => $item[2], scope => $item[1]} }
231
232class_part: comment { [ {thing => 'comment'} ] }
233      | scope ':' <commit> class_decl(s?)
234            {
235          for my $part (@{$item{fixkey('class_decl(s?)')}}) {
236                  $_->{scope} = $item[1] for @$part;
237          }
238          $item{fixkey('class_decl(s?)')}
239        }
240      | class_decl(s)
241            {
242          for my $part (@{$item[1]}) {
243                  $_->{scope} = $thisparser->{data}{defaultscope}
244            for @$part;
245          }
246          $item[1]
247        }
248
249class_decl: comment { [{thing => 'comment'}] }
250          | typedef { [ handle_typedef($thisparser, $item[1]) ] }
251          | enum { [ handle_enum($thisparser, $item[1]) ] }
252          | class_def
253            {
254               my ($class, $parts) = handle_class_def($thisparser, $item[1]);
255               [{ thing => 'class', name => $class, body => $parts }];
256            }
257          | method_def
258        {
259              $item[1]->{thing} = 'method';
260#         print "class_decl found a method: $item[1]->{name}\n";
261          my $i=0;
262          for my $arg (@{$item[1]->{args}}) {
263        $arg->{name} = 'dummy' . ++$i unless defined $arg->{name};
264          }
265          Inline::CPP::Parser::RecDescent::strip_ellipsis($thisparser,
266                           $item[1]->{args});
267          my %funcdef = %{ $item[1] };
268          %funcdef = (%funcdef, %{ handle_args(delete $funcdef{args}) });
269          [\%funcdef];
270        }
271          | member_def
272        {
273#         print "class_decl found one or more members:\n", Dumper(\@item);
274              $_->{thing} = 'member' for @{$item[1]};
275          $item[1];
276        }
277
278function_def: operator <commit> ';'
279              {
280                   $item[1]
281              }
282            | operator <commit> smod(?) code_block
283              {
284                  $item[1]
285              }
286            | IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' smod(?) code_block
287              {
288                {name => $item{IDENTIFIER}, args => $item{__DIRECTIVE2__}, return_type => '' }
289              }
290            | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' ';'
291              {
292                {return_type => $item[1], name => $item[2], args => $item{__DIRECTIVE1__} }
293              }
294            | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' smod(?) code_block
295              {
296                {return_type => $item{return_type}, name => $item[2], args => $item{__DIRECTIVE1__} }
297              }
298
299method_def: operator <commit> method_imp
300            {
301#               print "method operator:\n", Dumper $item[1];
302               $item[1];
303            }
304
305          | IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' method_imp
306            {
307#         print "con-/de-structor found: $item[1]\n";
308              {name => $item[1], args => $item{__DIRECTIVE2__}, abstract => ${$item{method_imp}} };
309            }
310          | return_type IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' method_imp
311            {
312#         print "method found: $item[2]\n";
313          $return =
314                {name => $item[2], return_type => $item[1], args => $item[4],
315             abstract => ${$item[6]},
316                 rconst => $thisparser->{data}{smod}{const},
317                };
318          $thisparser->{data}{smod}{const} = 0;
319            }
320
321operator: return_type(?) 'operator' /\(\)|[^()]+/ '(' <leftop: arg ',' arg>(s?) ')'
322          {
323#            print "Found operator: $item[1][0] operator $item[3]\n";
324            {name=> "operator $item[3]", args => $item[5], ret => $item[1][0]}
325          }
326
327# By adding smod, we allow 'const' member functions. This would also bind to
328# incorrect C++ with the word 'static' after the argument list, but we don't
329# care at all because such code would never be compiled successfully.
330
331# By adding init, we allow constructors to initialize references. Again, we'll
332# allow them anywhere, but our goal is not to enforce c++ standards -- that's
333# the compiler's job.
334method_imp: smod(?) ';' { \0 }
335          | smod(?) '=' <commit> '0' ';' { \1 }
336          | smod(?) initlist(?) code_block { \0 }
337          | smod(?) '=' '0' code_block { \0 }
338
339initlist: ':' <leftop: subexpr ',' subexpr>
340
341member_def: anytype <leftop: var ',' var> ';'
342            {
343          my @retval;
344          for my $def (@{$item[2]}) {
345              my $type = join '', $item[1], @{$def->[0]};
346          my $name = $def->[1];
347#             print "member found: type=$type, name=$name\n";
348          push @retval, { name => $name, type => $type };
349          }
350          \@retval;
351            }
352
353var: star(s?) IDENTIFIER '=' expr { [@item[1,2]] }
354   | star(s?) IDENTIFIER '[' expr ']' { [@item[1,2]] }
355   | star(s?) IDENTIFIER          { [@item[1,2]] }
356
357arg: type IDENTIFIER '=' expr
358     {
359#       print "argument $item{IDENTIFIER} found\n";
360#       print "expression: $item{expr}\n";
361    {type => $item[1], name => $item{IDENTIFIER}, optional => 1,
362     offset => $thisoffset}
363     }
364   | type IDENTIFIER
365     {
366#       print "argument $item{IDENTIFIER} found\n";
367       {type => $item[1], name => $item{IDENTIFIER}, offset => $thisoffset}
368     }
369   | type { {type => $item[1]} }
370   | '...'
371     { {name => '...', type => '...', offset => $thisoffset} }
372
373ident_part: /[~_a-z]\w*/i '<' <commit> <leftop: IDENTIFIER ',' IDENTIFIER>(s?) '>'
374        {
375       $item[1].'<'.join('', @{$item[4]}).'>'
376        }
377
378      | /[~_a-z]\w*/i
379        {
380           $item[1]
381        }
382
383IDENTIFIER: <leftop: ident_part '::' ident_part>
384        {
385              my $x = join '::', @{$item[1]};
386#              print "IDENTIFIER: $x\n";
387              $x
388        }
389
390# Parse::RecDescent is retarded in this one case: if a subrule fails, it
391# gives up the entire rule. This is a stupid way to get around that.
392return_type: rtype2 | rtype1
393rtype1: TYPE star(s?)
394        {
395         $return = $item[1];
396         $return .= join '',' ',@{$item[2]} if @{$item[2]};
397#    print "rtype1: $return\n";
398#          return undef
399#            unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return});
400        }
401rtype2: modifier(s) TYPE star(s?)
402    {
403         $return = $item[2];
404         $return = join ' ',grep{$_}@{$item[1]},$return
405           if @{$item[1]};
406         $return .= join '',' ',@{$item[3]} if @{$item[3]};
407#    print "rtype2: $return\n";
408#          return undef
409#            unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return});
410     $return = 'static ' . $return
411       if $thisparser->{data}{smod}{static};
412         $thisparser->{data}{smod}{static} = 0;
413    }
414
415type: type2 | type1
416type1: TYPE star(s?)
417        {
418         $return = $item[1];
419         $return .= join '',' ',@{$item{fixkey('star(s?)')}} if @{$item{fixkey('star(s?)')}};
420#    print "type1: $return\n";
421#          return undef
422#            unless(defined$thisparser->{data}{typeconv}{valid_types}{$return});
423        }
424type2: modifier(s) TYPE star(s?)
425    {
426         $return = $item{TYPE};
427         $return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]};
428         $return .= join '',' ',@{$item{fixkey('star(s?)')}} if @{$item{fixkey('star(s?)')}};
429#    print "type2: $return\n";
430#          return undef
431#            unless(defined$thisparser->{data}{typeconv}{valid_types}{$return});
432    }
433
434anytype: anytype2 | anytype1
435anytype1: TYPE star(s?)
436         {
437           $return = $item[1];
438           $return .= join '',' ',@{$item[2]} if @{$item[2]};
439         }
440anytype2: modifier(s) TYPE star(s?)
441         {
442           $return = $item[2];
443           $return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]};
444           $return .= join '',' ',@{$item[3]} if @{$item[3]};
445         }
446
447comment: m{\s* // [^\n]* \n }x
448       | m{\s* /\* (?:[^*]+|\*(?!/))* \*/  ([ \t]*)? }x
449
450# long and short aren't recognized as modifiers because they break when used
451# as regular types. Another Parse::RecDescent problem is greedy matching; I
452# need tmodifier to "give back" long or short in cases where keeping them would
453# cause the modifier rule to fail. One side-effect is 'long long' can never
454# be parsed correctly here.
455modifier: tmod
456        | smod { ++$thisparser->{data}{smod}{$item[1]}; ''}
457    | nmod { '' }
458tmod: 'unsigned' # | 'long' | 'short'
459smod: 'const' | 'static'
460nmod: 'extern' | 'virtual' | 'mutable' | 'volatile' | 'inline'
461
462scope: 'public' | 'private' | 'protected'
463
464class: 'class' { $thisparser->{data}{defaultscope} = 'private'; $item[1] }
465     | 'struct' { $thisparser->{data}{defaultscope} = 'public'; $item[1] }
466
467star: '*' | '&'
468
469code_block: /$Inline::CPP::Parser::RecDescent::code_block/
470
471# Consume expressions
472expr: <leftop: subexpr OP subexpr> {
473    my $o = join '', @{$item[1]};
474#   print "expr: $o\n";
475    $o;
476}
477subexpr: /$Inline::CPP::Parser::RecDescent::funccall/ # Matches a macro, too
478       | /$Inline::CPP::Parser::RecDescent::string/
479       | /$Inline::CPP::Parser::RecDescent::number/
480       | UOP subexpr
481OP: '+' | '-' | '*' | '/' | '^' | '&' | '|' | '%' | '||' | '&&'
482UOP: '~' | '!' | '-' | '*' | '&'
483
484TYPE: IDENTIFIER
485
486all: /.*/
487
488END
489}
490
491#============================================================================
492# Generate typemap code for the classes and structs we bind to. This allows
493# functions declared after a class to return or accept class objects as
494# parameters.
495#============================================================================
496$TYPEMAP_KIND = 'O_Inline_CPP_Class';
497
498sub typemap {
499  my ($parser, $typename) = @_;
500
501#    print "Inline::CPP::Parser::RecDescent::typemap(): typename=$typename\n";
502
503  my ($TYPEMAP, $INPUT, $OUTPUT);
504  $TYPEMAP = "$typename *\t\t$TYPEMAP_KIND\n";
505  $INPUT   = <<"END";
506    if (sv_isobject(\$arg) && (SvTYPE(SvRV(\$arg)) == SVt_PVMG)) {
507        \$var = (\$type)SvIV((SV*)SvRV( \$arg ));
508    }
509    else {
510        warn ( \\"\${Package}::\$func_name() -- \$var is not a blessed reference\\" );
511        XSRETURN_UNDEF;
512    }
513END
514  $OUTPUT = <<"END";
515    sv_setref_pv( \$arg, CLASS, (void*)\$var );
516END
517
518  my $ctypename = $typename . ' *';
519  $parser->{data}{typeconv}{input_expr}{$TYPEMAP_KIND}  ||= $INPUT;
520  $parser->{data}{typeconv}{output_expr}{$TYPEMAP_KIND} ||= $OUTPUT;
521  $parser->{data}{typeconv}{type_kind}{$ctypename} = $TYPEMAP_KIND;
522  $parser->{data}{typeconv}{valid_types}{$ctypename}++;
523  $parser->{data}{typeconv}{valid_rtypes}{$ctypename}++;
524  return;
525}
526
527#============================================================================
528# Default action is to strip ellipses from the C++ code. This allows having
529# _only_ a '...' in the code, just like XS. It is the default.
530#============================================================================
531sub strip_ellipsis {
532  my ($parser, $args) = @_;
533  return if $parser->{ILSM}{PRESERVE_ELLIPSIS};
534  for (my $i = 0; $i < @$args; $i++) {
535    next unless $args->[$i]{name} eq '...';
536
537    # if it's the first one, just strip it
538    if ($i == 0) {
539      substr($parser->{ILSM}{code}, $args->[$i]{offset} - 3, 3, '   ');
540    }
541    else {
542      my $prev        = $i - 1;
543      my $prev_offset = $args->[$prev]{offset};
544      my $length      = $args->[$i]{offset} - $prev_offset;
545      substr($parser->{ILSM}{code}, $prev_offset, $length) =~ s/\S/ /g;
546    }
547  }
548  return;
549}
550
551my $hack = sub { # Appease -w using Inline::Files
552    print Parse::RecDescent::IN '';
553    print Parse::RecDescent::IN '';
554    print Parse::RecDescent::TRACE_FILE '';
555    print Parse::RecDescent::TRACE_FILE '';
556};
557
5581;
559
560=head1 Inline::CPP::Parser::RecDescent
561
562All functions are internal.  No documentation necessary.
563
564=cut
565