1package C::Scan; 2 3require Exporter; 4use Config '%Config'; 5use File::Basename; 6use Data::Flow qw(0.05); 7use strict; # Earlier it catches ISA and EXPORT. 8 9@C::Scan::ISA = qw(Exporter Data::Flow); 10 11# Items to export into callers namespace by default. Note: do not export 12# names by default without a very good reason. Use EXPORT_OK instead. 13# Do not simply export all your public functions/methods/constants. 14 15@C::Scan::EXPORT = qw( 16 ); 17@C::Scan::EXPORT_OK = qw( 18 ); 19# this flag tells cpp to only output macros 20$C::Scan::MACROS_ONLY = '-dM'; 21 22$C::Scan::VERSION = '0.74'; 23 24my (%keywords,%style_keywords); 25for (qw(asm auto break case char continue default do double else enum 26 extern float for fortran goto if int long register return short 27 sizeof static struct switch typedef union unsigned signed while void)) { 28 $keywords{$_}++; 29} 30for (qw(bool class const delete friend inline new operator overload private 31 protected public virtual)) { 32 $style_keywords{'C++'}{$_}++; 33} 34for (qw(__func__ _Complex _Imaginary _Bool inline restrict)) { 35 $style_keywords{'C9X'}{$_}++; 36} 37for (qw(inline const asm noreturn format section 38 constructor destructor unused weak)) { 39 $style_keywords{'GNU'}{$_}++; 40 $style_keywords{'GNU'}{"__$ {_}__"}++; 41} 42 $style_keywords{'GNU'}{__attribute__}++; 43 $style_keywords{'GNU'}{__extension__}++; 44 $style_keywords{'GNU'}{__consts}++; 45 $style_keywords{'GNU'}{__const}++; 46 47my $recipes 48 = { Defines => { default => '' }, 49 cppstdin => { default => $Config{cppstdin} }, 50 cppflags => { default => $Config{cppflags} }, 51 cppminus => { default => $Config{cppminus} }, 52 c_styles => { default => [qw(C++ GNU C9X)] }, 53 add_cppflags => { default => '' }, 54 keywords => { prerequisites => ['c_styles'], 55 output => sub { 56 my %kw = %keywords; 57 my %add; 58 for ( @{ shift->{c_styles} } ) { 59 %add = %{ $style_keywords{$_} }; 60 %kw = (%kw, %add); 61 } 62 \%kw; 63 }, }, 64 'undef' => { default => undef }, 65 filename_filter => { default => undef }, 66 full_text => { class_filter => [ 'text', 'C::Preprocessed', 67 qw(undef filename Defines includeDirs Cpp)] }, 68 text => { class_filter => [ 'text', 'C::Preprocessed', 69 qw(filename_filter filename Defines includeDirs Cpp)] }, 70 text_only_from => { class_filter => [ 'text_only_from', 'C::Preprocessed', 71 qw(filename_filter filename Defines includeDirs Cpp)] }, 72 includes => { filter => [ \&includes, 73 qw(filename Defines includeDirs Cpp) ], }, 74 includeDirs => { prerequisites => ['filedir'], 75 output => sub { 76 my $data = shift; 77 [ $data->{filedir}, '/usr/local/include', '.']; 78 } }, 79 Cpp => { prerequisites => [qw(cppminus add_cppflags cppflags cppstdin)], 80 output => sub { 81 my $data = shift; 82 return { cppstdin => $data->{cppstdin}, 83 cppflags => "$data->{cppflags} $data->{add_cppflags}", 84 cppminus => $data->{cppminus} }; 85 } }, 86 filedir => { output => sub { dirname ( shift->{filename} || '.' ) } }, 87 sanitized => { filter => [ \&sanitize, 'text'], }, 88 toplevel => { filter => [ \&top_level, 'sanitized'], }, 89 full_sanitized => { filter => [ \&sanitize, 'full_text'], }, 90 full_toplevel => { filter => [ \&top_level, 'full_sanitized'], }, 91 no_type_decl => { filter => [ \&remove_type_decl, 'toplevel'], }, 92 typedef_chunks => { filter => [ \&typedef_chunks, 'full_toplevel'], }, 93 typedefs_maybe => { filter => [ sub {[keys %{+shift}]}, 'typedef_hash'], }, 94 typedefs_whited => { filter => [ \&typedefs_whited, 95 'full_sanitized', 'typedef_chunks', 96 'keywords_rex'], }, 97 typedef_texts => { filter => [ \&typedef_texts, 98 'full_text', 'typedef_chunks'], }, 99 typedef_hash => { filter => [ \&typedef_hash, 100 'typedef_texts', 'typedefs_whited'], }, 101 typedef_structs => { filter => [ \&typedef_structs, 102 'typedef_hash'], }, 103 defines_maybe => { filter => [ \&defines_maybe, 'filename'], }, 104 defines_no_args => { prerequisites => ['defines_maybe'], 105 output => sub { shift->{defines_maybe}->[0] }, }, 106 defines_args => { prerequisites => ['defines_maybe'], 107 output => sub { shift->{defines_maybe}->[1] }, }, 108 109 defines_full => { filter => [ \&defines_full, 110 qw(filename Defines includeDirs Cpp) ], }, 111 defines_no_args_full => { prerequisites => ['defines_full'], 112 output => sub { shift->{defines_full}->[0] }, }, 113 defines_args_full => { prerequisites => ['defines_full'], 114 output => sub { shift->{defines_full}->[1] }, }, 115 116 decl_inlines => { filter => [ \&functions_in, 'no_type_decl'], }, 117 inline_chunks => { filter => [ sub { shift->[0] }, 'decl_inlines'], }, 118 inlines => { filter => [ \&from_chunks, 'inline_chunks', 'text'], }, 119 decl_chunks => { filter => [ sub { shift->[1] }, 'decl_inlines'], }, 120 decls => { filter => [ \&from_chunks, 'decl_chunks', 'text'], }, 121 fdecl_chunks => { filter => [ sub { shift->[4] }, 'decl_inlines'], }, 122 fdecls => { filter => [ \&from_chunks, 'fdecl_chunks', 'text'], }, 123 mdecl_chunks => { filter => [ sub { shift->[2] }, 'decl_inlines'], }, 124 mdecls => { filter => [ \&from_chunks, 'mdecl_chunks', 'text'], }, 125 vdecl_chunks => { filter => [ sub { shift->[3] }, 'decl_inlines'], }, 126 vdecls => { filter => [ \&from_chunks, 'vdecl_chunks', 'text'], }, 127 vdecl_hash => { filter => [ \&vdecl_hash, 'vdecls', 'mdecls' ], }, 128 parsed_fdecls => { filter => [ \&do_declarations, 'fdecls', 129 'typedef_hash', 'keywords'], }, 130 keywords_rex => { filter => [ sub { my @k = keys %{ shift() }; 131 local $" = '|'; 132 my $r = "(?:@k)"; 133 eval 'qr/$r/' or $r # Older Perls 134 }, 'keywords'], }, 135 }; 136 137sub from_chunks { 138 my $chunks = shift; 139 my $txt = shift; 140 my @out; 141 my $i = 0; 142 while ($i < @$chunks) { 143 push @out, substr $txt, $chunks->[$i], $chunks->[ $i + 1 ] - $chunks->[$i]; 144 $i += 2; 145 } 146 \@out; 147} 148 149#sub process { request($recipes, @_) } 150# Preloaded methods go here. 151 152sub includes { 153 my %seen; 154 my $stream = new C::Preprocessed (@_) 155 or die "Cannot open pipe from cppstdin: $!\n"; 156 157 while (<$stream>) { 158 next unless m(^\s*\#\s* # Leading hash 159 (line\s*)? # 1: Optional line 160 ([0-9]+)\s* # 2: Line number 161 (.*) # 3: The rest 162 )x; 163 my $include = $3; 164 $include = $1 if $include =~ /"(.*)"/; # Filename may be in quotes 165 $include =~ s,\\\\,/,g if $^O eq 'os2'; 166 $seen{$include}++ if $include ne ""; 167 } 168 [keys %seen]; 169} 170 171sub defines_maybe { 172 my $file = shift; 173 my ($mline,$line,%macros,%macrosargs,$sym,$args); 174 open(C, $file) or die "Cannot open file $file: $!\n"; 175 while (not eof(C) and $line = <C>) { 176 next unless 177 ( $line =~ s[ 178 ^ \s* \# \s* # Start of directive 179 define \s+ 180 (\w+) # 1: symbol 181 (?: 182 \( (.*?) \s* \) # 2: Minimal match for arguments 183 # in parenths (without trailing 184 # spaces) 185 )? # optional, no grouping 186 \s* # rest is the definition 187 ([\s\S]*) # 3: the rest 188 ][]x ); 189 ($sym, $args, $mline) = ($1, $2, $3); 190 $mline .= <C> while not eof(C) and $mline =~ s/\\\n/\n/; 191 chomp $mline; 192 #print "sym: `$sym', args: `$args', mline: `$mline'\n"; 193 if (defined $args) { 194 $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline]; 195 } else { 196 $macros{$sym} = $mline; 197 } 198 } 199 close(C) or die "Cannot close file $file: $!\n"; 200 [\%macros, \%macrosargs]; 201} 202 203sub defines_full { 204 my $Cpp = $_[3]; 205 my ($mline,$line,%macros,%macrosargs,$sym,$args); 206 207 # save the old cppflags and add the flag for only ouputting macro definitions 208 my $old_cppstdin = $Cpp->{'cppstdin'}; 209 $Cpp->{'cppstdin'} = $old_cppstdin . " " . $C::Scan::MACROS_ONLY; 210 211 my $stream = new C::Preprocessed (@_) 212 or die "Cannot open pipe from cppstdin: $!\n"; 213 214 while (defined ($line = <$stream>)) { 215 next unless 216 ( $line =~ s[ 217 ^ \s* \# \s* # Start of directive 218 define \s+ 219 (\w+) # 1: symbol 220 (?: 221 \( (.*?) \s* \) # 2: Minimal match for arguments 222 # in parenths (without trailing 223 # spaces) 224 )? # optional, no grouping 225 \s* # rest is the definition 226 ([\s\S]*) # 3: the rest 227 ][]x ); 228 ($sym, $args, $mline) = ($1, $2, $3); 229 $mline .= <$stream> while ($mline =~ s/\\\n/\n/); 230 chomp $mline; 231#print STDERR "sym: `$sym', args: `$args', mline: `$mline'\n"; 232 if (defined $args) { 233 $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline]; 234 } else { 235 $macros{$sym} = $mline; 236 } 237 } 238 # restore the original cppflags 239 $Cpp->{'cppstdin'} = $old_cppstdin; 240 [\%macros, \%macrosargs]; 241} 242 243# sub nexttypedef { 244# return unless $_[0] =~ /(\G|^|;)\s*typedef\b/g; 245# my $start = pos($_[0]) - 7; 246# nextsemi($_[0]); 247# my $end = pos $_[0]; 248# # warn "Found `", substr($_[0], $start, $end - $start), "'\n" if $debug; 249# return $start, $end; 250# } 251 252# sub nextsemi { 253# my $n = 0; 254# while ($_[0] =~ /([\(\{\[])|([\]\)\}])|(\;)/g) { 255# $n++ if defined $1; 256# $n-- if defined $2; 257# return if defined $3 and $n == 0; 258# } 259# die "No semicolon on the outer level"; 260# } 261 262sub typedef_texts { 263 my ($txt, $chunks) = (shift, shift); 264 my ($b, $e, $in, @out); 265 my @in = @$chunks; 266 while (($b, $e) = splice @in, 0, 2) { 267 $in = substr($txt, $b, $e - $b); 268 # remove any remaining directives 269 $in =~ s/^ ( \s* \# .* ( \\ $ \n .* )* ) / ' ' x length($1)/xgem; 270 push @out, $in; 271 } 272 \@out; 273} 274 275sub typedef_hash_old { 276 +{ map {($_,1)} map /(\w+)/, @{$_[0]} }; 277} 278 279sub typedef_hash { 280 my ($typedefs, $whited) = (shift,shift); 281 my %out; 282 283 loop: 284 for my $o (0..$#$typedefs) { 285 my $wh = $whited->[$o]; 286 my $td = $typedefs->[$o]; 287 if ($wh =~ /,/ or not $wh =~ /\w/) { # Hard case, guessimates ... 288 # Determine whether the new thingies are inside parens 289 $wh =~ /,/g; 290 my $p = pos $wh; 291 my ($s, $e); 292 if (matchingbrace($wh)) { # Inside. Easy part: just split on /,/... 293 $e = pos($wh) - 1; 294 $s = $e; 295 my $d = 0; 296 # Skip back 297 while (--$s >= 0) { 298 my $c = substr $wh, $s, 1; 299 if ($c =~ /[\(\{\[]/) { 300 $d--; 301 } elsif ($c =~ /[\)\]\}]/) { 302 $d++; 303 } 304 last if $d < 0; 305 } 306 if ($s < 0) { # Should not happen 307 warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n"); 308 next loop; 309 } 310 $s++; 311 } else { # We are at toplevel 312 # We need to skip back all the modifiers attached to the first thingy 313 # Guesstimates: everything after the first '*' (inclusive) 314 pos $wh = 0; 315 $wh = /(?=\w)/g; 316 my $ws = pos $wh; 317 my $pre = substr $wh, 0, $ws; 318 $s = $ws; 319 $s = pos $pre if $pre =~ /(?=\*)/g; 320 $e = length $wh; 321 } 322 # Now: need to split $td based on commas in $wh! 323 # And need to split each chunk of $td based on word in the chunk of $wh! 324 my $td_decls = substr($td, $s, $e - $s); 325 my ($pre, $post) = (substr($td, 0, $s), substr($td, $e)); 326 my $wh_decls = substr($wh, $s, $e - $s); 327 my @wh_decls = split /,/, $wh_decls; 328 my $td_s = 0; 329 my (@td_decl, @td_pre, @td_post, @td_word); 330 for my $wh_d (@wh_decls) { 331 my $td_d = substr $td, $td_s, length $wh_d; 332 push @td_decl, $td_d; 333 $wh_d =~ /(\w+)/g; 334 push @td_word, $1; 335 push @td_post, substr $td_d, pos($wh_d); 336 push @td_pre, substr $td_d, pos($wh_d) - length $1, length $1; 337 $td_s += 1 + length $wh_d; # Skip over ',' 338 } 339 for my $i (0..$#wh_decls) { 340 my $p = "$td_post[$i]$post"; 341 $p = '' unless $p =~ /\S/; 342 $out{$td_word[$i]} = ["$pre$td_pre[$i]", $p]; 343 } 344 } else { # Only one thing defined... 345 $wh =~ /(\w+)/g; 346 my $e = pos $wh; 347 my $s = $e - length $1; 348 my $type = $1; 349 my $pre = substr $td, 0, $s; 350 my $post = substr $td, $e, length($td) - $e; 351 $post = '' unless $post =~ /\S/; 352 $out{$type} = [$pre, $post]; 353 } 354 } 355 \%out; 356} 357 358sub typedef_chunks { # Input is toplevel, output: starts and ends 359 my $txt = shift; 360 pos $txt = 0; 361 my ($b, $e, @out); 362 while ($txt =~ /\btypedef\b/g) { 363 push @out, pos $txt; 364 $txt =~ /(?=;)|\Z/g; 365 push @out, pos $txt; 366 } 367 \@out; 368} 369 370sub typedef_structs { 371 my $typehash = shift; 372 my %structs; 373 while (my($key, $text) = each %$typehash) { 374 my $name = parse_struct($text->[0], \%structs); 375 $structs{$key} = defined($name) ? $structs{$name} : undef; 376 } 377 \%structs; 378} 379 380sub parse_struct { 381 my($in, $structs) = @_; 382 my($b, $e, $chunk, $vars, $struct, $structname); 383 ($structname, $in) = $in =~ / 384 ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $ 385 /gisx or return; 386 $structname .= " _ANON" unless $structname =~ /\s/; 387 $structname .= " 0" if exists $structs->{$structname}; 388 $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname}; 389 $b = 0; 390 while ($in =~ /(\{|;|$)/g) { 391 matchingbrace($in), next if $1 eq '{'; 392 $e = pos($in); 393 next if $b == $e; 394 $chunk = substr($in, $b, $e - $b); 395 $b = $e; 396 if ($chunk =~ /\G\s*(struct|union).*\}/gs) { 397 my $term = pos $chunk; 398 my $name = parse_struct(substr($chunk, 0, $term), $structs); 399 $vars = parse_vars(join ' ', $name, substr $chunk, $term); 400 } else { 401 $vars = parse_vars($chunk); 402 } 403 push @$struct, @$vars; 404 } 405 $structs->{$structname} = $struct; 406 $structname; 407} 408 409sub parse_vars { 410 my $in = shift; 411 my($vars, $type, $word, $id, $post); 412 while ($in =~ /\G\s*([\[;,]|\S+?\b|$)\s*/g) { 413 $word = $1; 414 if ($word eq ';' || $word eq '') { 415 next unless defined $id; 416 $type = 'int' unless defined $type; # or is this an error? 417 push @$vars, [ $type, $post, $id ]; 418 ($type, $post, $id) = (undef, undef, undef); 419 } elsif ($word eq ',') { 420 warn "panic: expecting name before comma in '$in'\n" unless defined $id; 421 $type = 'int' unless defined $type; # or is this an error? 422 push @$vars, [ $type, $post, $id ]; 423 $type =~ s/[ *]*$//; 424 $id = undef; 425 } elsif ($word eq '[') { 426 warn "panic: expecting name before '[' in '$in'\n" unless defined $id; 427 $type = 'int' unless defined $type; # or is this an error? 428 my $b = pos $in; 429 matchingbrace($in); 430 $post .= $word . substr $in, $b, pos($in) - $b; 431 } else { 432 if (defined $post) { 433 warn "panic: not expecting '$word' after array bounds in '$in'\n"; 434 } else { 435 $type = join ' ', grep defined, $type, $id if defined $id; 436 $id = $word; 437 } 438 } 439 } 440 $vars; 441} 442 443sub vdecl_hash { 444 my($vdecls, $mdecls) = @_; 445 my %vdecl_hash; 446 for (@$vdecls, @$mdecls) { 447 next if /[()]/; # ignore functions, and function pointers 448 my $copy = $_; 449 next unless $copy =~ s/^\s*extern\s*//; 450 my $vars = parse_vars($copy); 451 $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars; 452 } 453 \%vdecl_hash; 454} 455 456# The output is the list of list of inline chunks and list of 457# declaration chunks. 458 459sub functions_in { # The arg is text without type declarations. 460 my $in = shift; # remove_type_decl(top_level(sanitize($txt))); 461 # What remains now consists of variable and function declarations, 462 # and inline functions. 463 $in =~ /(?=\S)/g; 464 my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls); 465 $b = pos $in; 466 my $chunk; 467 while ($b != length $in) { 468 $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space 469 $e = pos $in; 470 $chunk = substr $in, $b, $e - $b; 471 # Now subdivide the chunk. 472 # 473 # What we got is one chunk, probably finished by `;'. Whoever, it 474 # may start with several inline functions. 475 # 476 # Note that inline functions contain ( ) { } in the stripped version. 477 $b1 = 0; 478 while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) { 479 $e1 = pos $chunk; 480 push @inlines, $b + $b1, $b + $e1; 481 $chunk =~ /(?=\S)/g; 482 $b1 = pos $chunk; 483 $b1 = length $chunk, last unless defined $b1; 484 } 485 if ($e - $b - $b1 > 0) { 486 push @decls, $b + $b1, $e; 487 substr ($chunk, 0, $b1) = ''; 488 if ($chunk =~ /,/) { # Contains multiple declarations. 489 push @mdecls, $b + $b1, $e; 490 } else { # Non-multiple. 491 my $isvar = 1; 492 # Since leading \s* is not optimized, this is quadratic! 493 $chunk =~ s{ 494 ( ( const 495 | __attribute__ \s* \( \s* \) 496 ) \s* )* ( ; \s* )? \Z # Strip from the end 497 }()x; 498 $chunk =~ s/\s*\Z//; 499 if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"! 500 if ($chunk !~ m{ 501 \( .* \( # Multiple parenths 502 }x 503 and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function? 504 $isvar = 0; 505 } 506 } 507 if ($isvar) { # Heuristically variable 508 push @vdecls, $b + $b1, $e; 509 } else { 510 push @fdecls, $b + $b1, $e; 511 } 512 } 513 } 514 $in =~ /\G\s*/g ; 515 $b = pos $in; 516 } 517 [\@inlines, \@decls, \@mdecls, \@vdecls, \@fdecls]; 518} 519 520sub typedefs_whited { # Input is sanitized text, and list of beg/end. 521 my @lst = @{$_[1]}; 522 my @out; 523 my ($b, $e); 524 while ($b = shift @lst) { 525 $e = shift @lst; 526 push @out, whited_decl($_[2], substr $_[0], $b, $e - $b); 527 } 528 \@out; 529} 530 531# XXXX This is heuristical in many respects... 532# Recipe: remove all struct-ish chunks. Remove all array specifiers. 533# Remove GCC attribute specifiers. 534# What remains may contain function's arguments, old types, and newly 535# defined types. 536# Remove function arguments using heuristics methods. 537# Now out of several words in a row the last one is a newly defined type. 538 539sub whited_decl { # Input is sanitized. 540 my $keywords_rex = shift; 541 my $in = shift; # Text of a declaration 542 my $rest = $in; 543 my $out = $in; # Whited out $in 544 545 # Remove all the structs 546 while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) { 547 my $pos_start = pos($out) - length $1; 548 549 matchingbrace($out); 550 my $pos_end = pos $out; 551 substr($out, $pos_start, $pos_end - $pos_start) = 552 ' ' x ($pos_end - $pos_start); 553 pos $out = $pos_end; 554 } 555 556 # Deal with glibc's wierd ass __attribute__ tag. Just dump it. 557 # Maaaybe this should check to see if you're using GCC, but I don't 558 # think so since glibc is nice enough to do that for you. [MGS] 559 while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) { 560 my $att_pos_start = pos($out) - length($1); 561 562 # Need to figure out where ((..)) ends. 563 matchingbrace($out); 564 my $att_pos_end = pos $out; 565 566 # Remove the __attribute__ tag. 567 substr($out, $att_pos_start, $att_pos_end - $att_pos_start) = 568 ' ' x ($att_pos_end - $att_pos_start); 569 pos $out = $att_pos_end; 570 } 571 572 # Remove arguments of functions (heuristics only). 573 # These things (start) arglist of a declared function: 574 # paren word comma 575 # paren word space non-paren 576 # paren keyword paren 577 # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ????? 578 while ( $out =~ /(\(\s*(\w+(,|\s+[^\)\s])|$keywords_rex\s*\)))/g ) { 579 my $pos_start = pos($out) - length($1); 580 pos $out = $pos_start + 1; 581 matchingbrace($out); 582 substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start) 583 = ' ' x (pos($out) - 2 - $pos_start); 584 } 585 # Remove array specifiers 586 $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge; 587 my $tout = $out; 588 # Several words in a row cannot be new typedefs, but the last one. 589 $out =~ s/((\w+\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge; 590 unless ($out =~ /\w/) { 591 # Probably a function-type declaration: typedef int f(int); 592 # Redo scan leaving the last word of the first group of words: 593 $tout =~ /(\w+\s+)*(\w+)/g; 594 $out = ' ' x (pos($tout) - length $2) 595 . $2 . ' ' x (length($tout) - pos($tout)); 596 # warn "function typedef\n\t'$in'\nwhited-out as\n\t'$out'\n"; 597 } 598 warn "panic: length mismatch\n\t'$in'\nwhited-out as\n\t'$out'\n" 599 if length($in) != length $out; 600 # Sanity check 601 warn "panic: multiple types without intervening comma in\n\t$in\nwhited-out as\n\t$out\n" 602 if $out =~ /\w[^\w,]+\w/; 603 warn "panic: no types found in\n\t$in\nwhited-out as\n\t$out\n" 604 unless $out =~ /\w/; 605 $out 606} 607 608sub matchingbrace { 609 # pos($_[0]) is after the opening brace now 610 my $n = 0; 611 while ($_[0] =~ /([\{\[\(])|([\]\)\}])/g) { 612 $1 ? $n++ : $n-- ; 613 return 1 if $n < 0; 614 } 615 # pos($_[0]) is after the closing brace now 616 return; # false 617} 618 619sub remove_Comments_no_Strings { # We expect that no strings are around 620 my $in = shift; 621 $in =~ s,/(/.*|\*[\s\S]*?\*/),,g ; # C and C++ 622 die "Unfinished comment" if $in =~ m,/\*, ; 623 $in; 624} 625 626sub sanitize { # We expect that no strings are around 627 my $in = shift; 628 # C and C++, strings and characters 629 $in =~ s{ / ( 630 / .* # C++ style 631 | 632 \* [\s\S]*? \*/ # C style 633 ) # (1) 634 | '((?:[^\\\']|\\.)+)' # (2) Character constants 635 | "((?:[^\\\"]|\\.)*)" # (3) Strings 636 | ( ^ \s* \# .* # (4) Preprocessor 637 ( \\ $ \n .* )* ) # and continuation lines 638 } { 639 # We want to preserve the length, so that one may go back 640 defined $1 ? ' ' x (1 + length $1) : 641 defined $4 ? ' ' x length $4 : 642 defined $2 ? "'" . ' ' x length($2) . "'" : 643 defined $3 ? '"' . ' ' x length($3) . '"' : '???' 644 }xgem ; 645 die "Unfinished comment" if $in =~ m{ /\* }x; 646 $in; 647} 648 649sub top_level { # We expect argument is sanitized 650 # Note that this may remove the variable in declaration: int (*func)(); 651 my $in = shift; 652 my $start; 653 my $out = $in; 654 while ($in =~ /[\[\{\(]/g ) { 655 $start = pos $in; 656 matchingbrace($in); 657 substr($out, $start, pos($in) - 1 - $start) 658 = ' ' x (pos($in) - 1 - $start); 659 } 660 $out; 661} 662 663sub remove_type_decl { # We suppose that the arg is top-level only. 664 my $in = shift; 665 $in =~ s/(\b__extension__)(\s+typedef\b)/(' ' x length $1) . $2/gse; 666 $in =~ s/(\btypedef\b.*?;)/' ' x length $1/gse; 667 # The following form may appear only in the declaration of the type itself: 668 $in =~ 669 s/(\b(enum|struct|union|class)\b[\s\w]*\{\s*\}\s*;)/' ' x length $1/gse; 670 # Pre-declarations: 671 $in =~ 672 s/(\b(enum|struct|union|class)\b[\s\w]*;)/' ' x length $1/gse; 673 $in; 674} 675 676sub new { 677 my $class = shift; 678 my $out = SUPER::new $class $recipes; 679 $out->set(@_); 680 $out; 681} 682 683sub do_declarations { 684 my @d = map do_declaration($_, $_[1], $_[2]), @{ $_[0] }; 685 \@d; 686} 687 688# Forth argument: if defined, there maybe no identifier. Generate one 689# basing on this argument. 690 691sub do_declaration { 692 my ($decl, $typedefs, $keywords, $argnum) = @_; 693 $decl =~ s/;?\s*$//; 694 my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater); 695 $decl =~ s/^\s*extern\b\s*//; 696 $pos = 0; 697 while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) { 698 $w = $1; 699 if ($w =~ /^(struct|class|enum|union)$/) { 700 $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'"; 701 } 702 $pos = pos $decl; 703 } 704 pos $decl = $pos; 705 $decl =~ /\G[\s*]*\*/g or pos $decl = $pos; 706 $type = substr $decl, 0, pos $decl; 707 $decl =~ /\G\s*/g or pos $decl = length $type; # ???? 708 $pos = pos $decl; 709 if (defined $argnum) { 710 if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2] 711 $ident = $1; 712 $repeater = $2; 713 $pos = pos $decl; 714 } else { 715 pos $decl = $pos = length $decl; 716 $type = $decl; 717 $ident = "arg$argnum"; 718 } 719 } else { 720 die "Cannot process declaration `$decl' without an identifier" 721 unless $decl =~ /\G(\w+)/g; 722 $ident = $1; 723 $pos = pos $decl; 724 } 725 $decl =~ /\G\s*/g or pos $decl = $pos; 726 $pos = pos $decl; 727 if (pos $decl != length $decl) { 728 pos $decl = $pos; 729 die "Expecting parenth after identifier in `$decl'\nafter `", 730 substr($decl, 0, $pos), "'" 731 unless $decl =~ /\G\(/g; 732 my $argstring = substr($decl, pos($decl) - length $decl); 733 matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'"; 734 $argstring = substr($argstring, 0, pos($argstring) - 1); 735 $argstring =~ s/ ^ ( \s* void )? \s* $ //x; 736 $args = []; 737 my @args; 738 if ($argstring ne '') { 739 my $top = top_level $argstring; 740 my $p = 0; 741 my $arg; 742 while ($top =~ /,/g) { 743 $arg = substr($argstring, $p, pos($top) - 1 - $p); 744 $arg =~ s/^\s+|\s+$//gs; 745 push @args, $arg; 746 $p = pos $top; 747 } 748 $arg = substr $argstring, $p; 749 $arg =~ s/^\s+|\s+$//gs; 750 push @args, $arg; 751 } 752 my $i = 0; 753 for (@args) { 754 push @$args, do_declaration1($_, $typedefs, $keywords, $i++); 755 } 756 } 757 [$type, $ident, $args, $decl, $repeater]; 758} 759 760sub do_declaration1 { 761 my ($decl, $typedefs, $keywords, $argnum) = @_; 762 $decl =~ s/;?\s*$//; 763 my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater); 764 $pos = 0; 765 while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) { 766 $w = $1; 767 if ($w =~ /^(struct|class|enum|union)$/) { 768 $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'"; 769 } 770 $pos = pos $decl; 771 } 772 pos $decl = $pos; 773 $decl =~ /\G[\s*]*\*/g or pos $decl = $pos; 774 $type = substr $decl, 0, pos $decl; 775 $decl =~ /\G\s*/g or pos $decl = length $type; # ???? 776 $pos = pos $decl; 777 if (defined $argnum) { 778 if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2] 779 $ident = $1; 780 $repeater = $2; 781 $pos = pos $decl; 782 } else { 783 pos $decl = $pos = length $decl; 784 $type = $decl; 785 $ident = "arg$argnum"; 786 } 787 } else { 788 die "Cannot process declaration `$decl' without an identifier" 789 unless $decl =~ /\G(\w+)/g; 790 $ident = $1; 791 $pos = pos $decl; 792 } 793 $decl =~ /\G\s*/g or pos $decl = $pos; 794 $pos = pos $decl; 795 if (pos $decl != length $decl) { 796 pos $decl = $pos; 797 die "Expecting parenth after identifier in `$decl'\nafter `", 798 substr($decl, 0, $pos), "'" 799 unless $decl =~ /\G\(/g; 800 my $argstring = substr($decl, pos($decl) - length $decl); 801 matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'"; 802 $argstring = substr($argstring, 0, pos($argstring) - 1); 803 $argstring =~ s/ ^ ( \s* void )? \s* $ //x; 804 $args = []; 805 my @args; 806 if ($argstring ne '') { 807 my $top = top_level $argstring; 808 my $p = 0; 809 my $arg; 810 while ($top =~ /,/g) { 811 $arg = substr($argstring, $p, pos($top) - 1 - $p); 812 $arg =~ s/^\s+|\s+$//gs; 813 push @args, $arg; 814 $p = pos $top; 815 } 816 $arg = substr $argstring, $p; 817 $arg =~ s/^\s+|\s+$//gs; 818 push @args, $arg; 819 } 820 my $i = 0; 821 for (@args) { 822 push @$args, do_declaration2($_, $typedefs, $keywords, $i++); 823 } 824 } 825 [$type, $ident, $args, $decl, $repeater]; 826} 827 828############################################################ 829 830package C::Preprocessed; 831use Symbol; 832use File::Basename; 833use Config; 834 835sub new { 836 die "usage: C::Preprocessed->new(filename[, defines[, includes[, cpp]]])" 837 if @_ < 2 or @_ > 5; 838 my ($class, $filename, $Defines, $Includes, $Cpp) 839 = (shift, shift, shift, shift, shift); 840 $Cpp ||= \%Config::Config; 841 my $filedir = dirname $filename || '.'; 842 $Includes ||= [$filedir, '/usr/local/include', '.']; 843 my $addincludes = ""; 844 $addincludes = "-I" . join(" -I", @$Includes) 845 if defined $Includes and @$Includes; 846 my($sym) = gensym; 847 my $cmd = "echo '\#include \"$filename\"' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |"; 848 #my $cmd = "$Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} < $filename |"; 849 #my $cmd = "echo '\#include <$filename>' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |"; 850 851 (open($sym, $cmd) or die "Cannot open pipe from `$cmd': $!") 852 and bless $sym => $class; 853} 854 855sub text { 856 my $class = shift; 857 my $filter = shift; 858 if (defined $filter) { 859 return text_only_from($class, $filter, @_); 860 } 861 my $stream = $class->new(@_); 862 my $oh = select $stream; 863 $/ = undef; 864 select $oh; 865 <$stream>; 866} 867 868sub text_only_from { 869 my $class = shift; 870 my $from = shift || die "Expecting argument in `text_only_from'"; 871 my $stream = $class->new(@_); 872 my $on = $from eq $_[0]; 873 my $eqregexp = $on ? '\"\"|' : ''; 874 my @out; 875 while (<$stream>) { 876 #print; 877 878 $on = /$eqregexp[\"\/]\Q$from\"/ if /^\#/; 879 push @out, $_ if $on; 880 } 881 join '', @out; 882} 883 884sub DESTROY { 885 close($_[0]) 886 or die "Cannot close pipe from `$Config::Config{cppstdin}': err $?, $!\n"; 887} 888 889# Autoload methods go after __END__, and are processed by the autosplit program. 890# Return to the principal package. 891package C::Scan; 892 8931; 894__END__ 895 896=head1 NAME 897 898C::Scan - scan C language files for easily recognized constructs. 899 900=head1 SYNOPSIS 901 902 $c = new C::Scan 'filename' => $filename, 'filename_filter' => $filter, 903 'add_cppflags' => $addflags; 904 $c->set('includeDirs' => [$Config::Config{shrpdir}]); 905 906 my $fdec = $c->get('parsed_fdecls'); 907 908 909=head1 DESCRIPTION 910 911B<This description is I<VERY> incomplete.> 912 913This module uses C<Data::Flow> interface, thus one uses it in the 914following fashion: 915 916 $c = new C::Scan(attr1 => $value1, attr2 => $value2); 917 $c->set( attr3 => $value3 ); 918 919 $value4 = $c->get('attr4'); 920 921Attributes are depending on some other attributes. The only 922I<required> attribute, i.e., the attribute which I<should> be set, is 923C<filename>, which denotes which file to parse. 924 925All other attributes are either optional, or would be calculated basing on values of required and optional attributes. 926 927=head2 Output attributes 928 929=over 14 930 931=item C<includes> 932 933Value: reference to a list of included files. 934 935=item C<defines_args> 936 937Value: reference to hash of macros with arguments. The values are 938references to an array of length 2, the first element is a reference 939to the list of arguments, the second one being the expansion. 940Newlines are not unescaped, thus 941 942 #define C(x,y) E\ 943 F 944 945will finish with C<("C" =E<gt> [ ["x", "y"], "E\nF"])>. 946 947=item C<defines_no_args> 948 949Value: reference to hash of macros without arguments. Newlines are 950not escaped, thus 951 952 #define A B 953 954will finish with C<("A" =E<gt> "B")>. 955 956=item C<fdecls> 957 958Value: reference to list of declarations of functions. 959 960=item C<inlines> 961 962Value: reference to list of definitions of functions. 963 964=item C<parsed_fdecls> 965 966Value: reference to list of parsed declarations of functions. 967 968A parsed declaration is a reference to a list of C<(rt, nm, args, ft, 969mod)>. Here C<rt> is return type of a function, C<nm> is the name, 970C<args> is the list of arguments, C<ft> is the full text of the 971declaration, and C<mod> is the modifier (which is always C<undef>). 972 973Each entry in the list C<args> is of the same form C<(ty, nm, args, 974ft, mod)>, here C<ty> is the type of an argument, C<nm> is the name (a 975generated one if missing in the declaration), C<args> is C<undef>, and 976C<mod> is the string of array modifiers. 977 978=item C<typedef_hash> 979 980Value: a reference to a hash which contains known C<typedef>s as keys. 981Values of the hash are array references of length 2, with what should 982be put before/after the type for a standalone typedef declaration (but 983without the C<typedef> substring). 984 985Parse uses naive heuristics. 986 987=item C<typedef_texts> 988 989Value: a reference to a list which contains known expansions of 990C<typedef>s. 991 992=item C<typedefs_maybe> 993 994Value: a reference to a list of C<typedef>ed names. Heuristics are used. 995 996=item C<vdecls> 997 998Value: a reference to a list of C<extern> variable declarations. 999 1000=item C<vdecl_hash> 1001 1002Value: a reference to a hash of parsed C<extern> variable declarations, 1003containing the variable names as keys. Values of the hash are array 1004references of length 2, with what should be put before/after the name 1005for a standalone extern variable declaration (but without the C<extern> 1006substring). 1007 1008=item C<typedef_structs> 1009 1010Value: a reference to a hash of parsed struct declarations from typedefs. 1011Keys are typedefed names, values are C<undef> if not a struct or union, 1012else an array reference of definitions of the elements of the structure; 1013each definition is itself an array reference of length 3, consisting of 1014what should be put before/after the name for a standalone variable 1015declaration, followed by the name of the element. Anonymous structs and 1016unions used within the definitions are given an arbitrary name including 1017the string C<ANON>, and referred to using that name. 1018 1019=back 1020 1021=cut 1022