1package C::Scan;
2
3require Exporter;
4use Config '%Config';
5use File::Basename;
6use Data::Flow qw(0.05);
7use strict;			# Earlier it catches ISA and EXPORT.
8
9@C::Scan::ISA = qw(Exporter Data::Flow);
10
11# Items to export into callers namespace by default. Note: do not export
12# names by default without a very good reason. Use EXPORT_OK instead.
13# Do not simply export all your public functions/methods/constants.
14
15@C::Scan::EXPORT = qw(
16	    );
17@C::Scan::EXPORT_OK = qw(
18			);
19# this flag tells cpp to only output macros
20$C::Scan::MACROS_ONLY = '-dM';
21
22$C::Scan::VERSION = '0.74';
23
24my (%keywords,%style_keywords);
25for (qw(asm auto break case char continue default do double else enum
26        extern float for fortran goto if int long register return short
27        sizeof static struct switch typedef union unsigned signed while void)) {
28  $keywords{$_}++;
29}
30for (qw(bool class const delete friend inline new operator overload private
31        protected public virtual)) {
32  $style_keywords{'C++'}{$_}++;
33}
34for (qw(__func__ _Complex _Imaginary _Bool inline restrict)) {
35  $style_keywords{'C9X'}{$_}++;
36}
37for (qw(inline const asm noreturn format section
38	constructor destructor unused weak)) {
39  $style_keywords{'GNU'}{$_}++;
40  $style_keywords{'GNU'}{"__$ {_}__"}++;
41}
42  $style_keywords{'GNU'}{__attribute__}++;
43  $style_keywords{'GNU'}{__extension__}++;
44  $style_keywords{'GNU'}{__consts}++;
45  $style_keywords{'GNU'}{__const}++;
46
47my $recipes
48  = { Defines => { default => '' },
49      cppstdin => { default => $Config{cppstdin} },
50      cppflags => { default => $Config{cppflags} },
51      cppminus => { default => $Config{cppminus} },
52      c_styles => { default => [qw(C++ GNU C9X)] },
53      add_cppflags => { default => '' },
54      keywords => { prerequisites => ['c_styles'],
55		    output => sub {
56		      my %kw = %keywords;
57		      my %add;
58		      for ( @{ shift->{c_styles} } ) {
59			%add = %{ $style_keywords{$_} };
60			%kw = (%kw, %add);
61		      }
62		      \%kw;
63		    }, },
64      'undef' => { default => undef },
65      filename_filter => { default => undef },
66      full_text => { class_filter => [ 'text', 'C::Preprocessed',
67				       qw(undef filename Defines includeDirs Cpp)] },
68      text => { class_filter => [ 'text', 'C::Preprocessed',
69				  qw(filename_filter filename Defines includeDirs Cpp)] },
70      text_only_from => { class_filter => [ 'text_only_from', 'C::Preprocessed',
71					    qw(filename_filter filename Defines includeDirs Cpp)] },
72      includes => { filter => [ \&includes,
73				qw(filename Defines includeDirs Cpp) ], },
74      includeDirs =>  { prerequisites => ['filedir'],
75			output => sub {
76			  my $data = shift;
77			  [ $data->{filedir}, '/usr/local/include', '.'];
78			} },
79      Cpp => { prerequisites => [qw(cppminus add_cppflags cppflags cppstdin)],
80	       output => sub {
81		 my $data = shift;
82		 return { cppstdin => $data->{cppstdin},
83			  cppflags => "$data->{cppflags} $data->{add_cppflags}",
84			  cppminus => $data->{cppminus} };
85	       } },
86      filedir => { output => sub { dirname ( shift->{filename} || '.' ) } },
87      sanitized => { filter => [ \&sanitize, 'text'], },
88      toplevel => { filter => [ \&top_level, 'sanitized'], },
89      full_sanitized => { filter => [ \&sanitize, 'full_text'], },
90      full_toplevel => { filter => [ \&top_level, 'full_sanitized'], },
91      no_type_decl => { filter => [ \&remove_type_decl, 'toplevel'], },
92      typedef_chunks => { filter => [ \&typedef_chunks, 'full_toplevel'], },
93      typedefs_maybe => { filter => [ sub {[keys %{+shift}]}, 'typedef_hash'], },
94      typedefs_whited => { filter => [ \&typedefs_whited,
95				      'full_sanitized', 'typedef_chunks',
96				      'keywords_rex'], },
97      typedef_texts => { filter => [ \&typedef_texts,
98				      'full_text', 'typedef_chunks'], },
99      typedef_hash => { filter => [ \&typedef_hash,
100				    'typedef_texts', 'typedefs_whited'], },
101      typedef_structs => { filter => [ \&typedef_structs,
102				       'typedef_hash'], },
103      defines_maybe => { filter => [ \&defines_maybe, 'filename'], },
104      defines_no_args => { prerequisites => ['defines_maybe'],
105			   output => sub { shift->{defines_maybe}->[0] }, },
106      defines_args => { prerequisites => ['defines_maybe'],
107			output => sub { shift->{defines_maybe}->[1] }, },
108
109      defines_full => { filter => [ \&defines_full,
110				    qw(filename Defines includeDirs Cpp) ], },
111      defines_no_args_full => { prerequisites => ['defines_full'],
112				output => sub { shift->{defines_full}->[0] }, },
113      defines_args_full => { prerequisites => ['defines_full'],
114			output => sub { shift->{defines_full}->[1] }, },
115
116      decl_inlines => { filter => [ \&functions_in, 'no_type_decl'], },
117      inline_chunks => { filter => [ sub { shift->[0] }, 'decl_inlines'], },
118      inlines => { filter => [ \&from_chunks, 'inline_chunks', 'text'], },
119      decl_chunks => { filter => [ sub { shift->[1] }, 'decl_inlines'], },
120      decls => { filter => [ \&from_chunks, 'decl_chunks', 'text'], },
121      fdecl_chunks => { filter => [ sub { shift->[4] }, 'decl_inlines'], },
122      fdecls => { filter => [ \&from_chunks, 'fdecl_chunks', 'text'], },
123      mdecl_chunks => { filter => [ sub { shift->[2] }, 'decl_inlines'], },
124      mdecls => { filter => [ \&from_chunks, 'mdecl_chunks', 'text'], },
125      vdecl_chunks => { filter => [ sub { shift->[3] }, 'decl_inlines'], },
126      vdecls => { filter => [ \&from_chunks, 'vdecl_chunks', 'text'], },
127      vdecl_hash => { filter => [ \&vdecl_hash, 'vdecls', 'mdecls' ], },
128      parsed_fdecls => { filter => [ \&do_declarations, 'fdecls',
129				     'typedef_hash', 'keywords'], },
130      keywords_rex => { filter => [ sub { my @k = keys %{ shift() };
131					  local $" = '|';
132					  my $r = "(?:@k)";
133					  eval 'qr/$r/' or $r	# Older Perls
134					}, 'keywords'], },
135    };
136
137sub from_chunks {
138  my $chunks = shift;
139  my $txt = shift;
140  my @out;
141  my $i = 0;
142  while ($i < @$chunks) {
143    push @out, substr $txt, $chunks->[$i], $chunks->[ $i + 1 ] - $chunks->[$i];
144    $i += 2;
145  }
146  \@out;
147}
148
149#sub process { request($recipes, @_) }
150# Preloaded methods go here.
151
152sub includes {
153  my %seen;
154  my $stream = new C::Preprocessed (@_)
155    or die "Cannot open pipe from cppstdin: $!\n";
156
157  while (<$stream>) {
158    next unless m(^\s*\#\s*	# Leading hash
159		  (line\s*)?	# 1: Optional line
160		  ([0-9]+)\s*	# 2: Line number
161		  (.*)		# 3: The rest
162		 )x;
163    my $include = $3;
164    $include = $1 if $include =~ /"(.*)"/; # Filename may be in quotes
165    $include =~ s,\\\\,/,g if $^O eq 'os2';
166    $seen{$include}++ if $include ne "";
167  }
168  [keys %seen];
169}
170
171sub defines_maybe {
172  my $file = shift;
173  my ($mline,$line,%macros,%macrosargs,$sym,$args);
174  open(C, $file) or die "Cannot open file $file: $!\n";
175  while (not eof(C) and $line = <C>) {
176    next unless
177      ( $line =~ s[
178		   ^ \s* \# \s*	# Start of directive
179		   define \s+
180		   (\w+)	# 1: symbol
181		   (?:
182		    \( (.*?) \s* \) # 2: Minimal match for arguments
183                                    # in parenths (without trailing
184                                    # spaces)
185		   )?		# optional, no grouping
186		   \s*		# rest is the definition
187		   ([\s\S]*)	# 3: the rest
188		  ][]x );
189    ($sym, $args, $mline) = ($1, $2, $3);
190    $mline .= <C> while not eof(C) and $mline =~ s/\\\n/\n/;
191    chomp $mline;
192    #print "sym: `$sym', args: `$args', mline: `$mline'\n";
193    if (defined $args) {
194      $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
195    } else {
196      $macros{$sym} = $mline;
197    }
198  }
199  close(C) or die "Cannot close file $file: $!\n";
200  [\%macros, \%macrosargs];
201}
202
203sub defines_full {
204  my $Cpp = $_[3];
205  my ($mline,$line,%macros,%macrosargs,$sym,$args);
206
207  # save the old cppflags and add the flag for only ouputting macro definitions
208  my $old_cppstdin = $Cpp->{'cppstdin'};
209  $Cpp->{'cppstdin'} = $old_cppstdin . " " . $C::Scan::MACROS_ONLY;
210
211  my $stream = new C::Preprocessed (@_)
212    or die "Cannot open pipe from cppstdin: $!\n";
213
214  while (defined ($line = <$stream>)) {
215    next unless
216      ( $line =~ s[
217		   ^ \s* \# \s*	# Start of directive
218		   define \s+
219		   (\w+)	# 1: symbol
220		   (?:
221		    \( (.*?) \s* \) # 2: Minimal match for arguments
222                                    # in parenths (without trailing
223                                    # spaces)
224		   )?		# optional, no grouping
225		   \s*		# rest is the definition
226		   ([\s\S]*)	# 3: the rest
227		  ][]x );
228    ($sym, $args, $mline) = ($1, $2, $3);
229    $mline .= <$stream> while ($mline =~ s/\\\n/\n/);
230    chomp $mline;
231#print STDERR "sym: `$sym', args: `$args', mline: `$mline'\n";
232    if (defined $args) {
233      $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
234    } else {
235      $macros{$sym} = $mline;
236    }
237  }
238  # restore the original cppflags
239  $Cpp->{'cppstdin'} = $old_cppstdin;
240  [\%macros, \%macrosargs];
241}
242
243# sub nexttypedef {
244#   return unless $_[0] =~ /(\G|^|;)\s*typedef\b/g;
245#   my $start = pos($_[0]) - 7;
246#   nextsemi($_[0]);
247#   my $end = pos $_[0];
248#   # warn "Found `", substr($_[0], $start, $end - $start), "'\n" if $debug;
249#   return $start, $end;
250# }
251
252# sub nextsemi {
253#   my $n = 0;
254#   while ($_[0] =~ /([\(\{\[])|([\]\)\}])|(\;)/g) {
255#     $n++ if defined $1;
256#     $n-- if defined $2;
257#     return if defined $3 and $n == 0;
258#   }
259#   die "No semicolon on the outer level";
260# }
261
262sub typedef_texts {
263  my ($txt, $chunks) = (shift, shift);
264  my ($b, $e, $in, @out);
265  my @in = @$chunks;
266  while (($b, $e) = splice @in, 0, 2) {
267    $in = substr($txt, $b, $e - $b);
268    # remove any remaining directives
269    $in =~ s/^ ( \s* \# .* ( \\ $ \n .* )* ) / ' ' x length($1)/xgem;
270    push @out, $in;
271  }
272  \@out;
273}
274
275sub typedef_hash_old {
276  +{ map {($_,1)} map /(\w+)/, @{$_[0]} };
277}
278
279sub typedef_hash {
280  my ($typedefs, $whited) = (shift,shift);
281  my %out;
282
283 loop:
284  for my $o (0..$#$typedefs) {
285    my $wh = $whited->[$o];
286    my $td = $typedefs->[$o];
287    if ($wh =~ /,/ or not $wh =~ /\w/) { # Hard case, guessimates ...
288      # Determine whether the new thingies are inside parens
289      $wh =~ /,/g;
290      my $p = pos $wh;
291      my ($s, $e);
292      if (matchingbrace($wh)) {	# Inside.  Easy part: just split on /,/...
293	$e = pos($wh) - 1;
294	$s = $e;
295	my $d = 0;
296	# Skip back
297	while (--$s >= 0) {
298	  my $c = substr $wh, $s, 1;
299	  if ($c =~ /[\(\{\[]/) {
300	    $d--;
301	  } elsif ($c =~ /[\)\]\}]/) {
302	    $d++;
303	  }
304	  last if $d < 0;
305	}
306	if ($s < 0) {		# Should not happen
307	  warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n");
308	  next loop;
309	}
310	$s++;
311      } else {			# We are at toplevel
312	# We need to skip back all the modifiers attached to the first thingy
313	# Guesstimates: everything after the first '*' (inclusive)
314	pos $wh = 0;
315	$wh = /(?=\w)/g;
316	my $ws = pos $wh;
317	my $pre = substr $wh, 0, $ws;
318	$s = $ws;
319	$s = pos $pre if $pre =~ /(?=\*)/g;
320	$e = length $wh;
321      }
322      # Now: need to split $td based on commas in $wh!
323      # And need to split each chunk of $td based on word in the chunk of $wh!
324      my $td_decls = substr($td, $s, $e - $s);
325      my ($pre, $post) = (substr($td, 0, $s), substr($td, $e));
326      my $wh_decls = substr($wh, $s, $e - $s);
327      my @wh_decls = split /,/, $wh_decls;
328      my $td_s = 0;
329      my (@td_decl, @td_pre, @td_post, @td_word);
330      for my $wh_d (@wh_decls) {
331	my $td_d = substr $td, $td_s, length $wh_d;
332	push @td_decl, $td_d;
333	$wh_d =~ /(\w+)/g;
334	push @td_word, $1;
335	push @td_post, substr $td_d, pos($wh_d);
336	push @td_pre,  substr $td_d, pos($wh_d) - length $1, length $1;
337	$td_s += 1 + length $wh_d; # Skip over ','
338      }
339      for my $i (0..$#wh_decls) {
340	my $p = "$td_post[$i]$post";
341	$p = '' unless $p =~ /\S/;
342	$out{$td_word[$i]} = ["$pre$td_pre[$i]", $p];
343      }
344    } else {			# Only one thing defined...
345      $wh =~ /(\w+)/g;
346      my $e	= pos $wh;
347      my $s	= $e - length $1;
348      my $type	= $1;
349      my $pre	= substr $td, 0, $s;
350      my $post	= substr $td, $e, length($td) - $e;
351      $post = '' unless $post =~ /\S/;
352      $out{$type} = [$pre, $post];
353    }
354  }
355  \%out;
356}
357
358sub typedef_chunks {		# Input is toplevel, output: starts and ends
359  my $txt = shift;
360  pos $txt = 0;
361  my ($b, $e, @out);
362  while ($txt =~ /\btypedef\b/g) {
363    push @out, pos $txt;
364    $txt =~ /(?=;)|\Z/g;
365    push @out, pos $txt;
366  }
367  \@out;
368}
369
370sub typedef_structs {
371  my $typehash = shift;
372  my %structs;
373  while (my($key, $text) = each %$typehash) {
374    my $name = parse_struct($text->[0], \%structs);
375    $structs{$key} = defined($name) ? $structs{$name} : undef;
376  }
377  \%structs;
378}
379
380sub parse_struct {
381  my($in, $structs) = @_;
382  my($b, $e, $chunk, $vars, $struct, $structname);
383  ($structname, $in) = $in =~ /
384    ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $
385  /gisx or return;
386  $structname .= " _ANON" unless $structname =~ /\s/;
387  $structname .= " 0" if exists $structs->{$structname};
388  $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname};
389  $b = 0;
390  while ($in =~ /(\{|;|$)/g) {
391    matchingbrace($in), next if $1 eq '{';
392    $e = pos($in);
393    next if $b == $e;
394    $chunk = substr($in, $b, $e - $b);
395    $b = $e;
396    if ($chunk =~ /\G\s*(struct|union).*\}/gs) {
397      my $term = pos $chunk;
398      my $name = parse_struct(substr($chunk, 0, $term), $structs);
399      $vars = parse_vars(join ' ', $name, substr $chunk, $term);
400    } else {
401      $vars = parse_vars($chunk);
402    }
403    push @$struct, @$vars;
404  }
405  $structs->{$structname} = $struct;
406  $structname;
407}
408
409sub parse_vars {
410  my $in = shift;
411  my($vars, $type, $word, $id, $post);
412  while ($in =~ /\G\s*([\[;,]|\S+?\b|$)\s*/g) {
413    $word = $1;
414    if ($word eq ';' || $word eq '') {
415      next unless defined $id;
416      $type = 'int' unless defined $type;	# or is this an error?
417      push @$vars, [ $type, $post, $id ];
418      ($type, $post, $id) = (undef, undef, undef);
419    } elsif ($word eq ',') {
420      warn "panic: expecting name before comma in '$in'\n" unless defined $id;
421      $type = 'int' unless defined $type;	# or is this an error?
422      push @$vars, [ $type, $post, $id ];
423      $type =~ s/[ *]*$//;
424      $id = undef;
425    } elsif ($word eq '[') {
426      warn "panic: expecting name before '[' in '$in'\n" unless defined $id;
427      $type = 'int' unless defined $type;	# or is this an error?
428      my $b = pos $in;
429      matchingbrace($in);
430      $post .= $word . substr $in, $b, pos($in) - $b;
431    } else {
432      if (defined $post) {
433	warn "panic: not expecting '$word' after array bounds in '$in'\n";
434      } else {
435	$type = join ' ', grep defined, $type, $id if defined $id;
436	$id = $word;
437      }
438    }
439  }
440  $vars;
441}
442
443sub vdecl_hash {
444  my($vdecls, $mdecls) = @_;
445  my %vdecl_hash;
446  for (@$vdecls, @$mdecls) {
447    next if /[()]/;	# ignore functions, and function pointers
448    my $copy = $_;
449    next unless $copy =~ s/^\s*extern\s*//;
450    my $vars = parse_vars($copy);
451    $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars;
452  }
453  \%vdecl_hash;
454}
455
456# The output is the list of list of inline chunks and list of
457# declaration chunks.
458
459sub functions_in {		# The arg is text without type declarations.
460  my $in = shift;		# remove_type_decl(top_level(sanitize($txt)));
461  # What remains now consists of variable and function declarations,
462  # and inline functions.
463  $in =~ /(?=\S)/g;
464  my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls);
465  $b = pos $in;
466  my $chunk;
467  while ($b != length $in) {
468    $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space
469    $e = pos $in;
470    $chunk = substr $in, $b, $e - $b;
471    # Now subdivide the chunk.
472    #
473    # What we got is one chunk, probably finished by `;'. Whoever, it
474    # may start with several inline functions.
475    #
476    # Note that inline functions contain ( ) { } in the stripped version.
477    $b1 = 0;
478    while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) {
479      $e1 = pos $chunk;
480      push @inlines, $b + $b1, $b + $e1;
481      $chunk =~ /(?=\S)/g;
482      $b1 = pos $chunk;
483      $b1 = length $chunk, last unless defined $b1;
484    }
485    if ($e - $b - $b1 > 0) {
486      push @decls, $b + $b1, $e;
487      substr ($chunk, 0, $b1) = '';
488      if ($chunk =~ /,/) {	# Contains multiple declarations.
489	push @mdecls, $b + $b1, $e;
490      } else  {			# Non-multiple.
491	my $isvar = 1;
492	# Since leading \s* is not optimized, this is quadratic!
493	$chunk =~ s{
494		     ( ( const
495			 | __attribute__ \s* \( \s* \)
496		       ) \s* )* ( ; \s* )? \Z # Strip from the end
497		   }()x;
498	$chunk =~ s/\s*\Z//;
499	if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"!
500	  if ($chunk !~ m{
501			  \( .* \( # Multiple parenths
502			 }x
503	      and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function?
504	    $isvar = 0;
505	  }
506	}
507	if ($isvar)  {	# Heuristically variable
508	  push @vdecls, $b + $b1, $e;
509	} else {
510	  push @fdecls, $b + $b1, $e;
511	}
512      }
513    }
514    $in =~ /\G\s*/g ;
515    $b = pos $in;
516  }
517  [\@inlines, \@decls, \@mdecls, \@vdecls, \@fdecls];
518}
519
520sub typedefs_whited {		# Input is sanitized text, and list of beg/end.
521  my @lst = @{$_[1]};
522  my @out;
523  my ($b, $e);
524  while ($b = shift @lst) {
525    $e = shift @lst;
526    push @out, whited_decl($_[2], substr $_[0], $b, $e - $b);
527  }
528  \@out;
529}
530
531# XXXX This is heuristical in many respects...
532# Recipe: remove all struct-ish chunks.  Remove all array specifiers.
533# Remove GCC attribute specifiers.
534# What remains may contain function's arguments, old types, and newly
535# defined types.
536# Remove function arguments using heuristics methods.
537# Now out of several words in a row the last one is a newly defined type.
538
539sub whited_decl {		# Input is sanitized.
540  my $keywords_rex = shift;
541  my $in = shift;		# Text of a declaration
542  my $rest  = $in;
543  my $out  = $in;		# Whited out $in
544
545  # Remove all the structs
546  while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) {
547    my $pos_start = pos($out) - length $1;
548
549    matchingbrace($out);
550    my $pos_end = pos $out;
551    substr($out, $pos_start, $pos_end - $pos_start) =
552	' ' x ($pos_end - $pos_start);
553    pos $out = $pos_end;
554  }
555
556  # Deal with glibc's wierd ass __attribute__ tag.  Just dump it.
557  # Maaaybe this should check to see if you're using GCC, but I don't
558  # think so since glibc is nice enough to do that for you.  [MGS]
559  while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) {
560      my $att_pos_start = pos($out) - length($1);
561
562      # Need to figure out where ((..)) ends.
563      matchingbrace($out);
564      my $att_pos_end = pos $out;
565
566      # Remove the __attribute__ tag.
567      substr($out, $att_pos_start, $att_pos_end - $att_pos_start) =
568	' ' x ($att_pos_end - $att_pos_start);
569      pos $out = $att_pos_end;
570  }
571
572  # Remove arguments of functions (heuristics only).
573  # These things (start) arglist of a declared function:
574  # paren word comma
575  # paren word space non-paren
576  # paren keyword paren
577  # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ?????
578  while ( $out =~ /(\(\s*(\w+(,|\s+[^\)\s])|$keywords_rex\s*\)))/g ) {
579    my $pos_start = pos($out) - length($1);
580    pos $out = $pos_start + 1;
581    matchingbrace($out);
582    substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start)
583      = ' ' x (pos($out) - 2 - $pos_start);
584  }
585  # Remove array specifiers
586  $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge;
587  my $tout = $out;
588  # Several words in a row cannot be new typedefs, but the last one.
589  $out =~ s/((\w+\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge;
590  unless ($out =~ /\w/) {
591    # Probably a function-type declaration: typedef int f(int);
592    # Redo scan leaving the last word of the first group of words:
593    $tout =~ /(\w+\s+)*(\w+)/g;
594    $out = ' ' x (pos($tout) - length $2)
595      . $2 . ' ' x (length($tout) - pos($tout));
596    # warn "function typedef\n\t'$in'\nwhited-out as\n\t'$out'\n";
597  }
598  warn "panic: length mismatch\n\t'$in'\nwhited-out as\n\t'$out'\n"
599    if length($in) != length $out;
600  # Sanity check
601  warn "panic: multiple types without intervening comma in\n\t$in\nwhited-out as\n\t$out\n"
602    if $out =~ /\w[^\w,]+\w/;
603  warn "panic: no types found in\n\t$in\nwhited-out as\n\t$out\n"
604    unless $out =~ /\w/;
605  $out
606}
607
608sub matchingbrace {
609  # pos($_[0]) is after the opening brace now
610  my $n = 0;
611  while ($_[0] =~ /([\{\[\(])|([\]\)\}])/g) {
612    $1 ? $n++ : $n-- ;
613    return 1 if $n < 0;
614  }
615  # pos($_[0]) is after the closing brace now
616  return;				# false
617}
618
619sub remove_Comments_no_Strings { # We expect that no strings are around
620    my $in = shift;
621    $in =~ s,/(/.*|\*[\s\S]*?\*/),,g ; # C and C++
622    die "Unfinished comment" if $in =~ m,/\*, ;
623    $in;
624}
625
626sub sanitize {		# We expect that no strings are around
627    my $in = shift;
628    # C and C++, strings and characters
629    $in =~ s{ / (
630		 / .*			# C++ style
631		 |
632		 \* [\s\S]*? \*/	# C style
633		)			# (1)
634	     | '((?:[^\\\']|\\.)+)'	# (2) Character constants
635	     | "((?:[^\\\"]|\\.)*)"	# (3) Strings
636	     | ( ^ \s* \# .* 		# (4) Preprocessor
637		 ( \\ $ \n .* )* )	# and continuation lines
638	    } {
639	      # We want to preserve the length, so that one may go back
640	      defined $1 ? ' ' x (1 + length $1) :
641		defined $4 ? ' ' x length $4 :
642		  defined $2 ? "'" . ' ' x length($2) . "'" :
643		    defined $3 ? '"' . ' ' x length($3) . '"' : '???'
644	    }xgem ;
645    die "Unfinished comment" if $in =~ m{ /\* }x;
646    $in;
647}
648
649sub top_level {			# We expect argument is sanitized
650  # Note that this may remove the variable in declaration: int (*func)();
651  my $in = shift;
652  my $start;
653  my $out = $in;
654  while ($in =~ /[\[\{\(]/g ) {
655    $start = pos $in;
656    matchingbrace($in);
657    substr($out, $start, pos($in) - 1 - $start)
658      = ' ' x (pos($in) - 1 - $start);
659  }
660  $out;
661}
662
663sub remove_type_decl {		# We suppose that the arg is top-level only.
664  my $in = shift;
665  $in =~ s/(\b__extension__)(\s+typedef\b)/(' ' x length $1) . $2/gse;
666  $in =~ s/(\btypedef\b.*?;)/' ' x length $1/gse;
667  # The following form may appear only in the declaration of the type itself:
668  $in =~
669    s/(\b(enum|struct|union|class)\b[\s\w]*\{\s*\}\s*;)/' ' x length $1/gse;
670  # Pre-declarations:
671  $in =~
672    s/(\b(enum|struct|union|class)\b[\s\w]*;)/' ' x length $1/gse;
673  $in;
674}
675
676sub new {
677  my $class = shift;
678  my $out = SUPER::new $class $recipes;
679  $out->set(@_);
680  $out;
681}
682
683sub do_declarations {
684  my @d = map do_declaration($_, $_[1], $_[2]), @{ $_[0] };
685  \@d;
686}
687
688# Forth argument: if defined, there maybe no identifier. Generate one
689# basing on this argument.
690
691sub do_declaration {
692  my ($decl, $typedefs, $keywords, $argnum) = @_;
693  $decl =~ s/;?\s*$//;
694  my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
695  $decl =~ s/^\s*extern\b\s*//;
696  $pos = 0;
697  while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
698    $w = $1;
699    if ($w =~ /^(struct|class|enum|union)$/) {
700      $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
701    }
702    $pos = pos $decl;
703  }
704  pos $decl = $pos;
705  $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
706  $type = substr $decl, 0, pos $decl;
707  $decl =~ /\G\s*/g or pos $decl = length $type; # ????
708  $pos = pos $decl;
709  if (defined $argnum) {
710    if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
711      $ident = $1;
712      $repeater = $2;
713      $pos = pos $decl;
714    } else {
715      pos $decl = $pos = length $decl;
716      $type = $decl;
717      $ident = "arg$argnum";
718    }
719  } else {
720    die "Cannot process declaration `$decl' without an identifier"
721      unless $decl =~ /\G(\w+)/g;
722    $ident = $1;
723    $pos = pos $decl;
724  }
725  $decl =~ /\G\s*/g or pos $decl = $pos;
726  $pos = pos $decl;
727  if (pos $decl != length $decl) {
728    pos $decl = $pos;
729    die "Expecting parenth after identifier in `$decl'\nafter `",
730      substr($decl, 0, $pos), "'"
731      unless $decl =~ /\G\(/g;
732    my $argstring = substr($decl, pos($decl) - length $decl);
733    matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
734    $argstring = substr($argstring, 0, pos($argstring) - 1);
735    $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
736    $args = [];
737    my @args;
738    if ($argstring ne '') {
739      my $top = top_level $argstring;
740      my $p = 0;
741      my $arg;
742      while ($top =~ /,/g) {
743	$arg = substr($argstring, $p, pos($top) - 1 - $p);
744	$arg =~ s/^\s+|\s+$//gs;
745	push @args, $arg;
746	$p = pos $top;
747      }
748      $arg = substr $argstring, $p;
749      $arg =~ s/^\s+|\s+$//gs;
750      push @args, $arg;
751    }
752    my $i = 0;
753    for (@args) {
754      push @$args, do_declaration1($_, $typedefs, $keywords, $i++);
755    }
756  }
757  [$type, $ident, $args, $decl, $repeater];
758}
759
760sub do_declaration1 {
761  my ($decl, $typedefs, $keywords, $argnum) = @_;
762  $decl =~ s/;?\s*$//;
763  my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
764  $pos = 0;
765  while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
766    $w = $1;
767    if ($w =~ /^(struct|class|enum|union)$/) {
768      $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
769    }
770    $pos = pos $decl;
771  }
772  pos $decl = $pos;
773  $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
774  $type = substr $decl, 0, pos $decl;
775  $decl =~ /\G\s*/g or pos $decl = length $type; # ????
776  $pos = pos $decl;
777  if (defined $argnum) {
778    if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
779      $ident = $1;
780      $repeater = $2;
781      $pos = pos $decl;
782    } else {
783      pos $decl = $pos = length $decl;
784      $type = $decl;
785      $ident = "arg$argnum";
786    }
787  } else {
788    die "Cannot process declaration `$decl' without an identifier"
789      unless $decl =~ /\G(\w+)/g;
790    $ident = $1;
791    $pos = pos $decl;
792  }
793  $decl =~ /\G\s*/g or pos $decl = $pos;
794  $pos = pos $decl;
795  if (pos $decl != length $decl) {
796    pos $decl = $pos;
797    die "Expecting parenth after identifier in `$decl'\nafter `",
798      substr($decl, 0, $pos), "'"
799      unless $decl =~ /\G\(/g;
800    my $argstring = substr($decl, pos($decl) - length $decl);
801    matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
802    $argstring = substr($argstring, 0, pos($argstring) - 1);
803    $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
804    $args = [];
805    my @args;
806    if ($argstring ne '') {
807      my $top = top_level $argstring;
808      my $p = 0;
809      my $arg;
810      while ($top =~ /,/g) {
811	$arg = substr($argstring, $p, pos($top) - 1 - $p);
812	$arg =~ s/^\s+|\s+$//gs;
813	push @args, $arg;
814	$p = pos $top;
815      }
816      $arg = substr $argstring, $p;
817      $arg =~ s/^\s+|\s+$//gs;
818      push @args, $arg;
819    }
820    my $i = 0;
821    for (@args) {
822      push @$args, do_declaration2($_, $typedefs, $keywords, $i++);
823    }
824  }
825  [$type, $ident, $args, $decl, $repeater];
826}
827
828############################################################
829
830package C::Preprocessed;
831use Symbol;
832use File::Basename;
833use Config;
834
835sub new {
836    die "usage: C::Preprocessed->new(filename[, defines[, includes[, cpp]]])"
837      if @_ < 2 or @_ > 5;
838    my ($class, $filename, $Defines, $Includes, $Cpp)
839      = (shift, shift, shift, shift, shift);
840    $Cpp ||= \%Config::Config;
841    my $filedir = dirname $filename || '.';
842    $Includes ||= [$filedir, '/usr/local/include', '.'];
843    my $addincludes = "";
844    $addincludes = "-I" . join(" -I", @$Includes)
845      if defined $Includes and @$Includes;
846    my($sym) = gensym;
847    my $cmd = "echo '\#include \"$filename\"' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
848    #my $cmd = "$Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} < $filename |";
849    #my $cmd = "echo '\#include <$filename>' | $Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
850
851    (open($sym, $cmd) or die "Cannot open pipe from `$cmd': $!")
852      and bless $sym => $class;
853}
854
855sub text {
856  my $class = shift;
857  my $filter = shift;
858  if (defined $filter) {
859    return text_only_from($class, $filter, @_);
860  }
861  my $stream = $class->new(@_);
862  my $oh = select $stream;
863  $/ = undef;
864  select $oh;
865  <$stream>;
866}
867
868sub text_only_from {
869  my $class = shift;
870  my $from = shift || die "Expecting argument in `text_only_from'";
871  my $stream = $class->new(@_);
872  my $on = $from eq $_[0];
873  my $eqregexp = $on ? '\"\"|' : '';
874  my @out;
875  while (<$stream>) {
876    #print;
877
878    $on = /$eqregexp[\"\/]\Q$from\"/ if /^\#/;
879    push @out, $_ if $on;
880  }
881  join '', @out;
882}
883
884sub DESTROY {
885  close($_[0])
886    or die "Cannot close pipe from `$Config::Config{cppstdin}': err $?, $!\n";
887}
888
889# Autoload methods go after __END__, and are processed by the autosplit program.
890# Return to the principal package.
891package C::Scan;
892
8931;
894__END__
895
896=head1 NAME
897
898C::Scan - scan C language files for easily recognized constructs.
899
900=head1 SYNOPSIS
901
902  $c = new C::Scan 'filename' => $filename, 'filename_filter' => $filter,
903                   'add_cppflags' => $addflags;
904  $c->set('includeDirs' => [$Config::Config{shrpdir}]);
905
906  my $fdec = $c->get('parsed_fdecls');
907
908
909=head1 DESCRIPTION
910
911B<This description is I<VERY> incomplete.>
912
913This module uses C<Data::Flow> interface, thus one uses it in the
914following fashion:
915
916  $c = new C::Scan(attr1 => $value1, attr2 => $value2);
917  $c->set( attr3 => $value3 );
918
919  $value4 = $c->get('attr4');
920
921Attributes are depending on some other attributes. The only
922I<required> attribute, i.e., the attribute which I<should> be set, is
923C<filename>, which denotes which file to parse.
924
925All other attributes are either optional, or would be calculated basing on values of required and optional attributes.
926
927=head2 Output attributes
928
929=over 14
930
931=item C<includes>
932
933Value: reference to a list of included files.
934
935=item C<defines_args>
936
937Value: reference to hash of macros with arguments. The values are
938references to an array of length 2, the first element is a reference
939to the list of arguments, the second one being the expansion.
940Newlines are not unescaped, thus
941
942  #define C(x,y) E\
943                 F
944
945will finish with C<("C" =E<gt> [ ["x", "y"], "E\nF"])>.
946
947=item C<defines_no_args>
948
949Value: reference to hash of macros without arguments.  Newlines are
950not escaped, thus
951
952  #define A B
953
954will finish with C<("A" =E<gt> "B")>.
955
956=item C<fdecls>
957
958Value: reference to list of declarations of functions.
959
960=item C<inlines>
961
962Value: reference to list of definitions of functions.
963
964=item C<parsed_fdecls>
965
966Value: reference to list of parsed declarations of functions.
967
968A parsed declaration is a reference to a list of C<(rt, nm, args, ft,
969mod)>. Here C<rt> is return type of a function, C<nm> is the name,
970C<args> is the list of arguments, C<ft> is the full text of the
971declaration, and C<mod> is the modifier (which is always C<undef>).
972
973Each entry in the list C<args> is of the same form C<(ty, nm, args,
974ft, mod)>, here C<ty> is the type of an argument, C<nm> is the name (a
975generated one if missing in the declaration), C<args> is C<undef>, and
976C<mod> is the string of array modifiers.
977
978=item C<typedef_hash>
979
980Value: a reference to a hash which contains known C<typedef>s as keys.
981Values of the hash are array references of length 2, with what should
982be put before/after the type for a standalone typedef declaration (but
983without the C<typedef> substring).
984
985Parse uses naive heuristics.
986
987=item C<typedef_texts>
988
989Value: a reference to a list which contains known expansions of
990C<typedef>s.
991
992=item C<typedefs_maybe>
993
994Value: a reference to a list of C<typedef>ed names.  Heuristics are used.
995
996=item C<vdecls>
997
998Value: a reference to a list of C<extern> variable declarations.
999
1000=item C<vdecl_hash>
1001
1002Value: a reference to a hash of parsed C<extern> variable declarations,
1003containing the variable names as keys. Values of the hash are array
1004references of length 2, with what should be put before/after the name
1005for a standalone extern variable declaration (but without the C<extern>
1006substring).
1007
1008=item C<typedef_structs>
1009
1010Value: a reference to a hash of parsed struct declarations from typedefs.
1011Keys are typedefed names, values are C<undef> if not a struct or union,
1012else an array reference of definitions of the elements of the structure;
1013each definition is itself an array reference of length 3, consisting of
1014what should be put before/after the name for a standalone variable
1015declaration, followed by the name of the element. Anonymous structs and
1016unions used within the definitions are given an arbitrary name including
1017the string C<ANON>, and referred to using that name.
1018
1019=back
1020
1021=cut
1022