1#!/usr/bin/perl -w
2$|=1;
3################################################################################
4#
5#  scanprov -- scan Perl headers for macros, and add known exceptions, and
6#              functions we weren't able to otherwise find.  Thus the purpose
7#              of this file has been expanded beyond what its name says.
8#
9#  Besides the normal options, 'mode=clean' is understood as 'write', but
10#  first remove any scanprov lines added in previous runs of this.
11#
12#  The lines added have a code to signify they are added by us:
13#   F means it is a function in embed.fnc that the normal routines didn't find
14#   K means it is a macro in config.h, hence is provided, and documented
15#   M means it is a provided by D:P macro
16#   X means it is a known exceptional item
17#   Z means it is an unprovided macro without documentation
18#
19#  The regeneration routines do not know the prototypes for the macros scanned
20#  for, which is gotten from documentation in the source.  (If they were
21#  documented, they would be put in parts/apidoc.fnc, and test cases generated
22#  for them in mktodo.pl).  Therefore these are all undocumented, except for
23#  things from config.h which are all documented there, and many of which are
24#  just defined or not defined, and hence can't be tested.  Thus looking for
25#  them here is the most convenient option, which is why it's done here.
26#
27#  The scope of this program has also expanded to look in almost all header
28#  files for almost all macros that aren't documented nor provided.  This
29#  allows ppport.h --api-info=/foo/ to return when a given element actually
30#  came into existence, which can be a time saver for developers of the perl
31#  core.
32#
33#  It would be best if people would add documentation to them in the perl
34#  source, and then this portion of this function would be minimized.
35#
36#  On Linux nm and other uses by D:P, these are the remaining unused capital
37#  flags: HJLOQY
38#
39################################################################################
40#
41#  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
42#  Version 2.x, Copyright (C) 2001, Paul Marquess.
43#  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
44#
45#  This program is free software; you can redistribute it and/or
46#  modify it under the same terms as Perl itself.
47#
48################################################################################
49
50use strict;
51use Getopt::Long;
52
53require './parts/ppptools.pl';
54require './parts/inc/inctools';
55require './devel/devtools.pl';
56
57our %opt = (
58  mode    => 'check',
59  install => '/tmp/perl/install/default',
60  blead   => 'bleadperl',
61  debug   => 0,
62 'debug-start' => "",
63);
64
65GetOptions(\%opt, qw( install=s mode=s
66                      blead=s debug=i
67                      debug-start=s
68                      skip-devels)) or die;
69
70my $clean = $opt{mode} eq 'clean';
71my $write = $clean || $opt{mode} eq 'write';
72my $debug = $opt{debug};
73
74# Get the list of known macros.  Functions are calculated separately below
75my %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () }
76            parse_embed(qw(parts/embed.fnc parts/apidoc.fnc));
77
78# @provided is set to everthing provided
79my @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`;
80
81# There are a few exceptions that have to be dealt with specially.  Add these
82# to the list of things to scan for.
83my $hard_to_test_ref = known_but_hard_to_test_for();
84push @provided, keys %$hard_to_test_ref;
85
86my $base_dir = 'parts/base';
87my $todo_dir = 'parts/todo';
88
89# The identifying text placed in every entry by this program
90my $id_text = "added by $0";
91
92if ($write) {
93
94    # Get the list of files
95    my @files = all_files_in_dir($base_dir);
96
97    # If asked to, first strip out the results of previous incarnations of
98    # this script
99    if ($clean) {
100        print "Cleaning previous $0 runs\n";
101        foreach my $file (@files) {
102            open my $fh, "+<", $file or die "$file: $!\n";
103            my @lines = <$fh>;
104            my $orig_count = @lines;
105            @lines = grep { $_ !~ /$id_text/ } @lines;
106            next if @lines == $orig_count;  # No need to write if unchanged.
107            truncate $fh, 0;
108            seek $fh, 0, 0;
109            print $fh @lines;
110            close $fh or die "$file: $!\n";
111        }
112    }
113
114    # The file list is returned sorted, and so the min version is in the 0th
115    # element
116    my $file =  $files[0];
117    my $min_perl = $file;
118    $min_perl =~ s,.*/,,;    # The name is the integer of __MIN_PERL__
119
120    # There are a very few special cases that we may not find in scanning, but
121    # exist all the way back.  Add them now to avoid throwing later things
122    # off.
123    print "-- $file --\n";
124    open my $fh, "+<", $file or die "$file: $!\n";
125    my @lines = <$fh>;
126    my $count = @lines;
127    for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(),
128                                 # so can't be in blead, as they are skipped
129                                 # in testing, so no real need to check that
130                                 # they aren't dups.
131        my $line = format_output_line($_, 'X');
132        next if grep { /$line/ } @lines;
133        print "Adding $_ to $file\n";
134        push @lines, $line;
135    }
136    if ($count != @lines) {
137        @lines = sort symbol_order @lines;
138        truncate $fh, 0;
139        seek $fh, 0, 0;
140        print $fh @lines;
141    }
142    close $fh;
143
144    # Now we're going to add the hard to test symbols.  The hash has been
145    # manually populated and commited, with the version number ppport supports
146    # them to.
147    #
148    # This is a hash ref with the keys being all symbols found in all the
149    # files in the directory, and the values being the perl versions of each
150    # symbol.
151    my $todo = parse_todo($todo_dir);
152
153    # The keys of $hard_to_test_ref are the symbols, and the values are
154    # subhashes, with each 'version' key being its proper perl version.
155    # Below, we invert %hard_to_test, so that the keys are the version, and
156    # the values are the symbols that go in that version
157    my %add_by_version;
158    for my $hard (keys %$hard_to_test_ref) {
159
160        # But if someone ups the min version we support, we don't want to add
161        # something less than that.
162        my $version = int_parse_version($hard_to_test_ref->{$hard});
163        $version = $min_perl if $version < $min_perl;
164        $version = format_version_line($version);
165
166        push @{$add_by_version{$version}}, $hard
167                unless grep { $todo->{$_}->{version} eq $hard } keys %$todo;
168    }
169
170    # Only a few files will have exceptions that apply to them.  Rewrite each
171    foreach my $version (keys %add_by_version) {
172        if (is_devel_release($version)) {
173            my ($super, $major, $minor) = parse_version($version);
174            $major++;   # Go to next highest version that isn't a devel
175            $version = "$super.$major.0";
176        }
177
178        my $file = "$todo_dir/" . int_parse_version($version);
179        print "-- Adding known exceptions to $file --\n";
180        open my $fh, "+<", $file or die "$file: $!\n";
181        my @lines = <$fh>;
182        my $count = @lines;
183        push @lines, format_version_line($version) . "\n" unless @lines;
184        foreach my $symbol (@{$add_by_version{$version}}) {
185            my $line = format_output_line($symbol, 'X');
186            unless (grep { /$line/ } @lines) {;
187                print "adding $symbol\n";
188                push @lines, $line unless grep { /$line/ } @lines;
189            }
190        }
191        if (@lines != $count) {
192            @lines = sort symbol_order @lines;
193            truncate $fh, 0;
194            seek $fh, 0, 0;
195            print $fh @lines;
196        }
197        close $fh;
198    }
199}
200
201# Now that we've added the exceptions to a few files, we can parse
202# and deal with all of them.
203my $perls_ref = get_and_sort_perls(\%opt);
204
205die "Couldn't find any perls" unless @$perls_ref > 1;
206
207find_first_mentions($perls_ref,   # perls to look in
208                    \@provided,   # List of symbol names to look for
209                    '*.h',        # Look in all hdrs.
210                    1,            # Strip comments
211                   'M'
212                   );
213
214# Now look for functions that we didn't test in mktodo.pl, generally because
215# these were hidden behind #ifdef's.
216my $base_ref = parse_todo($base_dir);
217my @functions = parse_embed(qw(parts/embed.fnc));
218
219# We could just gather data for the publicly available ones, but having this
220# information available for everything is useful.
221#@functions = grep { exists $_->{flags}{A} } @functions;
222
223# The ones we don't have info on are the ones in embed.fnc that aren't in the
224# base files.  Certain of these will only be in the Perl_foo form.
225my @missing = map { exists $base_ref->{$_->{name}}
226                    ? ()
227                    : ((exists $_->{flags}{p} && exists $_->{flags}{o})
228                       ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}}
229                           ? ()
230                           : "Perl_$_->{name}"))
231                       : $_->{name})
232                  } @functions;
233
234# These symbols will be found in the autogen'd files, and they may be
235# commented out in them.
236find_first_mentions($perls_ref,
237                    \@missing,
238                    [ 'embed.h', 'proto.h' ],
239                    0,          # Don't strip comments
240                   'F'
241                   );
242
243sub symbol_order    # Sort based on first word on line
244{
245    my $stripped_a = $a =~ s/ ^ \s* //rx;
246    $stripped_a =~ s/ \s.* //x;
247
248    my $stripped_b = $b =~ s/ ^ \s* //rx;
249    $stripped_b =~ s/ \s.* //x;
250
251    return dictionary_order($stripped_a, $stripped_b);
252}
253
254sub format_output_line
255{
256    my $sym = shift;
257    my $code = shift;
258
259    return sprintf "%-30s # $code $id_text\n", $sym;
260}
261
262sub find_first_mentions
263{
264    my $perls_ref =    shift;   # List of perls to look in
265    my $look_for_ref = shift;   # List of symbol names to look for
266    my $hdrs =         shift;   # Glob of hdrs to look in
267    my $strip_comments = shift;
268    my $code           = shift; # Mark entries as having this type
269
270    use feature 'state';
271    state $first_perl = 1;
272
273    $hdrs = [ $hdrs ] unless ref $hdrs;
274
275    my %remaining;
276    $remaining{$_} = $code for @$look_for_ref;
277
278    my %v;
279
280    # We look in descending order of perl versions.  Each time through the
281    # loop %remaining is narrowed.
282    for my $p (@$perls_ref) {
283        print "checking perl $p->{version}...\n";
284
285        # Get the hdr files associated with this version
286        my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`;
287        chomp $archlib;
288        local @ARGV;
289        push @ARGV, glob "$archlib/CORE/$_" for @$hdrs;
290
291        # %sym's keys are every single thing that looks like an identifier
292        # (beginning with a non-digit \w, followed by \w*) that occurs in any
293        # header, regardless of where (outside of comments).  For macros, it
294        # can't end in an underscore, nor be like 'AbCd', which are marks for
295        # internal.
296        my %sym;
297
298        local $/ = undef;
299        while (<<>>) {  # Read in the whole next file as one string.
300
301            # This would override function definitions with macro ones
302            next if $code eq 'M' && $ARGV =~ m! / embed\.h $ !x;
303
304            my $is_config_h = $ARGV =~ m! / config\.h $ !x;
305
306            my $contents = $_;
307
308            # Strip initial '/*' in config.h /*#define... lines.  This just
309            # means the item isn't available on the platform this program is
310            # being run on.
311            $contents =~ s! ^ /\* \s* (?=\#\s*define\s) !!mx if $is_config_h;
312
313            # Strip comments, from perl faq
314            if ($strip_comments) {
315                $contents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse;
316            }
317
318            # For macros, we look for #defines
319            if ($code eq 'M') {
320                my %defines;
321
322                while ($contents =~ m/ ^ \s* \# \s* define \s+
323
324                                       # A symbol not ending in underscore
325                                       ( [A-Za-z][_A-Za-z0-9]*[A-Za-z0-9] )
326                                     /mxg)
327                {
328                    my $this_define = $1;
329
330                    # These are internal and not of external interest, so just
331                    # noise if we were to index them
332                    next if $this_define =~ / ^ PERL_ARGS_ASSERT /x;
333
334                    # Names like AbCd are internal
335                    next if $this_define =~ /[[:upper:]][[:lower:]][[:upper:]][[:lower:]]/;
336
337                    $defines{$this_define}++;
338                }
339                $sym{$_}++ for keys %defines;
340
341                # For functions, etc we get all the symbols for the latest
342                # perl passed in, but for macros, it is just the ones for the
343                # known documented ones, and we have to find the rest.  This
344                # allows us to keep the logic for that in just one place:
345                # here.
346                if ($first_perl) {
347
348                    # config.h symbols are documented; the rest aren't, so use
349                    # different flags so downstream processing knows which are
350                    # which.
351                    if ($is_config_h) {
352                        foreach my $define (keys %defines) {
353                            $remaining{$define} = 'K';
354                        }
355                    }
356                    else {
357                        foreach my $define (keys %defines) {
358                            # Don't override input 'M' symbols.
359                            $remaining{$define} = 'Z'
360                                            unless defined $remaining{$define};
361                        }
362                    }
363                }
364            }
365            else {  # Look for potential function names; remember comments
366                    # have been stripped off.
367                $sym{$_}++ for /(\b[^\W\d]\w*)/g;
368            }
369        }
370
371        # %remaining is narrowed to include only those identifier-like things
372        # that are mentioned in one of the input hdrs in this release.  (If it
373        # isn't even mentioned, it won't exist in the release.)  For those not
374        # mentioned, a key is added of the identifier-like thing in %v.  It is
375        # a subkey of this release's "todo" release, which is the next higher
376        # one.  If we are at version n, we have already done version n+1 and
377        # the provided element was mentioned there, and now it no longer is.
378        # We take that to mean that to mean that the element became provided
379        # for in n+1.
380        foreach my $symbol (keys %remaining) {
381            next if defined $sym{$symbol};  # Still exists in this release
382
383            # Gone in this release, must have come into existence in the next
384            # higher one.
385            $v{$p->{todo}}{$symbol} = delete $remaining{$symbol};
386        }
387
388        $first_perl = 0;
389    }
390
391    # After all releases, assume that anything still defined came into
392    # existence in that earliest release.
393    $v{$perls_ref->[-1]{file}}{$_} = $remaining{$_} for keys %remaining;
394
395    # Read in the parts/base files.  The hash ref has keys being all symbols
396    # found in all the files in base/, which are all we are concerned with
397    # became defined in.
398    my $base_ref = parse_todo($base_dir);
399
400
401    # Now add the results from above.  At this point, The keys of %v are the 7
402    # digit BCD version numbers, and their subkeys are the symbols provided by
403    # D:P that are first mentioned in this version, like this:
404    #   '5009002' => {
405    #                  'MY_CXT_CLONE' => 1,
406    #                  'SV_NOSTEAL' => 1,
407    #                  'UTF8_MAXBYTES' => 1
408    #                },
409
410    for my $version (keys %v) {
411
412        # Things listed in blead (the most recent file) are special.  They are
413        # there by default because we haven't found them anywhere, so they
414        # don't really exist as far as we can determine, so shouldn't be
415        # listed as existing.
416        next if $version > $perls_ref->[0]->{file};
417
418        # @new becomes the symbols for $version not already in the file for it
419        my @new = sort symbol_order grep { !exists $base_ref->{$_} }
420                                                                keys %{$v{$version}};
421        @new or next; # Nothing new, skip writing
422
423        my $file = $version;
424        $file =~ s/\.//g;
425        $file = "$base_dir/$file";
426        -e $file or die "non-existent: $file\n";
427        print "-- $file --\n";
428        if ($write) {
429            open my $fh, "+<", $file or die "$file: $!\n";
430            my @lines = <$fh>;
431            my $count = @lines;
432            for my $new (@new) {
433                my $line = format_output_line($new, $v{$version}{$new});
434                next if grep { /$line/ } @lines;
435                print "adding $new\n";
436                push @lines, $line;
437            }
438            if (@lines != $count) {
439                @lines = sort symbol_order @lines;
440                truncate $fh, 0;
441                seek $fh, 0, 0;
442                print $fh @lines;
443            }
444            close $fh;
445        }
446    }
447}
448