1#!/usr/bin/perl -w 2$|=1; 3################################################################################ 4# 5# scanprov -- scan Perl headers for macros, and add known exceptions, and 6# functions we weren't able to otherwise find. Thus the purpose 7# of this file has been expanded beyond what its name says. 8# 9# Besides the normal options, 'mode=clean' is understood as 'write', but 10# first remove any scanprov lines added in previous runs of this. 11# 12# The lines added have a code to signify they are added by us: 13# F means it is a function in embed.fnc that the normal routines didn't find 14# K means it is a macro in config.h, hence is provided, and documented 15# M means it is a provided by D:P macro 16# X means it is a known exceptional item 17# Z means it is an unprovided macro without documentation 18# 19# The regeneration routines do not know the prototypes for the macros scanned 20# for, which is gotten from documentation in the source. (If they were 21# documented, they would be put in parts/apidoc.fnc, and test cases generated 22# for them in mktodo.pl). Therefore these are all undocumented, except for 23# things from config.h which are all documented there, and many of which are 24# just defined or not defined, and hence can't be tested. Thus looking for 25# them here is the most convenient option, which is why it's done here. 26# 27# The scope of this program has also expanded to look in almost all header 28# files for almost all macros that aren't documented nor provided. This 29# allows ppport.h --api-info=/foo/ to return when a given element actually 30# came into existence, which can be a time saver for developers of the perl 31# core. 32# 33# It would be best if people would add documentation to them in the perl 34# source, and then this portion of this function would be minimized. 35# 36# On Linux nm and other uses by D:P, these are the remaining unused capital 37# flags: HJLOQY 38# 39################################################################################ 40# 41# Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz. 42# Version 2.x, Copyright (C) 2001, Paul Marquess. 43# Version 1.x, Copyright (C) 1999, Kenneth Albanowski. 44# 45# This program is free software; you can redistribute it and/or 46# modify it under the same terms as Perl itself. 47# 48################################################################################ 49 50use strict; 51use Getopt::Long; 52 53require './parts/ppptools.pl'; 54require './parts/inc/inctools'; 55require './devel/devtools.pl'; 56 57our %opt = ( 58 mode => 'check', 59 install => '/tmp/perl/install/default', 60 blead => 'bleadperl', 61 debug => 0, 62 'debug-start' => "", 63); 64 65GetOptions(\%opt, qw( install=s mode=s 66 blead=s debug=i 67 debug-start=s 68 skip-devels)) or die; 69 70my $clean = $opt{mode} eq 'clean'; 71my $write = $clean || $opt{mode} eq 'write'; 72my $debug = $opt{debug}; 73 74# Get the list of known macros. Functions are calculated separately below 75my %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () } 76 parse_embed(qw(parts/embed.fnc parts/apidoc.fnc)); 77 78# @provided is set to everthing provided 79my @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`; 80 81# There are a few exceptions that have to be dealt with specially. Add these 82# to the list of things to scan for. 83my $hard_to_test_ref = known_but_hard_to_test_for(); 84push @provided, keys %$hard_to_test_ref; 85 86my $base_dir = 'parts/base'; 87my $todo_dir = 'parts/todo'; 88 89# The identifying text placed in every entry by this program 90my $id_text = "added by $0"; 91 92if ($write) { 93 94 # Get the list of files 95 my @files = all_files_in_dir($base_dir); 96 97 # If asked to, first strip out the results of previous incarnations of 98 # this script 99 if ($clean) { 100 print "Cleaning previous $0 runs\n"; 101 foreach my $file (@files) { 102 open my $fh, "+<", $file or die "$file: $!\n"; 103 my @lines = <$fh>; 104 my $orig_count = @lines; 105 @lines = grep { $_ !~ /$id_text/ } @lines; 106 next if @lines == $orig_count; # No need to write if unchanged. 107 truncate $fh, 0; 108 seek $fh, 0, 0; 109 print $fh @lines; 110 close $fh or die "$file: $!\n"; 111 } 112 } 113 114 # The file list is returned sorted, and so the min version is in the 0th 115 # element 116 my $file = $files[0]; 117 my $min_perl = $file; 118 $min_perl =~ s,.*/,,; # The name is the integer of __MIN_PERL__ 119 120 # There are a very few special cases that we may not find in scanning, but 121 # exist all the way back. Add them now to avoid throwing later things 122 # off. 123 print "-- $file --\n"; 124 open my $fh, "+<", $file or die "$file: $!\n"; 125 my @lines = <$fh>; 126 my $count = @lines; 127 for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(), 128 # so can't be in blead, as they are skipped 129 # in testing, so no real need to check that 130 # they aren't dups. 131 my $line = format_output_line($_, 'X'); 132 next if grep { /$line/ } @lines; 133 print "Adding $_ to $file\n"; 134 push @lines, $line; 135 } 136 if ($count != @lines) { 137 @lines = sort symbol_order @lines; 138 truncate $fh, 0; 139 seek $fh, 0, 0; 140 print $fh @lines; 141 } 142 close $fh; 143 144 # Now we're going to add the hard to test symbols. The hash has been 145 # manually populated and commited, with the version number ppport supports 146 # them to. 147 # 148 # This is a hash ref with the keys being all symbols found in all the 149 # files in the directory, and the values being the perl versions of each 150 # symbol. 151 my $todo = parse_todo($todo_dir); 152 153 # The keys of $hard_to_test_ref are the symbols, and the values are 154 # subhashes, with each 'version' key being its proper perl version. 155 # Below, we invert %hard_to_test, so that the keys are the version, and 156 # the values are the symbols that go in that version 157 my %add_by_version; 158 for my $hard (keys %$hard_to_test_ref) { 159 160 # But if someone ups the min version we support, we don't want to add 161 # something less than that. 162 my $version = int_parse_version($hard_to_test_ref->{$hard}); 163 $version = $min_perl if $version < $min_perl; 164 $version = format_version_line($version); 165 166 push @{$add_by_version{$version}}, $hard 167 unless grep { $todo->{$_}->{version} eq $hard } keys %$todo; 168 } 169 170 # Only a few files will have exceptions that apply to them. Rewrite each 171 foreach my $version (keys %add_by_version) { 172 if (is_devel_release($version)) { 173 my ($super, $major, $minor) = parse_version($version); 174 $major++; # Go to next highest version that isn't a devel 175 $version = "$super.$major.0"; 176 } 177 178 my $file = "$todo_dir/" . int_parse_version($version); 179 print "-- Adding known exceptions to $file --\n"; 180 open my $fh, "+<", $file or die "$file: $!\n"; 181 my @lines = <$fh>; 182 my $count = @lines; 183 push @lines, format_version_line($version) . "\n" unless @lines; 184 foreach my $symbol (@{$add_by_version{$version}}) { 185 my $line = format_output_line($symbol, 'X'); 186 unless (grep { /$line/ } @lines) {; 187 print "adding $symbol\n"; 188 push @lines, $line unless grep { /$line/ } @lines; 189 } 190 } 191 if (@lines != $count) { 192 @lines = sort symbol_order @lines; 193 truncate $fh, 0; 194 seek $fh, 0, 0; 195 print $fh @lines; 196 } 197 close $fh; 198 } 199} 200 201# Now that we've added the exceptions to a few files, we can parse 202# and deal with all of them. 203my $perls_ref = get_and_sort_perls(\%opt); 204 205die "Couldn't find any perls" unless @$perls_ref > 1; 206 207find_first_mentions($perls_ref, # perls to look in 208 \@provided, # List of symbol names to look for 209 '*.h', # Look in all hdrs. 210 1, # Strip comments 211 'M' 212 ); 213 214# Now look for functions that we didn't test in mktodo.pl, generally because 215# these were hidden behind #ifdef's. 216my $base_ref = parse_todo($base_dir); 217my @functions = parse_embed(qw(parts/embed.fnc)); 218 219# We could just gather data for the publicly available ones, but having this 220# information available for everything is useful. 221#@functions = grep { exists $_->{flags}{A} } @functions; 222 223# The ones we don't have info on are the ones in embed.fnc that aren't in the 224# base files. Certain of these will only be in the Perl_foo form. 225my @missing = map { exists $base_ref->{$_->{name}} 226 ? () 227 : ((exists $_->{flags}{p} && exists $_->{flags}{o}) 228 ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}} 229 ? () 230 : "Perl_$_->{name}")) 231 : $_->{name}) 232 } @functions; 233 234# These symbols will be found in the autogen'd files, and they may be 235# commented out in them. 236find_first_mentions($perls_ref, 237 \@missing, 238 [ 'embed.h', 'proto.h' ], 239 0, # Don't strip comments 240 'F' 241 ); 242 243sub symbol_order # Sort based on first word on line 244{ 245 my $stripped_a = $a =~ s/ ^ \s* //rx; 246 $stripped_a =~ s/ \s.* //x; 247 248 my $stripped_b = $b =~ s/ ^ \s* //rx; 249 $stripped_b =~ s/ \s.* //x; 250 251 return dictionary_order($stripped_a, $stripped_b); 252} 253 254sub format_output_line 255{ 256 my $sym = shift; 257 my $code = shift; 258 259 return sprintf "%-30s # $code $id_text\n", $sym; 260} 261 262sub find_first_mentions 263{ 264 my $perls_ref = shift; # List of perls to look in 265 my $look_for_ref = shift; # List of symbol names to look for 266 my $hdrs = shift; # Glob of hdrs to look in 267 my $strip_comments = shift; 268 my $code = shift; # Mark entries as having this type 269 270 use feature 'state'; 271 state $first_perl = 1; 272 273 $hdrs = [ $hdrs ] unless ref $hdrs; 274 275 my %remaining; 276 $remaining{$_} = $code for @$look_for_ref; 277 278 my %v; 279 280 # We look in descending order of perl versions. Each time through the 281 # loop %remaining is narrowed. 282 for my $p (@$perls_ref) { 283 print "checking perl $p->{version}...\n"; 284 285 # Get the hdr files associated with this version 286 my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`; 287 chomp $archlib; 288 local @ARGV; 289 push @ARGV, glob "$archlib/CORE/$_" for @$hdrs; 290 291 # %sym's keys are every single thing that looks like an identifier 292 # (beginning with a non-digit \w, followed by \w*) that occurs in any 293 # header, regardless of where (outside of comments). For macros, it 294 # can't end in an underscore, nor be like 'AbCd', which are marks for 295 # internal. 296 my %sym; 297 298 local $/ = undef; 299 while (<<>>) { # Read in the whole next file as one string. 300 301 # This would override function definitions with macro ones 302 next if $code eq 'M' && $ARGV =~ m! / embed\.h $ !x; 303 304 my $is_config_h = $ARGV =~ m! / config\.h $ !x; 305 306 my $contents = $_; 307 308 # Strip initial '/*' in config.h /*#define... lines. This just 309 # means the item isn't available on the platform this program is 310 # being run on. 311 $contents =~ s! ^ /\* \s* (?=\#\s*define\s) !!mx if $is_config_h; 312 313 # Strip comments, from perl faq 314 if ($strip_comments) { 315 $contents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse; 316 } 317 318 # For macros, we look for #defines 319 if ($code eq 'M') { 320 my %defines; 321 322 while ($contents =~ m/ ^ \s* \# \s* define \s+ 323 324 # A symbol not ending in underscore 325 ( [A-Za-z][_A-Za-z0-9]*[A-Za-z0-9] ) 326 /mxg) 327 { 328 my $this_define = $1; 329 330 # These are internal and not of external interest, so just 331 # noise if we were to index them 332 next if $this_define =~ / ^ PERL_ARGS_ASSERT /x; 333 334 # Names like AbCd are internal 335 next if $this_define =~ /[[:upper:]][[:lower:]][[:upper:]][[:lower:]]/; 336 337 $defines{$this_define}++; 338 } 339 $sym{$_}++ for keys %defines; 340 341 # For functions, etc we get all the symbols for the latest 342 # perl passed in, but for macros, it is just the ones for the 343 # known documented ones, and we have to find the rest. This 344 # allows us to keep the logic for that in just one place: 345 # here. 346 if ($first_perl) { 347 348 # config.h symbols are documented; the rest aren't, so use 349 # different flags so downstream processing knows which are 350 # which. 351 if ($is_config_h) { 352 foreach my $define (keys %defines) { 353 $remaining{$define} = 'K'; 354 } 355 } 356 else { 357 foreach my $define (keys %defines) { 358 # Don't override input 'M' symbols. 359 $remaining{$define} = 'Z' 360 unless defined $remaining{$define}; 361 } 362 } 363 } 364 } 365 else { # Look for potential function names; remember comments 366 # have been stripped off. 367 $sym{$_}++ for /(\b[^\W\d]\w*)/g; 368 } 369 } 370 371 # %remaining is narrowed to include only those identifier-like things 372 # that are mentioned in one of the input hdrs in this release. (If it 373 # isn't even mentioned, it won't exist in the release.) For those not 374 # mentioned, a key is added of the identifier-like thing in %v. It is 375 # a subkey of this release's "todo" release, which is the next higher 376 # one. If we are at version n, we have already done version n+1 and 377 # the provided element was mentioned there, and now it no longer is. 378 # We take that to mean that to mean that the element became provided 379 # for in n+1. 380 foreach my $symbol (keys %remaining) { 381 next if defined $sym{$symbol}; # Still exists in this release 382 383 # Gone in this release, must have come into existence in the next 384 # higher one. 385 $v{$p->{todo}}{$symbol} = delete $remaining{$symbol}; 386 } 387 388 $first_perl = 0; 389 } 390 391 # After all releases, assume that anything still defined came into 392 # existence in that earliest release. 393 $v{$perls_ref->[-1]{file}}{$_} = $remaining{$_} for keys %remaining; 394 395 # Read in the parts/base files. The hash ref has keys being all symbols 396 # found in all the files in base/, which are all we are concerned with 397 # became defined in. 398 my $base_ref = parse_todo($base_dir); 399 400 401 # Now add the results from above. At this point, The keys of %v are the 7 402 # digit BCD version numbers, and their subkeys are the symbols provided by 403 # D:P that are first mentioned in this version, like this: 404 # '5009002' => { 405 # 'MY_CXT_CLONE' => 1, 406 # 'SV_NOSTEAL' => 1, 407 # 'UTF8_MAXBYTES' => 1 408 # }, 409 410 for my $version (keys %v) { 411 412 # Things listed in blead (the most recent file) are special. They are 413 # there by default because we haven't found them anywhere, so they 414 # don't really exist as far as we can determine, so shouldn't be 415 # listed as existing. 416 next if $version > $perls_ref->[0]->{file}; 417 418 # @new becomes the symbols for $version not already in the file for it 419 my @new = sort symbol_order grep { !exists $base_ref->{$_} } 420 keys %{$v{$version}}; 421 @new or next; # Nothing new, skip writing 422 423 my $file = $version; 424 $file =~ s/\.//g; 425 $file = "$base_dir/$file"; 426 -e $file or die "non-existent: $file\n"; 427 print "-- $file --\n"; 428 if ($write) { 429 open my $fh, "+<", $file or die "$file: $!\n"; 430 my @lines = <$fh>; 431 my $count = @lines; 432 for my $new (@new) { 433 my $line = format_output_line($new, $v{$version}{$new}); 434 next if grep { /$line/ } @lines; 435 print "adding $new\n"; 436 push @lines, $line; 437 } 438 if (@lines != $count) { 439 @lines = sort symbol_order @lines; 440 truncate $fh, 0; 441 seek $fh, 0, 0; 442 print $fh @lines; 443 } 444 close $fh; 445 } 446 } 447} 448