1#!/usr/bin/perl -w 2$|=1; 3################################################################################ 4# 5# scanprov -- scan Perl headers for macros, and add known exceptions, and 6# functions we weren't able to otherwise find. Thus the purpose 7# of this file has been expanded beyond what its name says. 8# 9# Besides the normal options, 'mode=clean' is understood as 'write', but 10# first remove any scanprov lines added in previous runs of this. 11# 12# The lines added have a code to signify they are added by us: 13# F means it is a function in embed.fnc that the normal routines didn't find 14# K means it is a macro in config.h, hence is provided, and documented 15# M means it is a provided by D:P macro 16# X means it is a known exceptional item 17# Z means it is an unprovided macro without documentation 18# 19# The regeneration routines do not know the prototypes for the macros scanned 20# for, which is gotten from documentation in the source. (If they were 21# documented, they would be put in parts/apidoc.fnc, and test cases generated 22# for them in mktodo.pl). Therefore these are all undocumented, except for 23# things from config.h which are all documented there, and many of which are 24# just defined or not defined, and hence can't be tested. Thus looking for 25# them here is the most convenient option, which is why it's done here. 26# 27# The scope of this program has also expanded to look in almost all header 28# files for almost all macros that aren't documented nor provided. This 29# allows ppport.h --api-info=/foo/ to return when a given element actually 30# came into existence, which can be a time saver for developers of the perl 31# core. 32# 33# It would be best if people would add documentation to them in the perl 34# source, and then this portion of this function would be minimized. 35# 36# On Linux nm and other uses by D:P, these are the remaining unused capital 37# flags: HJLOQY 38# 39################################################################################ 40# 41# Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz. 42# Version 2.x, Copyright (C) 2001, Paul Marquess. 43# Version 1.x, Copyright (C) 1999, Kenneth Albanowski. 44# 45# This program is free software; you can redistribute it and/or 46# modify it under the same terms as Perl itself. 47# 48################################################################################ 49 50use strict; 51use Getopt::Long; 52 53require './parts/ppptools.pl'; 54require './parts/inc/inctools'; 55require './devel/devtools.pl'; 56 57our %opt = ( 58 mode => 'check', 59 install => '/tmp/perl/install/default', 60 blead => 'bleadperl', 61 debug => 0, 62 'debug-start' => "", 63); 64 65GetOptions(\%opt, qw( install=s mode=s blead=s debug=i debug-start=s)) or die; 66 67my $clean = $opt{mode} eq 'clean'; 68my $write = $clean || $opt{mode} eq 'write'; 69my $debug = $opt{debug}; 70 71# Get the list of known macros. Functions are calculated separately below 72my %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () } 73 parse_embed(qw(parts/embed.fnc parts/apidoc.fnc)); 74 75# @provided is set to everthing provided 76my @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`; 77 78# There are a few exceptions that have to be dealt with specially. Add these 79# to the list of things to scan for. 80my $hard_to_test_ref = known_but_hard_to_test_for(); 81push @provided, keys %$hard_to_test_ref; 82 83my $base_dir = 'parts/base'; 84my $todo_dir = 'parts/todo'; 85 86# The identifying text placed in every entry by this program 87my $id_text = "added by $0"; 88 89if ($write) { 90 91 # Get the list of files 92 my @files = all_files_in_dir($base_dir); 93 94 # If asked to, first strip out the results of previous incarnations of 95 # this script 96 if ($clean) { 97 print "Cleaning previous $0 runs\n"; 98 foreach my $file (@files) { 99 open my $fh, "+<", $file or die "$file: $!\n"; 100 my @lines = <$fh>; 101 my $orig_count = @lines; 102 @lines = grep { $_ !~ /$id_text/ } @lines; 103 next if @lines == $orig_count; # No need to write if unchanged. 104 truncate $fh, 0; 105 seek $fh, 0, 0; 106 print $fh @lines; 107 close $fh or die "$file: $!\n"; 108 } 109 } 110 111 # The file list is returned sorted, and so the min version is in the 0th 112 # element 113 my $file = $files[0]; 114 my $min_perl = $file; 115 $min_perl =~ s,.*/,,; # The name is the integer of __MIN_PERL__ 116 117 # There are a very few special cases that we may not find in scanning, but 118 # exist all the way back. Add them now to avoid throwing later things 119 # off. 120 print "-- $file --\n"; 121 open my $fh, "+<", $file or die "$file: $!\n"; 122 my @lines = <$fh>; 123 my $count = @lines; 124 for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(), 125 # so can't be in blead, as they are skipped 126 # in testing, so no real need to check that 127 # they aren't dups. 128 my $line = format_output_line($_, 'X'); 129 next if grep { /$line/ } @lines; 130 print "Adding $_ to $file\n"; 131 push @lines, $line; 132 } 133 if ($count != @lines) { 134 @lines = sort symbol_order @lines; 135 truncate $fh, 0; 136 seek $fh, 0, 0; 137 print $fh @lines; 138 } 139 close $fh; 140 141 # Now we're going to add the hard to test symbols. The hash has been 142 # manually populated and commited, with the version number ppport supports 143 # them to. 144 # 145 # This is a hash ref with the keys being all symbols found in all the 146 # files in the directory, and the values being the perl versions of each 147 # symbol. 148 my $todo = parse_todo($todo_dir); 149 150 # The keys of $hard_to_test_ref are the symbols, and the values are 151 # subhashes, with each 'version' key being its proper perl version. 152 # Below, we invert %hard_to_test, so that the keys are the version, and 153 # the values are the symbols that go in that version 154 my %add_by_version; 155 for my $hard (keys %$hard_to_test_ref) { 156 157 # But if someone ups the min version we support, we don't want to add 158 # something less than that. 159 my $version = int_parse_version($hard_to_test_ref->{$hard}); 160 $version = $min_perl if $version < $min_perl; 161 $version = format_version_line($version); 162 163 push @{$add_by_version{$version}}, $hard 164 unless grep { $todo->{$_}->{version} eq $hard } keys %$todo; 165 } 166 167 # Only a few files will have exceptions that apply to them. Rewrite each 168 foreach my $version (keys %add_by_version) { 169 my $file = "$todo_dir/" . int_parse_version($version); 170 print "-- Adding known exceptions to $file --\n"; 171 open my $fh, "+<", $file or die "$file: $!\n"; 172 my @lines = <$fh>; 173 my $count = @lines; 174 push @lines, format_version_line($version) . "\n" unless @lines; 175 foreach my $symbol (@{$add_by_version{$version}}) { 176 my $line = format_output_line($symbol, 'X'); 177 unless (grep { /$line/ } @lines) {; 178 print "adding $symbol\n"; 179 push @lines, $line unless grep { /$line/ } @lines; 180 } 181 } 182 if (@lines != $count) { 183 @lines = sort symbol_order @lines; 184 truncate $fh, 0; 185 seek $fh, 0, 0; 186 print $fh @lines; 187 } 188 close $fh; 189 } 190} 191 192# Now that we've added the exceptions to a few files, we can parse 193# and deal with all of them. 194my $perls_ref = get_and_sort_perls(\%opt); 195 196die "Couldn't find any perls" unless @$perls_ref > 1; 197 198find_first_mentions($perls_ref, # perls to look in 199 \@provided, # List of symbol names to look for 200 '*.h', # Look in all hdrs. 201 1, # Strip comments 202 'M' 203 ); 204 205# Now look for functions that we didn't test in mktodo.pl, generally because 206# these were hidden behind #ifdef's. 207my $base_ref = parse_todo($base_dir); 208my @functions = parse_embed(qw(parts/embed.fnc)); 209 210# We could just gather data for the publicly available ones, but having this 211# information available for everything is useful. 212#@functions = grep { exists $_->{flags}{A} } @functions; 213 214# The ones we don't have info on are the ones in embed.fnc that aren't in the 215# base files. Certain of these will only be in the Perl_foo form. 216my @missing = map { exists $base_ref->{$_->{name}} 217 ? () 218 : ((exists $_->{flags}{p} && exists $_->{flags}{o}) 219 ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}} 220 ? () 221 : "Perl_$_->{name}")) 222 : $_->{name}) 223 } @functions; 224 225# These symbols will be found in the autogen'd files, and they may be 226# commented out in them. 227find_first_mentions($perls_ref, 228 \@missing, 229 [ 'embed.h', 'proto.h' ], 230 0, # Don't strip comments 231 'F' 232 ); 233 234sub symbol_order # Sort based on first word on line 235{ 236 my $stripped_a = $a =~ s/ ^ \s* //rx; 237 $stripped_a =~ s/ \s.* //x; 238 239 my $stripped_b = $b =~ s/ ^ \s* //rx; 240 $stripped_b =~ s/ \s.* //x; 241 242 return dictionary_order($stripped_a, $stripped_b); 243} 244 245sub format_output_line 246{ 247 my $sym = shift; 248 my $code = shift; 249 250 return sprintf "%-30s # $code $id_text\n", $sym; 251} 252 253sub find_first_mentions 254{ 255 my $perls_ref = shift; # List of perls to look in 256 my $look_for_ref = shift; # List of symbol names to look for 257 my $hdrs = shift; # Glob of hdrs to look in 258 my $strip_comments = shift; 259 my $code = shift; # Mark entries as having this type 260 261 use feature 'state'; 262 state $first_perl = 1; 263 264 $hdrs = [ $hdrs ] unless ref $hdrs; 265 266 my %remaining; 267 $remaining{$_} = $code for @$look_for_ref; 268 269 my %v; 270 271 # We look in descending order of perl versions. Each time through the 272 # loop %remaining is narrowed. 273 for my $p (@$perls_ref) { 274 print "checking perl $p->{version}...\n"; 275 276 # Get the hdr files associated with this version 277 my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`; 278 chomp $archlib; 279 local @ARGV; 280 push @ARGV, glob "$archlib/CORE/$_" for @$hdrs; 281 282 # %sym's keys are every single thing that looks like an identifier 283 # (beginning with a non-digit \w, followed by \w*) that occurs in any 284 # header, regardless of where (outside of comments). For macros, it 285 # can't end in an underscore, nor be like 'AbCd', which are marks for 286 # internal. 287 my %sym; 288 289 local $/ = undef; 290 while (<<>>) { # Read in the whole next file as one string. 291 292 # This would override function definitions with macro ones 293 next if $code eq 'M' && $ARGV =~ m! / embed\.h $ !x; 294 295 my $is_config_h = $ARGV =~ m! / config\.h $ !x; 296 297 my $contents = $_; 298 299 # Strip initial '/*' in config.h /*#define... lines. This just 300 # means the item isn't available on the platform this program is 301 # being run on. 302 $contents =~ s! ^ /\* \s* (?=\#\s*define\s) !!mx if $is_config_h; 303 304 # Strip comments, from perl faq 305 if ($strip_comments) { 306 $contents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse; 307 } 308 309 # For macros, we look for #defines 310 if ($code eq 'M') { 311 my %defines; 312 313 while ($contents =~ m/ ^ \s* \# \s* define \s+ 314 315 # A symbol not ending in underscore 316 ( [A-Za-z][_A-Za-z0-9]*[A-Za-z0-9] ) 317 /mxg) 318 { 319 my $this_define = $1; 320 321 # These are internal and not of external interest, so just 322 # noise if we were to index them 323 next if $this_define =~ / ^ PERL_ARGS_ASSERT /x; 324 325 # Names like AbCd are internal 326 next if $this_define =~ /[[:upper:]][[:lower:]][[:upper:]][[:lower:]]/; 327 328 $defines{$this_define}++; 329 } 330 $sym{$_}++ for keys %defines; 331 332 # For functions, etc we get all the symbols for the latest 333 # perl passed in, but for macros, it is just the ones for the 334 # known documented ones, and we have to find the rest. This 335 # allows us to keep the logic for that in just one place: 336 # here. 337 if ($first_perl) { 338 339 # config.h symbols are documented; the rest aren't, so use 340 # different flags so downstream processing knows which are 341 # which. 342 if ($is_config_h) { 343 foreach my $define (keys %defines) { 344 $remaining{$define} = 'K'; 345 } 346 } 347 else { 348 foreach my $define (keys %defines) { 349 # Don't override input 'M' symbols. 350 $remaining{$define} = 'Z' 351 unless defined $remaining{$define}; 352 } 353 } 354 } 355 } 356 else { # Look for potential function names; remember comments 357 # have been stripped off. 358 $sym{$_}++ for /(\b[^\W\d]\w*)/g; 359 } 360 } 361 362 # %remaining is narrowed to include only those identifier-like things 363 # that are mentioned in one of the input hdrs in this release. (If it 364 # isn't even mentioned, it won't exist in the release.) For those not 365 # mentioned, a key is added of the identifier-like thing in %v. It is 366 # a subkey of this release's "todo" release, which is the next higher 367 # one. If we are at version n, we have already done version n+1 and 368 # the provided element was mentioned there, and now it no longer is. 369 # We take that to mean that to mean that the element became provided 370 # for in n+1. 371 foreach my $symbol (keys %remaining) { 372 next if defined $sym{$symbol}; # Still exists in this release 373 374 # Gone in this release, must have come into existence in the next 375 # higher one. 376 $v{$p->{todo}}{$symbol} = delete $remaining{$symbol}; 377 } 378 379 $first_perl = 0; 380 } 381 382 # After all releases, assume that anything still defined came into 383 # existence in that earliest release. 384 $v{$perls_ref->[-1]{file}}{$_} = $remaining{$_} for keys %remaining; 385 386 # Read in the parts/base files. The hash ref has keys being all symbols 387 # found in all the files in base/, which are all we are concerned with 388 # became defined in. 389 my $base_ref = parse_todo($base_dir); 390 391 392 # Now add the results from above. At this point, The keys of %v are the 7 393 # digit BCD version numbers, and their subkeys are the symbols provided by 394 # D:P that are first mentioned in this version, like this: 395 # '5009002' => { 396 # 'MY_CXT_CLONE' => 1, 397 # 'SV_NOSTEAL' => 1, 398 # 'UTF8_MAXBYTES' => 1 399 # }, 400 401 for my $version (keys %v) { 402 403 # Things listed in blead (the most recent file) are special. They are 404 # there by default because we haven't found them anywhere, so they 405 # don't really exist as far as we can determine, so shouldn't be 406 # listed as existing. 407 next if $version > $perls_ref->[0]->{file}; 408 409 # @new becomes the symbols for $version not already in the file for it 410 my @new = sort symbol_order grep { !exists $base_ref->{$_} } 411 keys %{$v{$version}}; 412 @new or next; # Nothing new, skip writing 413 414 my $file = $version; 415 $file =~ s/\.//g; 416 $file = "$base_dir/$file"; 417 -e $file or die "non-existent: $file\n"; 418 print "-- $file --\n"; 419 if ($write) { 420 open my $fh, "+<", $file or die "$file: $!\n"; 421 my @lines = <$fh>; 422 my $count = @lines; 423 for my $new (@new) { 424 my $line = format_output_line($new, $v{$version}{$new}); 425 next if grep { /$line/ } @lines; 426 print "adding $new\n"; 427 push @lines, $line; 428 } 429 if (@lines != $count) { 430 @lines = sort symbol_order @lines; 431 truncate $fh, 0; 432 seek $fh, 0, 0; 433 print $fh @lines; 434 } 435 close $fh; 436 } 437 } 438} 439