1#!perl 2use 5.010; 3use strict; 4use warnings; 5use lib 'Porting'; 6use Maintainers qw/%Modules/; 7use lib 'dist/Module-CoreList/lib'; 8use Module::CoreList; 9use Getopt::Long; 10 11=head1 USAGE 12 13 # generate the module changes for the Perl you are currently building 14 ./perl -Ilib Porting/corelist-perldelta.pl 15 16 # update the module changes for the Perl you are currently building 17 ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod 18 19 # generate a diff between the corelist sections of two perldelta* files: 20 perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod 21 22=head1 ABOUT 23 24corelist-perldelta.pl is a bit schizophrenic. The part to generate the 25new Perldelta text does not need Algorithm::Diff, but wants to be 26run with the freshly built Perl. 27 28The part to check the diff wants to be run with a Perl that has an up-to-date 29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>. 30 31Ideally, the program will be split into two separate programs, one 32to generate the text and one to show the diff between the 33corelist sections of the last perldelta and the next perldelta. 34 35Currently no information about Removed Modules is displayed in any of the 36modes. 37 38=cut 39 40my %sections = ( 41 new => qr/New Modules and Pragma(ta)?/, 42 updated => qr/Updated Modules and Pragma(ta)?/, 43 removed => qr/Removed Modules and Pragma(ta)?/, 44); 45 46my %titles = ( 47 new => 'New Modules and Pragmata', 48 updated => 'Updated Modules and Pragmata', 49 removed => 'Removed Modules and Pragmata', 50); 51 52my $deprecated; 53 54sub run { 55 my %opt = (mode => 'generate'); 56 57 GetOptions(\%opt, 58 'mode|m:s', # 'generate', 'check', 'update' 59 ); 60 61 # by default, compare latest two version in CoreList; 62 my ($old, $new) = latest_two_perl_versions(); 63 64 # use the provided versions if present 65 # @ARGV >=2 means [old_version] [new_version] [path/to/file] 66 if ( @ARGV >= 2) { 67 ($old, $new) = (shift @ARGV, shift @ARGV); 68 die "$old is an invalid version\n" if not exists 69 $Module::CoreList::version{$old}; 70 die "$new is an invalid version\n" if not exists 71 $Module::CoreList::version{$new}; 72 } 73 74 if ( $opt{mode} eq 'generate' ) { 75 do_generate($old => $new); 76 } 77 elsif ( $opt{mode} eq 'check' ) { 78 do_check(\*ARGV, $old => $new); 79 } 80 elsif ( $opt{mode} eq 'update' ) { 81 do_update_existing(shift @ARGV, $old => $new); 82 } 83 else { 84 die "Unrecognized mode '$opt{mode}'\n"; 85 } 86 87 exit 0; 88} 89 90sub latest_two_perl_versions { 91 92 my @versions = sort keys %Module::CoreList::version; 93 94 my $new = pop @versions; 95 96 # If a fully-padded version number ends in a zero (as in "5.019010"), that 97 # version shows up in %Module::CoreList::version both with and without its 98 # trailing zeros. So skip all versions that are numerically equal to $new. 99 pop @versions while @versions && $versions[-1] == $new; 100 101 die "Too few distinct core versions in %Module::CoreList::version ?!\n" 102 if !@versions; 103 104 return $versions[-1], $new; 105} 106 107# Given two perl versions, it returns a list describing the core distributions that have changed. 108# The first three elements are hashrefs corresponding to new, updated, and removed modules 109# and are of the form (mostly, see the special remarks about removed): 110# 'Distribution Name' => ['Distribution Name', previous version number, current version number] 111# where the version number is undef if the distribution did not exist. 112# The fourth element is an arrayref of core distribution names of those distribution for which it 113# is unknown whether they have changed and therefore need to be manually checked. 114# 115# In most cases, the distribution name in %Modules corresponds to the module that is representative 116# of the distribution as listed in Module::CoreList. However, there are a few distribution names 117# that do not correspond to a module. %distToModules has been created which maps the distribution 118# name to a representative module. The representative module was chosen by either looking at the 119# Makefile of the distribution or by seeing which module the distribution has been traditionally 120# listed under in past perldeltas. 121# 122# There are a few distributions for which there is no single representative module (e.g. libnet). 123# These distributions are returned as the last element of the list. 124# 125# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p. 126# This list contains modules and pragmata that may also be present in Module::CoreList. 127# A list of modules are in the list @unclaimedModules, which were manually listed based on whether 128# they were independent modules and whether they have been listed in past perldeltas. 129# The pragmata were found by doing something like: 130# say for sort grep { $_ eq lc $_ and !exists $Modules{$_}} 131# keys %{$Module::CoreList::version{'5.019003'}} 132# and manually filtering out pragmata that were already covered. 133# 134# It is currently not possible to differentiate between a removed module and a removed 135# distribution. Therefore, the removed hashref contains every module that has been removed, even if 136# the module's corresponding distribution has not been removed. 137 138sub corelist_delta { 139 my ($old, $new) = @_; 140 my $corelist = \%Module::CoreList::version; 141 my %changes = Module::CoreList::changes_between( $old, $new ); 142 $deprecated = $Module::CoreList::deprecated{$new}; 143 144 my $getModifyType = sub { 145 my $data = shift; 146 if ( exists $data->{left} and exists $data->{right} ) { 147 return 'updated'; 148 } 149 elsif ( !exists $data->{left} and exists $data->{right} ) { 150 return 'new'; 151 } 152 elsif ( exists $data->{left} and !exists $data->{right} ) { 153 return 'removed'; 154 } 155 return undef; 156 }; 157 158 my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB 159 DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl 160 ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob 161 File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash 162 I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via 163 Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash 164 Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm 165 Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap XS::APItest Win32CORE builtin/; 166 my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/; 167 my @unclaimed = (@unclaimedModules, @unclaimedPragmata); 168 169 my %distToModules = ( 170 'IO-Compress' => [ 171 { 172 'name' => 'IO::Compress', 173 'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ), 174 'data' => $changes{'IO::Compress::Base'} 175 } 176 ], 177 'libnet' => [ 178 { 179 'name' => 'libnet', 180 'modification' => $getModifyType->( $changes{'Net::Cmd'} ), 181 'data' => $changes{'Net::Cmd'} 182 } 183 ], 184 'PathTools' => [ 185 { 186 'name' => 'File::Spec', 187 'modification' => $getModifyType->( $changes{'Cwd'} ), 188 'data' => $changes{'Cwd'} 189 } 190 ], 191 'podlators' => [ 192 { 193 'name' => 'podlators', 194 'modification' => $getModifyType->( $changes{'Pod::Text'} ), 195 'data' => $changes{'Pod::Text'} 196 } 197 ], 198 'Scalar-List-Utils' => [ 199 { 200 'name' => 'List::Util', 201 'modification' => $getModifyType->( $changes{'List::Util'} ), 202 'data' => $changes{'List::Util'} 203 }, 204 { 205 'name' => 'Scalar::Util', 206 'modification' => $getModifyType->( $changes{'Scalar::Util'} ), 207 'data' => $changes{'Scalar::Util'} 208 }, 209 { 210 'name' => 'Sub::Util', 211 'modification' => $getModifyType->( $changes{'Sub::Util'} ), 212 'data' => $changes{'Sub::Util'} 213 } 214 ], 215 'Text-Tabs+Wrap' => [ 216 { 217 'name' => 'Text::Tabs', 218 'modification' => $getModifyType->( $changes{'Text::Tabs'} ), 219 'data' => $changes{'Text::Tabs'} 220 }, 221 { 222 'name' => 'Text::Wrap', 223 'modification' => $getModifyType->( $changes{'Text::Wrap'} ), 224 'data' => $changes{'Text::Wrap'} 225 } 226 ], 227 ); 228 229 # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ] 230 my $deltaGrouping = {}; 231 232 # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it 233 my @manuallyCheck; 234 235 # %Modules defines what is currently in core 236 for my $k ( keys %Modules ) { 237 next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed 238 next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed 239 240 my ( $distName, $modifyType, $data ); 241 242 if ( exists $changes{$k} ) { 243 $distName = $k; 244 $modifyType = $getModifyType->( $changes{$k} ); 245 $data = $changes{$k}; 246 } 247 elsif ( exists $distToModules{$k} ) { 248 # modification will be undef if the distribution has not changed 249 my @modules = grep { $_->{modification} } @{ $distToModules{$k} }; 250 for (@modules) { 251 $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ]; 252 } 253 next; 254 } 255 else { 256 push @manuallyCheck, $k and next; 257 } 258 259 $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ]; 260 } 261 262 for my $k (@unclaimed) { 263 if ( exists $changes{$k} ) { 264 $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} = 265 [ $k, $changes{$k}->{left}, $changes{$k}->{right} ]; 266 } 267 } 268 269 # in old corelist, but not this one => removed 270 # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from 271 # distributions will show up here, too. Some person will have to review to see what's 272 # important. That's the best we can do without a historical Maintainers.pl 273 for my $k ( keys %{ $corelist->{$old} } ) { 274 if ( ! exists $corelist->{$new}{$k} ) { 275 $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ]; 276 } 277 } 278 279 return ( 280 \%{ $deltaGrouping->{'new'} }, 281 \%{ $deltaGrouping->{'removed'} }, 282 \%{ $deltaGrouping->{'updated'} }, 283 \@manuallyCheck 284 ); 285} 286 287# currently does not update the Removed Module section 288sub do_update_existing { 289 my ( $existing, $old, $new ) = @_; 290 291 my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new ); 292 if (@{$manuallyCheck}) { 293 print "It cannot be determined whether the following distributions have changed.\n"; 294 print "Please check and list accordingly:\n"; 295 say "\t* $_" for sort @{$manuallyCheck}; 296 print "\n"; 297 } 298 299 my $data = { 300 new => $added, 301 updated => $updated, 302 #removed => $removed, ignore removed for now 303 }; 304 305 my $text = DeltaUpdater::transform_pod( $existing, $data ); 306 open my $out, '>', $existing or die "can't open perldelta file $existing: $!"; 307 binmode($out); 308 print $out $text; 309 close $out; 310 say "The New and Updated Modules and Pragmata sections in $existing have been updated"; 311 say "Please ensure the Removed Modules and Pragmata section is up-to-date"; 312} 313 314sub do_generate { 315 my ($old, $new) = @_; 316 my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new); 317 318 if ($manuallyCheck) { 319 print "\nXXXIt cannot be determined whether the following distributions have changed.\n"; 320 print "Please check and list accordingly:\n"; 321 say "\t$_" for @{$manuallyCheck}; 322 print "\n"; 323 } 324 325 my $data = { 326 new => $added, 327 updated => $updated, 328 #removed => $removed, ignore removed for now 329 }; 330 331 say DeltaUpdater::sections_to_pod($data) 332} 333 334sub do_check { 335 my ($in, $old, $new) = @_; 336 337 my $delta = DeltaParser->new($in); 338 my ($added, $removed, $updated) = corelist_delta($old => $new); 339 340 # because of the difficulty in identifying the distribution for removed modules 341 # don't bother checking them 342 for my $ck ([ 'new', $delta->new_modules, $added ], 343 #[ 'removed', $delta->removed_modules, $removed ], 344 [ 'updated', $delta->updated_modules, $updated ] ) { 345 my @delta = @{ $ck->[1] }; 346 my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] }; 347 348 printf $ck->[0] . ":\n"; 349 350 require Algorithm::Diff; 351 my $diff = Algorithm::Diff->new(map { 352 [map { join q{ } => grep defined, @{ $_ } } @{ $_ }] 353 } \@delta, \@corelist); 354 355 while ($diff->Next) { 356 next if $diff->Same; 357 my $sep = ''; 358 if (!$diff->Items(2)) { 359 printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 )); 360 } elsif(!$diff->Items(1)) { 361 printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 )); 362 } else { 363 $sep = "---\n"; 364 printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 )); 365 } 366 print "Delta< $_\n" for $diff->Items(1); 367 print $sep; 368 print "Corelist> $_\n" for $diff->Items(2); 369 } 370 371 print "\n"; 372 } 373} 374 375{ 376 377 package DeltaUpdater; 378 use List::Util 'reduce'; 379 380 sub get_section_name_from_heading { 381 my $heading = shift; 382 while (my ($key, $expression) = each %sections) { 383 if ($heading =~ $expression) { 384 return $titles{$key}; 385 } 386 } 387 die "$heading did not match any section"; 388 } 389 390 sub is_desired_section_name { 391 for (values %sections) { 392 return 1 if $_[0] =~ $_; 393 } 394 return 0; 395 } 396 397 # verify the module and pragmata in the section, changing the stated version if necessary 398 # this subroutine warns if the module name cannot be parsed or if it is not listed in 399 # the results returned from corelist_delta() 400 # 401 # a side-effect of calling this function is that modules present in the section are 402 # removed from $data, resulting in $data containing only those modules and pragmata 403 # that were not listed in the perldelta file. This means we can then pass $data to 404 # add_to_section() without worrying about filtering out duplicates 405 sub update_section { 406 my ( $section, $data, $title ) = @_; 407 my @items = @{ $section->{items} }; 408 409 for my $item (@items) { 410 411 my $content = $item->{text}; 412 my $module = $item->{name}; 413 414 #skip dummy items 415 next if !$module and $content =~ /\s*xx*\s*/i; 416 417 say "Could not parse module name; line is:\n\t$content" and next unless $module; 418 419 if ( !$data->{$title}{$module} ) { 420 print "$module is not listed as being $title in Module::CoreList.\n"; 421 print "Ensure Module::CoreList has been updated and\n"; 422 print "check to see that the distribution is not listed under another name.\n\n"; 423 next; 424 } 425 426 if ( $title eq 'new' ) { 427 my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m; 428 say "Could not parse new version for $module; line is:\n\t$content" and next unless $new; 429 if ( $data->{$title}{$module}[2] ne $new ) { 430 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; 431 } 432 $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me; 433 } 434 435 elsif ( $title eq 'updated' ) { 436 my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; 437 say "Could not parse old and new version for $module; line is:\n\t$content" and next 438 unless $prev and $new; 439 if ( $data->{$title}{$module}[1] ne $prev ) { 440 say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1]; 441 } 442 if ( $data->{$title}{$module}[2] ne $new ) { 443 say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2]; 444 } 445 $content =~ 446 s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se; 447 } 448 449 elsif ( $title eq 'removed' ) { 450 my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m; 451 say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev; 452 if ( $data->{$title}{$module}[1] ne $prev ) { 453 say "$module: previous version differs; $prev " . $data->{$title}{$module}[1]; 454 } 455 $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me; 456 } 457 458 delete $data->{$title}{$module}; 459 $item->{text} = $content; 460 } 461 return $section; 462 } 463 464 # add modules and pragmata present in $data to the section 465 sub add_to_section { 466 my ( $section, $data, $title ) = @_; 467 468 #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values 469 no warnings 'uninitialized'; 470 for ( values %{ $data->{$title} } ) { 471 my ( $mod, $old_v, $new_v ) = @{$_}; 472 my ( $item, $text ); 473 474 $item = { name => $mod, text => "=item *\n" }; 475 if ( $title eq 'new' ) { 476 $text = "L<$mod> $new_v has been added to the Perl core.\n"; 477 } 478 479 elsif ( $title eq 'updated' ) { 480 $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n"; 481 if ( $deprecated->{$mod} ) { 482 $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n"; 483 } 484 } 485 486 elsif ( $title eq 'removed' ) { 487 $text = "C<$mod> has been removed from the Perl core. Prior version was $old_v.\n"; 488 } 489 490 $item->{text} .= "\n$text\n"; 491 push @{ $section->{items} }, $item; 492 } 493 return $section; 494 } 495 496 sub sort_items_in_section { 497 my ($section) = @_; 498 499 # if we could not parse the module name, it will be uninitialized 500 # in sort. This is not a problem as it will just result in these 501 # sections being placed near the beginning of the section 502 no warnings 'uninitialized'; 503 $section->{items} = 504 [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ]; 505 return $section; 506 } 507 508 # given a hashref of the form returned by corelist_delta() 509 # and a hash structured as documented in transform_pod(), it returns 510 # a pod string representation of the sections, creating sections 511 # if necessary 512 sub sections_to_pod { 513 my ( $data, %sections ) = @_; 514 my $out = ''; 515 516 for ( 517 ( 518 [ 'New Modules and Pragmata', 'new' ], 519 [ 'Updated Modules and Pragmata', 'updated' ], 520 [ 'Removed Modules and Pragmata', 'removed' ] 521 ) 522 ) 523 { 524 my ( $section_name, $title ) = @{$_}; 525 526 my $section = $sections{$section_name} // { 527 name => $section_name, 528 preceding_text => "=head2 $_->[0]\n=over 4\n", 529 following_text => "=back\n", 530 items => [], 531 manual => 1 532 }; 533 534 $section = update_section( $section, $data, $title ); 535 $section = add_to_section( $section, $data, $title ); 536 $section = sort_items_in_section( $section ); 537 538 next if $section->{manual} and scalar @{ $section->{items} } == 0; 539 540 my $items = reduce { no warnings 'once'; $a . $b->{text} } 541 ( '', @{ $section->{items} } ); 542 $out .= 543 ( $section->{preceding_text} // '' ) 544 . $items 545 . ( $section->{following_text} // '' ); 546 } 547 return $out; 548 } 549 550 # given a filename corresponding to an existing perldelta file 551 # and a hashref of the form returned by corelist_delta(), it 552 # returns a string of the resulting file after the module 553 # information has been added. 554 sub transform_pod { 555 my ( $existing, $data ) = @_; 556 557 # will contain hashrefs corresponding to new, updated and removed 558 # modules and pragmata keyed by section name 559 # each section is hashref of the structure 560 # preceding_text => Text occurring before and including the over 561 # region containing the list of modules, 562 # items => [Arrayref of hashrefs corresponding to a module 563 # entry], 564 # an entry has the form: 565 # name => Module name or undef if the name could not be determined 566 # text => The text of the entry, including the item heading 567 # 568 # following_text => Any text not corresponding to a module 569 # that occurs after the first module 570 # 571 # the sections are converted to a pod string by calling sections_to_pod() 572 my %sections; 573 574 # we are in the Modules_and_Pragmata's section 575 my $in_Modules_and_Pragmata; 576 577 # we are the Modules_and_Pragmata's section but have not 578 # encountered any of the desired sections. We use this 579 # flag to determine whether we should append the text to $out 580 # or we need to delay appending until the module listings are 581 # processed and instead append to $append_to_out 582 my $in_Modules_and_Pragmata_preamble; 583 584 my $done_processing_Modules_and_Pragmata; 585 586 my $current_section; 587 588 # $nested_element_level == 0 : not in an over region, treat lines as text 589 # $nested_element_level == 1 : presumably in the top over region that 590 # corresponds to the module listing. Treat 591 # each item as a module 592 # $nested_element_level > 1 : we only consider these values when we are in an item 593 # We treat lines as the text of the current item. 594 my $nested_element_level = 0; 595 596 my $current_item; 597 my $need_to_parse_module_name; 598 599 my $out = ''; 600 my $append_to_out = ''; 601 602 open my $fh, '<', $existing or die "can't open perldelta file $existing: $!"; 603 binmode($fh); 604 605 while (<$fh>) { 606 # treat the rest of the file as plain text 607 if ($done_processing_Modules_and_Pragmata) { 608 $out .= $_; 609 next; 610 } 611 612 elsif ( !$in_Modules_and_Pragmata ) { 613 # entering Modules and Pragmata 614 if (/^=head1 Modules and Pragmata/) { 615 $in_Modules_and_Pragmata = 1; 616 $in_Modules_and_Pragmata_preamble = 1; 617 } 618 $out .= $_; 619 next; 620 } 621 622 # leaving Modules and Pragmata 623 elsif (/^=head1/) { 624 if ($current_section) { 625 push @{ $current_section->{items} }, $current_item 626 if $current_item; 627 $sections{ $current_section->{name} } = $current_section; 628 } 629 $done_processing_Modules_and_Pragmata = 1; 630 $out .= 631 sections_to_pod( $data, %sections ) . $append_to_out . $_; 632 next; 633 } 634 635 # new section in Modules and Pragmata 636 elsif (/^=head2 (.*?)$/) { 637 my $name = $1; 638 if ($current_section) { 639 push @{ $current_section->{items} }, $current_item 640 if $current_item; 641 $sections{ $current_section->{name} } = $current_section; 642 undef $current_section; 643 } 644 645 if ( is_desired_section_name($name) ) { 646 undef $in_Modules_and_Pragmata_preamble; 647 if ( $nested_element_level > 0 ) { 648 die "Unexpected head2 at line no. $."; 649 } 650 my $title = get_section_name_from_heading($name); 651 if ( exists $sections{$title} ) { 652 die "$name occurred twice at line no. $."; 653 } 654 $current_section = {}; 655 $current_section->{name} = $title; 656 $current_section->{preceding_text} = $_; 657 $current_section->{items} = []; 658 $nested_element_level = 0; 659 next; 660 } 661 662 # otherwise treat section as plain text 663 else { 664 if ($in_Modules_and_Pragmata_preamble) { 665 $out .= $_; 666 } 667 else { 668 $append_to_out .= $_; 669 } 670 next; 671 } 672 } 673 674 elsif ($current_section) { 675 676 # not in an over region 677 if ( $nested_element_level == 0 ) { 678 if (/^=over/) { 679 $nested_element_level++; 680 } 681 if ( scalar @{ $current_section->{items} } > 0 ) { 682 $current_section->{following_text} .= $_; 683 } 684 else { 685 $current_section->{preceding_text} .= $_; 686 } 687 next; 688 } 689 690 if ($current_item) { 691 if ($need_to_parse_module_name) { 692 # the item may not have a parsable module name, which means that 693 # $current_item->{name} will never be defined. 694 if (/^(?:L|C)<(.+?)>/) { 695 $current_item->{name} = $1; 696 undef $need_to_parse_module_name; 697 } 698 # =item or =back signals the end of an item 699 # block, which we handle below 700 if ( !/^=(?:item|back)/ ) { 701 $current_item->{text} .= $_; 702 next; 703 } 704 } 705 # currently in an over region 706 # treat text inside region as plain text 707 if ( $nested_element_level > 1 ) { 708 if (/^=back/) { 709 $nested_element_level--; 710 } 711 elsif (/^=over/) { 712 $nested_element_level++; 713 } 714 $current_item->{text} .= $_; 715 next; 716 } 717 # entering over region 718 if (/^=over/) { 719 $nested_element_level++; 720 $current_item->{text} .= $_; 721 next; 722 } 723 # =item or =back signals the end of an item 724 # block, which we handle below 725 if ( !/^=(?:item|back)/ ) { 726 $current_item->{text} .= $_; 727 next; 728 } 729 } 730 731 if (/^=item \*/) { 732 push @{ $current_section->{items} }, $current_item 733 if $current_item; 734 $current_item = { text => $_ }; 735 $need_to_parse_module_name = 1; 736 next; 737 } 738 739 if (/^=back/) { 740 push @{ $current_section->{items} }, $current_item 741 if $current_item; 742 undef $current_item; 743 $nested_element_level--; 744 } 745 746 if ( scalar @{ $current_section->{items} } == 0 ) { 747 $current_section->{preceding_text} .= $_; 748 } 749 else { 750 $current_section->{following_text} .= $_; 751 } 752 next; 753 } 754 755 # text in Modules and Pragmata not in a head2 region 756 else { 757 if ($in_Modules_and_Pragmata_preamble) { 758 $out .= $_; 759 } 760 else { 761 $append_to_out .= $_; 762 } 763 next; 764 } 765 } 766 close $fh; 767 die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata; 768 return $out; 769 } 770 771} 772 773{ 774 package DeltaParser; 775 use Pod::Simple::SimpleTree; 776 777 sub new { 778 my ($class, $input) = @_; 779 780 my $self = bless {} => $class; 781 782 my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root; 783 splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure, 784 # just the nodes within it 785 786 $self->_parse_delta($parsed_pod); 787 788 return $self; 789 } 790 791 # creates the accessor methods: 792 # new_modules 793 # updated_modules 794 # removed_modules 795 for my $k (keys %sections) { 796 no strict 'refs'; 797 my $m = "${k}_modules"; 798 *$m = sub { $_[0]->{$m} }; 799 } 800 801 sub _parse_delta { 802 my ($self, $pod) = @_; 803 804 my $new_section = $self->_look_for_section( $pod, $sections{new} ); 805 my $updated_section = $self->_look_for_section( $pod, $sections{updated} ); 806 my $removed_section = $self->_look_for_section( $pod, $sections{removed} ); 807 808 $self->_parse_new_section($new_section); 809 $self->_parse_updated_section($updated_section); 810 $self->_parse_removed_section($removed_section); 811 812 for (qw/new_modules updated_modules removed_modules/) { 813 $self->{$_} = 814 [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ]; 815 } 816 817 return; 818 } 819 820 sub _parse_new_section { 821 my ($self, $section) = @_; 822 823 $self->{new_modules} = []; 824 return unless $section; 825 $self->{new_modules} = $self->_parse_section($section => sub { 826 my ($el) = @_; 827 828 my ($first, $second) = @{ $el }[2, 3]; 829 my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/; 830 831 return [ $first->[2], undef, $ver ]; 832 }); 833 834 return; 835 } 836 837 sub _parse_updated_section { 838 my ($self, $section) = @_; 839 840 $self->{updated_modules} = []; 841 return unless $section; 842 $self->{updated_modules} = $self->_parse_section($section => sub { 843 my ($el) = @_; 844 845 my ($first, $second) = @{ $el }[2, 3]; 846 my $module = $first->[2]; 847 848 # the regular expression matches the following: 849 # from VERSION_NUMBER to VERSION_NUMBER 850 # from VERSION_NUMBER to VERSION_NUMBER. 851 # from version VERSION_NUMBER to version VERSION_NUMBER. 852 # from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER 853 # from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER 854 # 855 # some perldeltas contain more than one module listed in an entry, this only attempts to match the 856 # first module 857 my ($old, $new) = $second =~ 858 /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s; 859 860 warn "Unable to extract old or new version of $module from perldelta" 861 if !defined $old || !defined $new; 862 863 return [ $module, $old, $new ]; 864 }); 865 866 return; 867 } 868 869 sub _parse_removed_section { 870 my ($self, $section) = @_; 871 872 $self->{removed_modules} = []; 873 return unless $section; 874 $self->{removed_modules} = $self->_parse_section($section => sub { 875 my ($el) = @_; 876 877 my ($first, $second) = @{ $el }[2, 3]; 878 my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/; 879 880 return [ $first->[2], $old, undef ]; 881 }); 882 883 return; 884 } 885 886 sub _parse_section { 887 my ($self, $section, $parser) = @_; 888 889 my $items = $self->_look_down($section => sub { 890 my ($el) = @_; 891 return unless ref $el && $el->[0] =~ /^item-/ 892 && @{ $el } > 2 && ref $el->[2]; 893 return unless $el->[2]->[0] =~ /C|L/; 894 895 return 1; 896 }); 897 898 return [map { $parser->($_) } @{ $items }]; 899 } 900 901 sub _look_down { 902 my ($self, $pod, $predicate) = @_; 903 my @pod = @{ $pod }; 904 905 my @l; 906 while (my $el = shift @pod) { 907 push @l, $el if $predicate->($el); 908 if (ref $el) { 909 my @el = @{ $el }; 910 splice @el, 0, 2; 911 unshift @pod, @el if @el; 912 } 913 } 914 915 return @l ? \@l : undef; 916 } 917 918 sub _look_for_section { 919 my ($self, $pod, $section) = @_; 920 921 my $level; 922 $self->_look_for_range($pod, 923 sub { 924 my ($el) = @_; 925 my ($heading) = $el->[0] =~ /^head(\d)$/; 926 my $f = $heading && $el->[2] =~ /^$section/; 927 $level = $heading if $f && !$level; 928 return $f; 929 }, 930 sub { 931 my ($el) = @_; 932 $el->[0] =~ /^head(\d)$/ && $1 <= $level; 933 }, 934 ); 935 } 936 937 sub _look_for_range { 938 my ($self, $pod, $start_predicate, $stop_predicate) = @_; 939 940 my @l; 941 for my $el (@{ $pod }) { 942 if (@l) { 943 return \@l if $stop_predicate->($el); 944 } 945 else { 946 next unless $start_predicate->($el); 947 } 948 push @l, $el; 949 } 950 951 return; 952 } 953} 954 955run; 956