1#!perl
2use 5.010;
3use strict;
4use warnings;
5use lib 'Porting';
6use Maintainers qw/%Modules/;
7use lib 'dist/Module-CoreList/lib';
8use Module::CoreList;
9use Getopt::Long;
10
11=head1 USAGE
12
13  # generate the module changes for the Perl you are currently building
14  ./perl -Ilib Porting/corelist-perldelta.pl
15
16  # update the module changes for the Perl you are currently building
17  ./perl -Ilib Porting/corelist-perldelta.pl --mode=update pod/perldelta.pod
18
19  # generate a diff between the corelist sections of two perldelta* files:
20  perl Porting/corelist-perldelta.pl --mode=check 5.017001 5.017002 <perl5172delta.pod
21
22=head1 ABOUT
23
24corelist-perldelta.pl is a bit schizophrenic. The part to generate the
25new Perldelta text does not need Algorithm::Diff, but wants to be
26run with the freshly built Perl.
27
28The part to check the diff wants to be run with a Perl that has an up-to-date
29L<Module::CoreList>, but needs the outside L<Algorithm::Diff>.
30
31Ideally, the program will be split into two separate programs, one
32to generate the text and one to show the diff between the
33corelist sections of the last perldelta and the next perldelta.
34
35Currently no information about Removed Modules is displayed in any of the
36modes.
37
38=cut
39
40my %sections = (
41  new     => qr/New Modules and Pragma(ta)?/,
42  updated => qr/Updated Modules and Pragma(ta)?/,
43  removed => qr/Removed Modules and Pragma(ta)?/,
44);
45
46my %titles = (
47  new     => 'New Modules and Pragmata',
48  updated => 'Updated Modules and Pragmata',
49  removed => 'Removed Modules and Pragmata',
50);
51
52my $deprecated;
53
54sub run {
55  my %opt = (mode => 'generate');
56
57  GetOptions(\%opt,
58    'mode|m:s', # 'generate', 'check', 'update'
59  );
60
61  # by default, compare latest two version in CoreList;
62  my ($old, $new) = latest_two_perl_versions();
63
64  # use the provided versions if present
65  # @ARGV >=2 means [old_version] [new_version] [path/to/file]
66  if ( @ARGV >= 2) {
67    ($old, $new) = (shift @ARGV, shift @ARGV);
68    die "$old is an invalid version\n" if not exists
69      $Module::CoreList::version{$old};
70    die "$new is an invalid version\n" if not exists
71      $Module::CoreList::version{$new};
72  }
73
74  if ( $opt{mode} eq 'generate' ) {
75    do_generate($old => $new);
76  }
77  elsif ( $opt{mode} eq 'check' ) {
78    do_check(\*ARGV, $old => $new);
79  }
80  elsif ( $opt{mode} eq 'update' ) {
81    do_update_existing(shift @ARGV, $old => $new);
82  }
83  else {
84    die "Unrecognized mode '$opt{mode}'\n";
85  }
86
87  exit 0;
88}
89
90sub latest_two_perl_versions {
91
92  my @versions = sort keys %Module::CoreList::version;
93
94  my $new = pop @versions;
95
96  # If a fully-padded version number ends in a zero (as in "5.019010"), that
97  # version shows up in %Module::CoreList::version both with and without its
98  # trailing zeros. So skip all versions that are numerically equal to $new.
99  pop @versions while @versions && $versions[-1] == $new;
100
101  die "Too few distinct core versions in %Module::CoreList::version ?!\n"
102    if !@versions;
103
104  return $versions[-1], $new;
105}
106
107# Given two perl versions, it returns a list describing the core distributions that have changed.
108# The first three elements are hashrefs corresponding to new, updated, and removed modules
109# and are of the form (mostly, see the special remarks about removed):
110#   'Distribution Name' => ['Distribution Name', previous version number, current version number]
111# where the version number is undef if the distribution did not exist.
112# The fourth element is an arrayref of core distribution names of those distribution for which it
113# is unknown whether they have changed and therefore need to be manually checked.
114#
115# In most cases, the distribution name in %Modules corresponds to the module that is representative
116# of the distribution as listed in Module::CoreList. However, there are a few distribution names
117# that do not correspond to a module. %distToModules has been created which maps the distribution
118# name to a representative module. The representative module was chosen by either looking at the
119# Makefile of the distribution or by seeing which module the distribution has been traditionally
120# listed under in past perldeltas.
121#
122# There are a few distributions for which there is no single representative module (e.g. libnet).
123# These distributions are returned as the last element of the list.
124#
125# %Modules contains a final key, _PERLLIB, which contains a list of modules that are owned by p5p.
126# This list contains modules and pragmata that may also be present in Module::CoreList.
127# A list of modules are in the list @unclaimedModules, which were manually listed based on whether
128# they were independent modules and whether they have been listed in past perldeltas.
129# The pragmata were found by doing something like:
130#   say for sort grep { $_ eq lc $_ and !exists $Modules{$_}}
131#     keys %{$Module::CoreList::version{'5.019003'}}
132# and manually filtering out pragmata that were already covered.
133#
134# It is currently not possible to differentiate between a removed module and a removed
135# distribution. Therefore, the removed hashref contains every module that has been removed, even if
136# the module's corresponding distribution has not been removed.
137
138sub corelist_delta {
139  my ($old, $new) = @_;
140  my $corelist = \%Module::CoreList::version;
141  my %changes = Module::CoreList::changes_between( $old, $new );
142  $deprecated = $Module::CoreList::deprecated{$new};
143
144  my $getModifyType = sub {
145    my $data = shift;
146    if ( exists $data->{left} and exists $data->{right} ) {
147      return 'updated';
148    }
149    elsif ( !exists $data->{left} and exists $data->{right} ) {
150      return 'new';
151    }
152    elsif ( exists $data->{left} and !exists $data->{right} ) {
153      return 'removed';
154    }
155    return undef;
156  };
157
158  my @unclaimedModules = qw/AnyDBM_File B B::Concise B::Deparse Benchmark Class::Struct Config::Extensions DB
159                            DBM_Filter Devel::Peek DirHandle DynaLoader English Errno ExtUtils::Embed ExtUtils::Miniperl
160                            ExtUtils::Typemaps ExtUtils::XSSymSet Fcntl File::Basename File::Compare File::Copy File::DosGlob
161                            File::Find File::Glob File::stat FileCache FileHandle FindBin GDBM_File Getopt::Std Hash::Util Hash::Util::FieldHash
162                            I18N::Langinfo IPC::Open3 NDBM_File ODBM_File Opcode PerlIO PerlIO::encoding PerlIO::mmap PerlIO::scalar PerlIO::via
163                            Pod::Functions Pod::Html POSIX SDBM_File SelectSaver Symbol Sys::Hostname Thread Tie::Array Tie::Handle Tie::Hash
164                            Tie::Hash::NamedCapture Tie::Memoize Tie::Scalar Tie::StdHandle Tie::SubstrHash Time::gmtime Time::localtime Time::tm
165                            Unicode::UCD UNIVERSAL User::grent User::pwent VMS::DCLsym VMS::Filespec VMS::Stdio XS::Typemap XS::APItest Win32CORE builtin/;
166  my @unclaimedPragmata = qw/arybase attributes blib bytes charnames deprecate diagnostics encoding feature fields filetest inc::latest integer less locale mro open ops overload overloading re sigtrap sort strict subs utf8 vars vmsish/;
167  my @unclaimed = (@unclaimedModules, @unclaimedPragmata);
168
169  my %distToModules = (
170    'IO-Compress' => [
171      {
172        'name'         => 'IO::Compress',
173        'modification' => $getModifyType->( $changes{'IO::Compress::Base'} ),
174        'data'         => $changes{'IO::Compress::Base'}
175      }
176    ],
177    'libnet' => [
178      {
179        'name'         => 'libnet',
180        'modification' => $getModifyType->( $changes{'Net::Cmd'} ),
181        'data'         => $changes{'Net::Cmd'}
182      }
183    ],
184    'PathTools' => [
185      {
186        'name'         => 'File::Spec',
187        'modification' => $getModifyType->( $changes{'Cwd'} ),
188        'data'         => $changes{'Cwd'}
189      }
190    ],
191    'podlators' => [
192      {
193        'name'         => 'podlators',
194        'modification' => $getModifyType->( $changes{'Pod::Text'} ),
195        'data'         => $changes{'Pod::Text'}
196      }
197    ],
198    'Scalar-List-Utils' => [
199      {
200        'name'         => 'List::Util',
201        'modification' => $getModifyType->( $changes{'List::Util'} ),
202        'data'         => $changes{'List::Util'}
203      },
204      {
205        'name'         => 'Scalar::Util',
206        'modification' => $getModifyType->( $changes{'Scalar::Util'} ),
207        'data'         => $changes{'Scalar::Util'}
208      },
209      {
210        'name'         => 'Sub::Util',
211        'modification' => $getModifyType->( $changes{'Sub::Util'} ),
212        'data'         => $changes{'Sub::Util'}
213      }
214    ],
215    'Text-Tabs+Wrap' => [
216      {
217        'name'         => 'Text::Tabs',
218        'modification' => $getModifyType->( $changes{'Text::Tabs'} ),
219        'data'         => $changes{'Text::Tabs'}
220      },
221      {
222        'name'         => 'Text::Wrap',
223        'modification' => $getModifyType->( $changes{'Text::Wrap'} ),
224        'data'         => $changes{'Text::Wrap'}
225      }
226    ],
227  );
228
229  # structure is (new|removed|updated) => [ [ModuleName, previousVersion, newVersion] ]
230  my $deltaGrouping = {};
231
232  # list of distributions listed in %Modules that need to be manually checked because there is no module that represents it
233  my @manuallyCheck;
234
235  # %Modules defines what is currently in core
236  for my $k ( keys %Modules ) {
237    next if $k eq '_PERLLIB'; #these are taken care of by being listed in @unclaimed
238    next if Module::CoreList::is_core($k) and !exists $changes{$k}; #modules that have not changed
239
240    my ( $distName, $modifyType, $data );
241
242    if ( exists $changes{$k} ) {
243      $distName   = $k;
244      $modifyType = $getModifyType->( $changes{$k} );
245      $data       = $changes{$k};
246    }
247    elsif ( exists $distToModules{$k} ) {
248      # modification will be undef if the distribution has not changed
249      my @modules = grep { $_->{modification} } @{ $distToModules{$k} };
250      for (@modules) {
251        $deltaGrouping->{ $_->{modification} }->{ $_->{name} } = [ $_->{name}, $_->{data}->{left}, $_->{data}->{right} ];
252      }
253      next;
254    }
255    else {
256      push @manuallyCheck, $k and next;
257    }
258
259    $deltaGrouping->{$modifyType}->{$distName} = [ $distName, $data->{left}, $data->{right} ];
260  }
261
262  for my $k (@unclaimed) {
263    if ( exists $changes{$k} ) {
264      $deltaGrouping->{ $getModifyType->( $changes{$k} ) }->{$k} =
265        [ $k, $changes{$k}->{left}, $changes{$k}->{right} ];
266    }
267  }
268
269  # in old corelist, but not this one => removed
270  # N.B. This is exhaustive -- not just what's in %Modules, so modules removed from
271  # distributions will show up here, too.  Some person will have to review to see what's
272  # important. That's the best we can do without a historical Maintainers.pl
273  for my $k ( keys %{ $corelist->{$old} } ) {
274    if ( ! exists $corelist->{$new}{$k} ) {
275      $deltaGrouping->{'removed'}->{$k} = [ $k, $corelist->{$old}{$k}, undef ];
276    }
277  }
278
279  return (
280    \%{ $deltaGrouping->{'new'} },
281    \%{ $deltaGrouping->{'removed'} },
282    \%{ $deltaGrouping->{'updated'} },
283    \@manuallyCheck
284  );
285}
286
287# currently does not update the Removed Module section
288sub do_update_existing {
289  my ( $existing, $old, $new ) = @_;
290
291  my ( $added, $removed, $updated, $manuallyCheck ) = corelist_delta( $old => $new );
292  if (@{$manuallyCheck}) {
293    print "It cannot be determined whether the following distributions have changed.\n";
294    print "Please check and list accordingly:\n";
295    say "\t* $_" for sort @{$manuallyCheck};
296    print "\n";
297  }
298
299  my $data = {
300    new      => $added,
301    updated  => $updated,
302    #removed => $removed, ignore removed for now
303  };
304
305  my $text = DeltaUpdater::transform_pod( $existing, $data );
306  open my $out, '>', $existing or die "can't open perldelta file $existing: $!";
307  binmode($out);
308  print $out $text;
309  close $out;
310  say "The New and Updated Modules and Pragmata sections in $existing have been updated";
311  say "Please ensure the Removed Modules and Pragmata section is up-to-date";
312}
313
314sub do_generate {
315  my ($old, $new) = @_;
316  my ($added, $removed, $updated, $manuallyCheck) = corelist_delta($old => $new);
317
318  if ($manuallyCheck) {
319    print "\nXXXIt cannot be determined whether the following distributions have changed.\n";
320    print "Please check and list accordingly:\n";
321    say "\t$_" for @{$manuallyCheck};
322    print "\n";
323  }
324
325  my $data = {
326    new      => $added,
327    updated  => $updated,
328    #removed => $removed, ignore removed for now
329  };
330
331  say DeltaUpdater::sections_to_pod($data)
332}
333
334sub do_check {
335  my ($in, $old, $new) = @_;
336
337  my $delta = DeltaParser->new($in);
338  my ($added, $removed, $updated) = corelist_delta($old => $new);
339
340  # because of the difficulty in identifying the distribution for removed modules
341  # don't bother checking them
342  for my $ck ([ 'new', $delta->new_modules, $added ],
343              #[ 'removed', $delta->removed_modules, $removed ],
344              [ 'updated', $delta->updated_modules, $updated ] ) {
345    my @delta = @{ $ck->[1] };
346    my @corelist = sort { lc $a->[0] cmp lc $b->[0] } values %{ $ck->[2] };
347
348    printf $ck->[0] . ":\n";
349
350    require Algorithm::Diff;
351    my $diff = Algorithm::Diff->new(map {
352      [map { join q{ } => grep defined, @{ $_ } } @{ $_ }]
353    } \@delta, \@corelist);
354
355    while ($diff->Next) {
356      next if $diff->Same;
357      my $sep = '';
358      if (!$diff->Items(2)) {
359        printf "%d,%dd%d\n", $diff->Get(qw( Min1 Max1 Max2 ));
360      } elsif(!$diff->Items(1)) {
361        printf "%da%d,%d\n", $diff->Get(qw( Max1 Min2 Max2 ));
362      } else {
363        $sep = "---\n";
364        printf "%d,%dc%d,%d\n", $diff->Get(qw( Min1 Max1 Min2 Max2 ));
365      }
366      print "Delta< $_\n" for $diff->Items(1);
367      print $sep;
368      print "Corelist> $_\n" for $diff->Items(2);
369    }
370
371    print "\n";
372  }
373}
374
375{
376
377  package DeltaUpdater;
378  use List::Util 'reduce';
379
380  sub get_section_name_from_heading {
381    my $heading = shift;
382    while (my ($key, $expression) = each %sections) {
383      if ($heading =~ $expression) {
384        return $titles{$key};
385      }
386    }
387    die "$heading did not match any section";
388  }
389
390  sub is_desired_section_name {
391    for (values %sections) {
392      return 1 if $_[0] =~ $_;
393    }
394    return 0;
395  }
396
397  # verify the module and pragmata in the section, changing the stated version if necessary
398  # this subroutine warns if the module name cannot be parsed or if it is not listed in
399  # the results returned from corelist_delta()
400  #
401  # a side-effect of calling this function is that modules present in the section are
402  # removed from $data, resulting in $data containing only those modules and pragmata
403  # that were not listed in the perldelta file. This means we can then pass $data to
404  # add_to_section() without worrying about filtering out duplicates
405  sub update_section {
406    my ( $section, $data, $title ) = @_;
407    my @items = @{ $section->{items} };
408
409    for my $item (@items) {
410
411      my $content = $item->{text};
412      my $module  = $item->{name};
413
414      #skip dummy items
415      next if !$module and $content =~ /\s*xx*\s*/i;
416
417      say "Could not parse module name; line is:\n\t$content" and next unless $module;
418
419      if ( !$data->{$title}{$module} ) {
420        print "$module is not listed as being $title in Module::CoreList.\n";
421        print "Ensure Module::CoreList has been updated and\n";
422        print "check to see that the distribution is not listed under another name.\n\n";
423        next;
424      }
425
426      if ( $title eq 'new' ) {
427        my ($new) = $content =~ /(\d[^\s]+)\s+has\s+been.*$/m;
428        say "Could not parse new version for $module; line is:\n\t$content" and next unless $new;
429        if ( $data->{$title}{$module}[2] ne $new ) {
430            say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
431        }
432        $content =~ s/\d[^\s]+(\s+has\s+been.*$)/$data->{$title}{$module}[2].$1/me;
433      }
434
435      elsif ( $title eq 'updated' ) {
436        my ( $prev, $new ) = $content =~ /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
437        say "Could not parse old and new version for $module; line is:\n\t$content" and next
438          unless $prev and $new;
439        if ( $data->{$title}{$module}[1] ne $prev ) {
440          say "$module: previous version differs; version in pod: $prev; version in corelist: " . $data->{$title}{$module}[1];
441        }
442        if ( $data->{$title}{$module}[2] ne $new ) {
443          say "$module: new version differs; version in pod: $new; version in corelist: " . $data->{$title}{$module}[2];
444        }
445        $content =~
446          s/(from\s+(?:version\s+)?)\d[^\s]+(\s+to\s+(?:version\s+)?)\d[^\s,]+?(?=[\s,]|\.\s|\.$|$)(.*)/$1.$data->{$title}{$module}[1].$2.$data->{$title}{$module}[2].$3/se;
447      }
448
449      elsif ( $title eq 'removed' ) {
450        my ($prev) = $content =~ /^.*?was\s+(\d[^\s]+?)/m;
451        say "Could not parse old version for $module; line is:\n\t$content" and next unless $prev;
452        if ( $data->{$title}{$module}[1] ne $prev ) {
453          say "$module: previous version differs; $prev " . $data->{$title}{$module}[1];
454        }
455        $content =~ s/(^.*?was\s+)\d[^\s]+?/$1.$data->{$title}{$module}[1]/me;
456      }
457
458      delete $data->{$title}{$module};
459      $item->{text} = $content;
460    }
461    return $section;
462  }
463
464  # add modules and pragmata present in $data to the section
465  sub add_to_section {
466    my ( $section, $data, $title ) = @_;
467
468    #undef is a valid version name in Module::CoreList so suppress warnings about concatenating undef values
469    no warnings 'uninitialized';
470    for ( values %{ $data->{$title} } ) {
471      my ( $mod, $old_v, $new_v ) = @{$_};
472      my ( $item, $text );
473
474      $item = { name => $mod, text => "=item *\n" };
475      if ( $title eq 'new' ) {
476        $text = "L<$mod> $new_v has been added to the Perl core.\n";
477      }
478
479      elsif ( $title eq 'updated' ) {
480        $text = "L<$mod> has been upgraded from version $old_v to $new_v.\n";
481        if ( $deprecated->{$mod} ) {
482          $text .= "NOTE: L<$mod> is deprecated and may be removed from a future version of Perl.\n";
483        }
484      }
485
486      elsif ( $title eq 'removed' ) {
487        $text = "C<$mod> has been removed from the Perl core.  Prior version was $old_v.\n";
488      }
489
490      $item->{text} .= "\n$text\n";
491      push @{ $section->{items} }, $item;
492    }
493    return $section;
494  }
495
496  sub sort_items_in_section {
497    my ($section) = @_;
498
499    # if we could not parse the module name, it will be uninitialized
500    # in sort. This is not a problem as it will just result in these
501    # sections being placed near the beginning of the section
502    no warnings 'uninitialized';
503    $section->{items} =
504      [ sort { lc $a->{name} cmp lc $b->{name} } @{ $section->{items} } ];
505    return $section;
506  }
507
508  # given a hashref of the form returned by corelist_delta()
509  # and a hash structured as documented in transform_pod(), it returns
510  # a pod string representation of the sections, creating sections
511  # if necessary
512  sub sections_to_pod {
513    my ( $data, %sections ) = @_;
514    my $out = '';
515
516    for (
517        (
518          [ 'New Modules and Pragmata',     'new' ],
519          [ 'Updated Modules and Pragmata', 'updated' ],
520          [ 'Removed Modules and Pragmata', 'removed' ]
521        )
522      )
523    {
524      my ( $section_name, $title ) = @{$_};
525
526      my $section = $sections{$section_name} // {
527          name           => $section_name,
528          preceding_text => "=head2 $_->[0]\n=over 4\n",
529          following_text => "=back\n",
530          items          => [],
531          manual         => 1
532      };
533
534      $section = update_section( $section, $data, $title );
535      $section = add_to_section( $section, $data, $title );
536      $section = sort_items_in_section( $section );
537
538      next if $section->{manual} and scalar @{ $section->{items} } == 0;
539
540      my $items = reduce { no warnings 'once'; $a . $b->{text} }
541        ( '', @{ $section->{items} } );
542      $out .=
543        ( $section->{preceding_text} // '' )
544        . $items
545        . ( $section->{following_text} // '' );
546    }
547    return $out;
548  }
549
550  # given a filename corresponding to an existing perldelta file
551  # and a hashref of the form returned by corelist_delta(), it
552  # returns a string of the resulting file after the module
553  # information has been added.
554  sub transform_pod {
555    my ( $existing, $data ) = @_;
556
557    # will contain hashrefs corresponding to new, updated and removed
558    # modules and pragmata keyed by section name
559    # each section is hashref of the structure
560    #   preceding_text => Text occurring before and including the over
561    #                     region containing the list of modules,
562    #   items          => [Arrayref of hashrefs corresponding to a module
563    #                      entry],
564    #     an entry has the form:
565    #       name => Module name or undef if the name could not be determined
566    #       text => The text of the entry, including the item heading
567    #
568    #   following_text => Any text not corresponding to a module
569    #                     that occurs after the first module
570    #
571    # the sections are converted to a pod string by calling sections_to_pod()
572    my %sections;
573
574    # we are in the Modules_and_Pragmata's section
575    my $in_Modules_and_Pragmata;
576
577    # we are the Modules_and_Pragmata's section but have not
578    # encountered any of the desired sections. We use this
579    # flag to determine whether we should append the text to $out
580    # or we need to delay appending until the module listings are
581    # processed and instead append to $append_to_out
582    my $in_Modules_and_Pragmata_preamble;
583
584    my $done_processing_Modules_and_Pragmata;
585
586    my $current_section;
587
588    # $nested_element_level == 0 : not in an over region, treat lines as text
589    # $nested_element_level == 1 : presumably in the top over region that
590    #                              corresponds to the module listing. Treat
591    #                              each item as a module
592    # $nested_element_level > 1  : we only consider these values when we are in an item
593    #                              We treat lines as the text of the current item.
594    my $nested_element_level = 0;
595
596    my $current_item;
597    my $need_to_parse_module_name;
598
599    my $out = '';
600    my $append_to_out = '';
601
602    open my $fh, '<', $existing or die "can't open perldelta file $existing: $!";
603    binmode($fh);
604
605    while (<$fh>) {
606      # treat the rest of the file as plain text
607      if ($done_processing_Modules_and_Pragmata) {
608        $out .= $_;
609        next;
610      }
611
612      elsif ( !$in_Modules_and_Pragmata ) {
613        # entering Modules and Pragmata
614        if (/^=head1 Modules and Pragmata/) {
615          $in_Modules_and_Pragmata          = 1;
616          $in_Modules_and_Pragmata_preamble = 1;
617        }
618        $out .= $_;
619        next;
620      }
621
622      # leaving Modules and Pragmata
623      elsif (/^=head1/) {
624        if ($current_section) {
625          push @{ $current_section->{items} }, $current_item
626            if $current_item;
627          $sections{ $current_section->{name} } = $current_section;
628        }
629        $done_processing_Modules_and_Pragmata = 1;
630        $out .=
631          sections_to_pod( $data, %sections ) . $append_to_out . $_;
632        next;
633      }
634
635      # new section in Modules and Pragmata
636      elsif (/^=head2 (.*?)$/) {
637        my $name = $1;
638        if ($current_section) {
639          push @{ $current_section->{items} }, $current_item
640            if $current_item;
641          $sections{ $current_section->{name} } = $current_section;
642          undef $current_section;
643        }
644
645        if ( is_desired_section_name($name) ) {
646          undef $in_Modules_and_Pragmata_preamble;
647          if ( $nested_element_level > 0 ) {
648            die "Unexpected head2 at line no. $.";
649          }
650          my $title = get_section_name_from_heading($name);
651          if ( exists $sections{$title} ) {
652            die "$name occurred twice at line no. $.";
653          }
654          $current_section                   = {};
655          $current_section->{name}           = $title;
656          $current_section->{preceding_text} = $_;
657          $current_section->{items}          = [];
658         $nested_element_level               = 0;
659          next;
660        }
661
662        # otherwise treat section as plain text
663        else {
664          if ($in_Modules_and_Pragmata_preamble) {
665            $out .= $_;
666          }
667          else {
668            $append_to_out .= $_;
669          }
670          next;
671        }
672      }
673
674      elsif ($current_section) {
675
676        # not in an over region
677        if ( $nested_element_level == 0 ) {
678          if (/^=over/) {
679            $nested_element_level++;
680          }
681          if ( scalar @{ $current_section->{items} } > 0 ) {
682            $current_section->{following_text} .= $_;
683          }
684          else {
685            $current_section->{preceding_text} .= $_;
686          }
687          next;
688        }
689
690        if ($current_item) {
691          if ($need_to_parse_module_name) {
692            # the item may not have a parsable module name, which means that
693            # $current_item->{name} will never be defined.
694            if (/^(?:L|C)<(.+?)>/) {
695              $current_item->{name} = $1;
696              undef $need_to_parse_module_name;
697            }
698            # =item or =back signals the end of an item
699            # block, which we handle below
700            if ( !/^=(?:item|back)/ ) {
701              $current_item->{text} .= $_;
702              next;
703            }
704          }
705          # currently in an over region
706          # treat text inside region as plain text
707          if ( $nested_element_level > 1 ) {
708            if (/^=back/) {
709              $nested_element_level--;
710            }
711            elsif (/^=over/) {
712              $nested_element_level++;
713            }
714            $current_item->{text} .= $_;
715            next;
716          }
717          # entering over region
718          if (/^=over/) {
719            $nested_element_level++;
720            $current_item->{text} .= $_;
721            next;
722          }
723          # =item or =back signals the end of an item
724          # block, which we handle below
725          if ( !/^=(?:item|back)/ ) {
726            $current_item->{text} .= $_;
727            next;
728          }
729        }
730
731        if (/^=item \*/) {
732          push @{ $current_section->{items} }, $current_item
733            if $current_item;
734          $current_item = { text => $_ };
735          $need_to_parse_module_name = 1;
736          next;
737        }
738
739        if (/^=back/) {
740          push @{ $current_section->{items} }, $current_item
741            if $current_item;
742          undef $current_item;
743          $nested_element_level--;
744        }
745
746        if ( scalar @{ $current_section->{items} } == 0 ) {
747          $current_section->{preceding_text} .= $_;
748        }
749        else {
750          $current_section->{following_text} .= $_;
751        }
752        next;
753      }
754
755      # text in Modules and Pragmata not in a head2 region
756      else {
757        if ($in_Modules_and_Pragmata_preamble) {
758          $out .= $_;
759        }
760        else {
761          $append_to_out .= $_;
762        }
763        next;
764      }
765    }
766    close $fh;
767    die 'Never saw Modules and Pragmata section' unless $in_Modules_and_Pragmata;
768    return $out;
769  }
770
771}
772
773{
774  package DeltaParser;
775  use Pod::Simple::SimpleTree;
776
777  sub new {
778    my ($class, $input) = @_;
779
780    my $self = bless {} => $class;
781
782    my $parsed_pod = Pod::Simple::SimpleTree->new->parse_file($input)->root;
783    splice @{ $parsed_pod }, 0, 2; # we don't care about the document structure,
784                                   # just the nodes within it
785
786    $self->_parse_delta($parsed_pod);
787
788    return $self;
789  }
790
791  # creates the accessor methods:
792  #   new_modules
793  #   updated_modules
794  #   removed_modules
795  for my $k (keys %sections) {
796    no strict 'refs';
797    my $m = "${k}_modules";
798    *$m = sub { $_[0]->{$m} };
799  }
800
801  sub _parse_delta {
802    my ($self, $pod) = @_;
803
804    my $new_section     = $self->_look_for_section( $pod, $sections{new} );
805    my $updated_section = $self->_look_for_section( $pod, $sections{updated} );
806    my $removed_section = $self->_look_for_section( $pod, $sections{removed} );
807
808    $self->_parse_new_section($new_section);
809    $self->_parse_updated_section($updated_section);
810    $self->_parse_removed_section($removed_section);
811
812    for (qw/new_modules updated_modules removed_modules/) {
813      $self->{$_} =
814        [ sort { lc $a->[0] cmp lc $b->[0] } @{ $self->{$_} } ];
815    }
816
817    return;
818  }
819
820  sub _parse_new_section {
821    my ($self, $section) = @_;
822
823    $self->{new_modules} = [];
824    return unless $section;
825    $self->{new_modules} = $self->_parse_section($section => sub {
826      my ($el) = @_;
827
828      my ($first, $second) = @{ $el }[2, 3];
829      my ($ver) = $second =~ /(\d[^\s]+)\s+has\s+been/;
830
831      return [ $first->[2], undef, $ver ];
832    });
833
834    return;
835  }
836
837  sub _parse_updated_section {
838    my ($self, $section) = @_;
839
840    $self->{updated_modules} = [];
841    return unless $section;
842    $self->{updated_modules} = $self->_parse_section($section => sub {
843      my ($el) = @_;
844
845      my ($first, $second) = @{ $el }[2, 3];
846      my $module = $first->[2];
847
848      # the regular expression matches the following:
849      #   from VERSION_NUMBER to VERSION_NUMBER
850      #   from VERSION_NUMBER to VERSION_NUMBER.
851      #   from version VERSION_NUMBER to version VERSION_NUMBER.
852      #   from VERSION_NUMBER to VERSION_NUMBER and MODULE from VERSION_NUMBER to VERSION_NUMBER
853      #   from VERSION_NUMBER to VERSION_NUMBER, and MODULE from VERSION_NUMBER to VERSION_NUMBER
854      #
855      # some perldeltas contain more than one module listed in an entry, this only attempts to match the
856      # first module
857      my ($old, $new) = $second =~
858          /from\s+(?:version\s+)?(\d[^\s]+)\s+to\s+(?:version\s+)?(\d[^\s,]+?)(?=[\s,]|\.\s|\.$|$).*/s;
859
860      warn "Unable to extract old or new version of $module from perldelta"
861        if !defined $old || !defined $new;
862
863      return [ $module, $old, $new ];
864    });
865
866    return;
867  }
868
869  sub _parse_removed_section {
870    my ($self, $section) = @_;
871
872    $self->{removed_modules} = [];
873    return unless $section;
874    $self->{removed_modules} = $self->_parse_section($section => sub {
875      my ($el) = @_;
876
877      my ($first, $second) = @{ $el }[2, 3];
878      my ($old) = $second =~ /was\s+(\d[^\s]+?)\.?$/;
879
880      return [ $first->[2], $old, undef ];
881    });
882
883    return;
884  }
885
886  sub _parse_section {
887    my ($self, $section, $parser) = @_;
888
889    my $items = $self->_look_down($section => sub {
890      my ($el) = @_;
891      return unless ref $el && $el->[0] =~ /^item-/
892          && @{ $el } > 2 && ref $el->[2];
893      return unless $el->[2]->[0] =~ /C|L/;
894
895      return 1;
896    });
897
898    return [map { $parser->($_) } @{ $items }];
899  }
900
901  sub _look_down {
902    my ($self, $pod, $predicate) = @_;
903    my @pod = @{ $pod };
904
905    my @l;
906    while (my $el = shift @pod) {
907      push @l, $el if $predicate->($el);
908      if (ref $el) {
909        my @el = @{ $el };
910        splice @el, 0, 2;
911        unshift @pod, @el if @el;
912      }
913    }
914
915    return @l ? \@l : undef;
916  }
917
918  sub _look_for_section {
919    my ($self, $pod, $section) = @_;
920
921    my $level;
922    $self->_look_for_range($pod,
923      sub {
924        my ($el) = @_;
925        my ($heading) = $el->[0] =~ /^head(\d)$/;
926        my $f = $heading && $el->[2] =~ /^$section/;
927        $level = $heading if $f && !$level;
928        return $f;
929      },
930      sub {
931        my ($el) = @_;
932        $el->[0] =~ /^head(\d)$/ && $1 <= $level;
933      },
934    );
935  }
936
937  sub _look_for_range {
938    my ($self, $pod, $start_predicate, $stop_predicate) = @_;
939
940    my @l;
941    for my $el (@{ $pod }) {
942      if (@l) {
943        return \@l if $stop_predicate->($el);
944      }
945      else {
946        next unless $start_predicate->($el);
947      }
948      push @l, $el;
949    }
950
951    return;
952  }
953}
954
955run;
956