xref: /openbsd/gnu/usr.bin/perl/Porting/pod_lib.pl (revision 6fb12b70)
1#!/usr/bin/perl -w
2
3use strict;
4use File::Find;
5
6=head1 NAME
7
8Porting/pod_lib.pl - functions for building and installing POD
9
10=head1 SYNOPSIS
11
12    require './Porting/pod_lib.pl';
13
14=cut
15
16=head1 DESCRIPTION
17
18This program, when C<require>d into other programs in the Perl 5 core
19distribution, provides functions useful during building and, secondarily,
20testing.
21
22As of this writing, the functions in this program are used in these other
23programs:
24
25    installman
26    installperl
27    pod/buildtoc
28    pod/perl.pod
29    Porting/new-perldelta.pl
30    Porting/pod_rules.pl
31
32Note:  Since these functions are used during the Perl build process, they must
33work with F<miniperl>.  That necessarily implies that these functions must not
34rely on XS modules, either directly or indirectly (e.g., C<autodie>).
35
36=head1 SUBROUTINES
37
38=head2 C<my_die()>
39
40=over 4
41
42=item * Purpose
43
44Exit from a process with an error code and a message.
45
46=item * Arguments
47
48List of arguments to be passed with the error message.  Example:
49
50    close $fh or my_die("close 'utils.lst': $!");
51
52=item * Return Value
53
54Exit code C<255>.
55
56=item * Comment
57
58Prints C<ABORTED> to STDERR.
59
60=back
61
62=cut
63
64# In some situations, eg cross-compiling, we get run with miniperl, so we can't use Digest::MD5
65my $has_md5;
66BEGIN {
67    use Carp;
68    $has_md5 = eval { require Digest::MD5; Digest::MD5->import('md5');  1; };
69}
70
71
72# make it clearer when we haven't run to completion, as we can be quite
73# noisy when things are working ok
74
75sub my_die {
76    print STDERR "$0: ", @_;
77    print STDERR "\n" unless $_[-1] =~ /\n\z/;
78    print STDERR "ABORTED\n";
79    exit 255;
80}
81
82=head2 C<open_or_die()>
83
84=over 4
85
86=item * Purpose
87
88Opens a file or fails if it cannot.
89
90=item * Arguments
91
92String holding filename to be opened.  Example:
93
94    $fh = open_or_die('utils.lst');
95
96=item * Return Value
97
98Handle to opened file.
99
100=back
101
102=cut
103
104sub open_or_die {
105    my $filename = shift;
106    open my $fh, '<', $filename or my_die "Can't open $filename: $!";
107    return $fh;
108}
109
110=head2 C<slurp_or_die()>
111
112=over 4
113
114=item * Purpose
115
116Read the contents of a file into memory as a single string.
117
118=item * Arguments
119
120String holding name of file to be read into memory.
121
122    $olddelta = slurp_or_die('pod/perldelta.pod');
123
124=item * Return Value
125
126String holding contents of file.
127
128=back
129
130=cut
131
132sub slurp_or_die {
133    my $filename = shift;
134    my $fh = open_or_die($filename);
135    binmode $fh;
136    local $/;
137    my $contents = <$fh>;
138    die "Can't read $filename: $!" unless defined $contents and close $fh;
139    return $contents;
140}
141
142=head2 C<write_or_die()>
143
144=over 4
145
146=item * Purpose
147
148Write out a string to a file.
149
150=item * Arguments
151
152List of two arguments:  (i) String holding name of file to be written to; (ii)
153String holding contents to be written.
154
155    write_or_die($olddeltaname, $olddelta);
156
157=item * Return Value
158
159Implicitly returns true value upon success.
160
161=back
162
163=cut
164
165sub write_or_die {
166    my ($filename, $contents) = @_;
167    open my $fh, '>', $filename or die "Can't open $filename for writing: $!";
168    binmode $fh;
169    print $fh $contents or die "Can't write to $filename: $!";
170    close $fh or die "Can't close $filename: $!";
171}
172
173=head2 C<verify_contiguous()>
174
175=over 4
176
177=item * Purpose
178
179Verify that a file contains exactly one contiguous run of lines which matches
180the passed in pattern. C<croak()>s if the pattern is not found, or found in
181more than one place.
182
183=item * Arguments
184
185=over 4
186
187=item * Name of file
188
189=item * Contents of file
190
191=item * Pattern of interest
192
193=item * Name to report on error
194
195=back
196
197=item * Return Value
198
199The contents of the file, with C<qr/\0+/> substituted for the pattern.
200
201=back
202
203=cut
204
205sub verify_contiguous {
206    my ($name, $content, $re, $what) = @_;
207    require Carp;
208    $content =~ s/$re/\0/g;
209    my $sections = () = $content =~ m/\0+/g;
210    Carp::croak("$0: $name contains no $what") if $sections < 1;
211    Carp::croak("$0: $name contains discontiguous $what") if $sections > 1;
212    return $content;
213}
214
215=head2 C<process()>
216
217=over 4
218
219=item * Purpose
220
221Read a file from disk, pass the contents to the callback, and either update
222the file on disk (if changed) or generate TAP output to confirm that the
223version on disk is up to date. C<die>s if the file contains any C<NUL> bytes.
224This permits the callback routine to use C<NUL> bytes as placeholders while
225manipulating the file's contents.
226
227=item * Arguments
228
229=over 4
230
231=item * Description for use in error messages
232
233=item * Name of file
234
235=item * Callback
236
237Passed description and file contents, should return updated file contents.
238
239=item * Test number
240
241If defined, generate TAP output to C<STDOUT>. If defined and false, generate
242an unnumbered test. Otherwise this is the test number in the I<ok> line.
243
244=item * Verbose flag
245
246If true, generate verbose output.
247
248=back
249
250=item * Return Value
251
252Does not return anything.
253
254=back
255
256=cut
257
258sub process {
259    my ($desc, $filename, $callback, $test, $verbose) = @_;
260
261    print "Now processing $filename\n" if $verbose;
262    my $orig = slurp_or_die($filename);
263    my_die "$filename contains NUL bytes" if $orig =~ /\0/;
264
265    my $new = $callback->($desc, $orig);
266
267    if (defined $test) {
268        printf "%s%s # $filename is up to date\n",
269            ($new eq $orig ? 'ok' : 'not ok'), ($test ? " $test" : '');
270        return;
271    } elsif ($new eq $orig) {
272        print "Was not modified\n"
273            if $verbose;
274        return;
275    }
276
277    my $mode = (stat $filename)[2];
278    my_die "Can't stat $filename: $!"
279        unless defined $mode;
280    rename $filename, "$filename.old"
281        or my_die "Can't rename $filename to $filename.old: $!";
282
283    write_or_die($filename, $new);
284    chmod $mode & 0777, $filename or my_die "can't chmod $mode $filename: $!";
285}
286
287=head2 C<pods_to_install()>
288
289=over 4
290
291=item * Purpose
292
293Create a lookup table holding information about PODs to be installed.
294
295=item * Arguments
296
297None.
298
299=item * Return Value
300
301Reference to a hash with a structure like this:
302
303    $found = {
304      'MODULE' => {
305        'CPAN::Bundle' => 'lib/CPAN/Bundle.pm',
306        'Locale::Codes::Script_Retired' =>
307            'lib/Locale/Codes/Script_Retired.pm',
308        'Pod::Simple::DumpAsText' =>
309            'lib/Pod/Simple/DumpAsText.pm',
310        # ...
311        'Locale::Codes::LangVar' =>
312            'lib/Locale/Codes/LangVar.pod'
313      },
314      'PRAGMA' => {
315        'fields' => 'lib/fields.pm',
316        'subs' => 'lib/subs.pm',
317        # ...
318      },
319
320=item * Comment
321
322Broadly speaking, the function assembles a list of all F<.pm> and F<.pod>
323files in the distribution and then excludes certain files from installation.
324
325=back
326
327=cut
328
329sub pods_to_install {
330    # manpages not to be installed
331    my %do_not_install = map { ($_ => 1) }
332        qw(Pod::Functions XS::APItest XS::Typemap);
333
334    my (%done, %found);
335
336    File::Find::find({no_chdir=>1,
337                      wanted => sub {
338                          if (m!/t\z!) {
339                              ++$File::Find::prune;
340                              return;
341                          }
342
343                          # $_ is $File::Find::name when using no_chdir
344                          return unless m!\.p(?:m|od)\z! && -f $_;
345                          return if m!lib/Net/FTP/.+\.pm\z!; # Hi, Graham! :-)
346                          # Skip .pm files that have corresponding .pod files
347                          return if s!\.pm\z!.pod! && -e $_;
348                          s!\.pod\z!!;
349                          s!\Alib/!!;
350                          s!/!::!g;
351
352                          my_die("Duplicate files for $_, '$done{$_}' and '$File::Find::name'")
353                              if exists $done{$_};
354                          $done{$_} = $File::Find::name;
355
356                          return if $do_not_install{$_};
357                          return if is_duplicate_pod($File::Find::name);
358                          $found{/\A[a-z]/ ? 'PRAGMA' : 'MODULE'}{$_}
359                              = $File::Find::name;
360                      }}, 'lib');
361    return \%found;
362}
363
364my %state = (
365             # Don't copy these top level READMEs
366             ignore => {
367                        micro => 1,
368                        # vms => 1,
369                       },
370            );
371
372{
373    my (%Lengths, %MD5s);
374
375    sub is_duplicate_pod {
376        my $file = shift;
377        local $_;
378
379        return if !$has_md5;
380
381        # Initialise the list of possible source files on the first call.
382        unless (%Lengths) {
383            __prime_state() unless $state{master};
384            foreach (@{$state{master}}) {
385                next unless $_->[2]{dual};
386                # This is a dual-life perl*.pod file, which will have be copied
387                # to lib/ by the build process, and hence also found there.
388                # These are the only pod files that might become duplicated.
389                ++$Lengths{-s $_->[1]};
390                ++$MD5s{md5(slurp_or_die($_->[1]))};
391            }
392        }
393
394        # We are a file in lib. Are we a duplicate?
395        # Don't bother calculating the MD5 if there's no interesting file of
396        # this length.
397        return $Lengths{-s $file} && $MD5s{md5(slurp_or_die($file))};
398    }
399}
400
401sub __prime_state {
402    my $source = 'perldelta.pod';
403    my $filename = "pod/$source";
404    my $contents = slurp_or_die($filename);
405    my @want =
406        $contents =~ /perldelta - what is new for perl v(5)\.(\d+)\.(\d+)\n/;
407    die "Can't extract version from $filename" unless @want;
408    my $delta_leaf = join '', 'perl', @want, 'delta';
409    $state{delta_target} = "$delta_leaf.pod";
410    $state{delta_version} = \@want;
411
412    # This way round so that keys can act as a MANIFEST skip list
413    # Targets will always be in the pod directory. Currently we can only cope
414    # with sources being in the same directory.
415    $state{copies}{$state{delta_target}} = $source;
416
417    # The default flags if none explicitly set for the current file.
418    my $current_flags = '';
419    my (%flag_set, @paths);
420
421    my $master = open_or_die('pod/perl.pod');
422
423    while (<$master>) {
424        last if /^=begin buildtoc$/;
425    }
426    die "Can't find '=begin buildtoc':" if eof $master;
427
428    while (<$master>) {
429        next if /^$/ or /^#/;
430        last if /^=end buildtoc/;
431        my ($command, @args) = split ' ';
432        if ($command eq 'flag') {
433            # For the named pods, use these flags, instead of $current_flags
434            my $flags = shift @args;
435            my_die("Malformed flag $flags")
436                unless $flags =~ /\A=([a-z]*)\z/;
437            $flag_set{$_} = $1 foreach @args;
438        } elsif ($command eq 'path') {
439            # If the pod's name matches the regex, prepend the given path.
440            my_die("Malformed path for /$args[0]/")
441                unless @args == 2;
442            push @paths, [qr/\A$args[0]\z/, $args[1]];
443        } elsif ($command eq 'aux') {
444            # The contents of perltoc.pod's "AUXILIARY DOCUMENTATION" section
445            $state{aux} = [sort @args];
446        } else {
447            my_die("Unknown buildtoc command '$command'");
448        }
449    }
450
451    foreach (<$master>) {
452        next if /^$/ or /^#/;
453        next if /^=head2/;
454        last if /^=for buildtoc __END__$/;
455
456        if (my ($action, $flags) = /^=for buildtoc flag ([-+])([a-z]+)/) {
457            if ($action eq '+') {
458                $current_flags .= $flags;
459            } else {
460                my_die("Attempt to unset [$flags] failed - flags are '$current_flags")
461                    unless $current_flags =~ s/[\Q$flags\E]//g;
462            }
463        } elsif (my ($leafname, $desc) = /^\s+(\S+)\s+(.*)/) {
464            my $podname = $leafname;
465            my $filename = "pod/$podname.pod";
466            foreach (@paths) {
467                my ($re, $path) = @$_;
468                if ($leafname =~ $re) {
469                    $podname = $path . $leafname;
470                    $filename = "$podname.pod";
471                    last;
472                }
473            }
474
475            # Keep this compatible with pre-5.10
476            my $flags = delete $flag_set{$leafname};
477            $flags = $current_flags unless defined $flags;
478
479            my %flags;
480            $flags{toc_omit} = 1 if $flags =~ tr/o//d;
481            $flags{dual} = $podname ne $leafname;
482
483            $state{generated}{"$podname.pod"}++ if $flags =~ tr/g//d;
484
485            if ($flags =~ tr/r//d) {
486                my $readme = $podname;
487                $readme =~ s/^perl//;
488                $state{readmes}{$readme} = $desc;
489                $flags{readme} = 1;
490            } else {
491                $state{pods}{$podname} = $desc;
492            }
493            my_die "Unknown flag found in section line: $_" if length $flags;
494
495            push @{$state{master}},
496                [$leafname, $filename, \%flags];
497
498            if ($podname eq 'perldelta') {
499                local $" = '.';
500                push @{$state{master}},
501                    [$delta_leaf, "pod/$state{delta_target}"];
502                $state{pods}{$delta_leaf} = "Perl changes in version @want";
503            }
504
505        } else {
506            my_die("Malformed line: $_");
507        }
508    }
509    close $master or my_die("close pod/perl.pod: $!");
510    # This has to be special-cased somewhere. Turns out this is cleanest:
511    push @{$state{master}}, ['a2p', 'x2p/a2p.pod', {toc_omit => 1}];
512
513    my_die("perl.pod sets flags for unknown pods: "
514           . join ' ', sort keys %flag_set)
515        if keys %flag_set;
516}
517
518=head2 C<get_pod_metadata()>
519
520=over 4
521
522=item * Purpose
523
524=item * Arguments
525
526List of one or more arguments.
527
528=over 4
529
530=item * Boolean true or false
531
532=item * Reference to a subroutine.
533
534=item * Various other arguments.
535
536=back
537
538Example:
539
540    $state = get_pod_metadata(
541        0, sub { warn @_ if @_ }, 'pod/perltoc.pod');
542
543    get_pod_metadata(
544        1, sub { warn @_ if @_ }, values %Build);
545
546=item * Return Value
547
548Hash reference; each element provides either a list or a lookup table for
549information about various types of POD files.
550
551  'aux'             => [ # utility programs like
552                            'h2xs' and 'perlbug' ]
553  'generated'       => { # lookup table for generated POD files
554                            like 'perlapi.pod' }
555  'ignore'          => { # lookup table for files to be ignored }
556  'pods'            => { # lookup table in "name" =>
557                            "short description" format }
558  'readmes'         => { # lookup table for OS-specific
559                            and other READMEs }
560  'delta_version'   => [ # major version number, minor no.,
561                            patch no. ]
562  'delta_target'    => 'perl<Mmmpp>delta.pod',
563  'master'          => [ # list holding entries for files callable
564                        by 'perldoc' ]
565  'copies'          => { # patch version perldelta =>
566                        minor version perldelta }
567
568=back
569
570=cut
571
572sub get_pod_metadata {
573    # Do we expect to find generated pods on disk?
574    my $permit_missing_generated = shift;
575    # Do they want a consistency report?
576    my $callback = shift;
577    local $_;
578
579    __prime_state() unless $state{master};
580    return \%state unless $callback;
581
582    my %BuildFiles;
583
584    foreach my $path (@_) {
585        $path =~ m!([^/]+)$!;
586        ++$BuildFiles{$1};
587    }
588
589    # Sanity cross check
590
591    my (%disk_pods, %manipods, %manireadmes);
592    my (%cpanpods, %cpanpods_leaf);
593    my (%our_pods);
594
595    # There are files that we don't want to list in perl.pod.
596    # Maybe the various stub manpages should be listed there.
597    my %ignoredpods = map { ( "$_.pod" => 1 ) } qw( );
598
599    # Convert these to a list of filenames.
600    ++$our_pods{"$_.pod"} foreach keys %{$state{pods}};
601    foreach (@{$state{master}}) {
602        ++$our_pods{"$_->[0].pod"}
603            if $_->[2]{readme};
604    }
605
606    opendir my $dh, 'pod';
607    while (defined ($_ = readdir $dh)) {
608        next unless /\.pod\z/;
609        ++$disk_pods{$_};
610    }
611
612    # Things we copy from won't be in perl.pod
613    # Things we copy to won't be in MANIFEST
614
615    my $mani = open_or_die('MANIFEST');
616    while (<$mani>) {
617        chomp;
618        s/\s+.*$//;
619        if (m!^pod/([^.]+\.pod)!i) {
620            ++$manipods{$1};
621        } elsif (m!^README\.(\S+)!i) {
622            next if $state{ignore}{$1};
623            ++$manireadmes{"perl$1.pod"};
624        } elsif (exists $our_pods{$_}) {
625            ++$cpanpods{$_};
626            m!([^/]+)$!;
627            ++$cpanpods_leaf{$1};
628            $disk_pods{$_}++
629                if -e $_;
630        }
631    }
632    close $mani or my_die "close MANIFEST: $!\n";
633
634    # Are we running before known generated files have been generated?
635    # (eg in a clean checkout)
636    my %not_yet_there;
637    if ($permit_missing_generated) {
638        # If so, don't complain if these files aren't yet in place
639        %not_yet_there = (%manireadmes, %{$state{generated}}, %{$state{copies}})
640    }
641
642    my @inconsistent;
643    foreach my $i (sort keys %disk_pods) {
644        push @inconsistent, "$0: $i exists but is unknown by buildtoc\n"
645            unless $our_pods{$i} || $ignoredpods{$i};
646        push @inconsistent, "$0: $i exists but is unknown by MANIFEST\n"
647            if !$BuildFiles{'MANIFEST'} # Ignore if we're rebuilding MANIFEST
648                && !$manipods{$i} && !$manireadmes{$i} && !$state{copies}{$i}
649                    && !$state{generated}{$i} && !$cpanpods{$i};
650    }
651    foreach my $i (sort keys %our_pods) {
652        push @inconsistent, "$0: $i is known by buildtoc but does not exist\n"
653            unless $disk_pods{$i} or $BuildFiles{$i} or $not_yet_there{$i};
654    }
655    unless ($BuildFiles{'MANIFEST'}) {
656        # Again, ignore these if we're about to rebuild MANIFEST
657        foreach my $i (sort keys %manipods) {
658            push @inconsistent, "$0: $i is known by MANIFEST but does not exist\n"
659                unless $disk_pods{$i};
660            push @inconsistent, "$0: $i is known by MANIFEST but is marked as generated\n"
661                if $state{generated}{$i};
662        }
663    }
664    &$callback(@inconsistent);
665    return \%state;
666}
667
6681;
669
670# Local variables:
671# cperl-indent-level: 4
672# indent-tabs-mode: nil
673# End:
674#
675# ex: set ts=8 sts=4 sw=4 et:
676