1#!/usr/bin/perl -w
2#
3# Unconditionally regenerate:
4#
5#    pod/perlintern.pod
6#    pod/perlapi.pod
7#
8# from information stored in
9#
10#    embed.fnc
11#    plus all the .c and .h files listed in MANIFEST
12#
13# Has an optional arg, which is the directory to chdir to before reading
14# MANIFEST and *.[ch].
15#
16# This script is invoked as part of 'make all'
17#
18# '=head1' are the only headings looked for.  If the first non-blank line after
19# the heading begins with a word character, it is considered to be the first
20# line of documentation that applies to the heading itself.  That is, it is
21# output immediately after the heading, before the first function, and not
22# indented. The next input line that is a pod directive terminates this
23# heading-level documentation.
24
25# The meanings of the flags fields in embed.fnc and the source code is
26# documented at the top of embed.fnc.
27
28use strict;
29
30if (@ARGV) {
31    my $workdir = shift;
32    chdir $workdir
33        or die "Couldn't chdir to '$workdir': $!";
34}
35require './regen/regen_lib.pl';
36require './regen/embed_lib.pl';
37
38my @specialized_docs = sort qw( perlguts
39                                perlxs
40                                perlxstut
41                                perlclib
42                                warnings
43                                perlapio
44                                perlcall
45                                perlfilter
46                                perlmroapi
47                                config.h
48                              );
49sub name_in_pod($) {
50    my $name = shift;
51    return "F<$name>" if $name =~ /\./;
52    return "L<$name>";
53}
54my $other_places_api = join " ",    map { name_in_pod($_) } sort @specialized_docs, 'perlintern';
55my $other_places_intern = join " ", map { name_in_pod($_) } sort @specialized_docs, 'perlapi';
56
57@specialized_docs = map { name_in_pod($_) } sort @specialized_docs;
58$specialized_docs[-1] =~ s/^/and /;
59my $specialized_docs = join ", ", @specialized_docs;
60
61#
62# See database of global and static function prototypes in embed.fnc
63# This is used to generate prototype headers under various configurations,
64# export symbols lists for different platforms, and macros to provide an
65# implicit interpreter context argument.
66#
67
68my %docs;
69my %seen;
70my %funcflags;
71my %missing;
72
73my $curheader = "Unknown section";
74
75sub autodoc ($$) { # parse a file and extract documentation info
76    my($fh,$file) = @_;
77    my($in, $doc, $line, $header_doc);
78
79    # Count lines easier
80    my $get_next_line = sub { $line++; return <$fh> };
81
82FUNC:
83    while (defined($in = $get_next_line->())) {
84        if ($in=~ /^=head1 (.*)/) {
85            $curheader = $1;
86
87            # If the next non-space line begins with a word char, then it is
88            # the start of heading-level documentation.
89            if (defined($doc = $get_next_line->())) {
90                # Skip over empty lines
91                while ($doc =~ /^\s+$/) {
92                    if (! defined($doc = $get_next_line->())) {
93                        next FUNC;
94                    }
95                }
96
97                if ($doc !~ /^\w/) {
98                    $in = $doc;
99                    redo FUNC;
100                }
101                $header_doc = $doc;
102
103                # Continue getting the heading-level documentation until read
104                # in any pod directive (or as a fail-safe, find a closing
105                # comment to this pod in a C language file
106HDR_DOC:
107                while (defined($doc = $get_next_line->())) {
108                    if ($doc =~ /^=\w/) {
109                        $in = $doc;
110                        redo FUNC;
111                    }
112
113                    if ($doc =~ m:^\s*\*/$:) {
114                        warn "=cut missing? $file:$line:$doc";;
115                        last HDR_DOC;
116                    }
117                    $header_doc .= $doc;
118                }
119            }
120            next FUNC;
121        }
122
123        # Parentheses are used to accept anything that looks like 'for
124        # apidoc', and later verify that things are the actual correct syntax.
125        my $apidoc_re = qr/^(\s*)(=?)(\s*)for(\s*)apidoc(\s*)(.*?)\s*\n/;
126
127        if ($in =~ /^=for comment/) {
128            $in = $get_next_line->();
129            if ($in =~ /skip apidoc/) {   # Skips the next apidoc-like line
130                while (defined($in = $get_next_line->())) {
131                    last if $in =~ $apidoc_re;
132                }
133            }
134            next FUNC;
135        }
136
137        if ($in =~ $apidoc_re) {
138            my $is_in_proper_form = length $1 == 0
139                                 && length $2 > 0
140                                 && length $3 == 0
141                                 && length $4 > 0
142                                 && length $5 > 0
143                                 && length $6 > 0;
144            my $proto_in_file = $6;
145            my $proto = $proto_in_file;
146            $proto = "||$proto" unless $proto =~ /\|/;
147            my($flags, $ret, $name, @args) = split /\s*\|\s*/, $proto;
148            $name && $is_in_proper_form or die <<EOS;
149Bad apidoc at $file line $.:
150  $in
151Expected:
152  =for apidoc flags|returntype|name|arg|arg|...
153  =for apidoc flags|returntype|name
154  =for apidoc name
155EOS
156            die "flag $1 is not legal (for function $name (from $file))"
157                        if $flags =~ / ( [^AabCDdEefhiMmNnTOoPpRrSsUuWXx] ) /x;
158            next FUNC if $flags =~ /h/;
159
160            die "'u' flag must also have 'm' flag' for $name" if $flags =~ /u/ && $flags !~ /m/;
161            warn ("'$name' not \\w+ in '$proto_in_file' in $file")
162                        if $flags !~ /N/ && $name !~ / ^ [_[:alpha:]] \w* $ /x;
163
164            if (exists $seen{$name}) {
165                die ("'$name' in $file was already documented in $seen{$name}");
166            }
167            else {
168                $seen{$name} = $file;
169            }
170
171            my $docs = "";
172DOC:
173            while (defined($doc = $get_next_line->())) {
174
175                # Other pod commands are considered part of the current
176                # function's docs, so can have lists, etc.
177                last DOC if $doc =~ /^=(cut|for\s+apidoc|head)/;
178                if ($doc =~ m:^\*/$:) {
179                    warn "=cut missing? $file:$line:$doc";;
180                    last DOC;
181                }
182                $docs .= $doc;
183            }
184            $docs = "\n$docs" if $docs and $docs !~ /^\n/;
185
186            # If the entry is also in embed.fnc, it should be defined
187            # completely there, but not here
188            my $embed_docref = delete $funcflags{$name};
189            if ($embed_docref and %$embed_docref) {
190                warn "embed.fnc entry overrides redundant information in"
191                   . " '$proto_in_file' in $file" if $flags || $ret || @args;
192                $flags = $embed_docref->{'flags'};
193                warn "embed.fnc entry '$name' missing 'd' flag"
194                                                            unless $flags =~ /d/;
195                next FUNC if $flags =~ /h/;
196                $ret = $embed_docref->{'retval'};
197                @args = @{$embed_docref->{args}};
198            } elsif ($flags !~ /m/)  { # Not in embed.fnc, is missing if not a
199                                       # macro
200                $missing{$name} = $file;
201            }
202
203            my $inline_where = $flags =~ /A/ ? 'api' : 'guts';
204
205            if (exists $docs{$inline_where}{$curheader}{$name}) {
206                warn "$0: duplicate API entry for '$name' in $inline_where/$curheader\n";
207                next;
208            }
209            $docs{$inline_where}{$curheader}{$name}
210                = [$flags, $docs, $ret, $file, @args];
211
212            # Create a special entry with an empty-string name for the
213            # heading-level documentation.
214            if (defined $header_doc) {
215                $docs{$inline_where}{$curheader}{""} = $header_doc;
216                undef $header_doc;
217            }
218
219            if (defined $doc) {
220                if ($doc =~ /^=(?:for|head)/) {
221                    $in = $doc;
222                    redo FUNC;
223                }
224            } else {
225                warn "$file:$line:$in";
226            }
227        }
228    }
229}
230
231sub docout ($$$) { # output the docs for one function
232    my($fh, $name, $docref) = @_;
233    my($flags, $docs, $ret, $file, @args) = @$docref;
234    $name =~ s/\s*$//;
235
236    if ($flags =~ /D/) {
237        $docs = "\n\nDEPRECATED!  It is planned to remove this function from a
238future release of Perl.  Do not use it for new code; remove it from
239existing code.\n\n$docs";
240    }
241    else {
242        $docs = "\n\nNOTE: this function is experimental and may change or be
243removed without notice.\n\n$docs" if $flags =~ /x/;
244    }
245
246    # Is Perl_, but no #define foo # Perl_foo
247    my $p = $flags =~ /p/ && $flags =~ /o/ && $flags !~ /M/;
248
249    $docs .= "NOTE: the perl_ form of this function is deprecated.\n\n"
250         if $flags =~ /O/;
251    if ($p) {
252        $docs .= "NOTE: this function must be explicitly called as Perl_$name";
253        $docs .= " with an aTHX_ parameter" if $flags !~ /T/;
254        $docs .= ".\n\n"
255    }
256
257    print $fh "=item $name\nX<$name>\n$docs";
258
259    if ($flags =~ /U/) { # no usage
260        warn("U and s flags are incompatible") if $flags =~ /s/;
261        # nothing
262    } else {
263        if ($flags =~ /n/) { # no args
264            warn("n flag without m") unless $flags =~ /m/;
265            warn("n flag but apparently has args") if @args;
266            print $fh "\t$ret\t$name";
267        } else { # full usage
268            my $n            = "Perl_"x$p . $name;
269            my $large_ret    = length $ret > 7;
270            my $indent_size  = 7+8 # nroff: 7 under =head + 8 under =item
271                            +8+($large_ret ? 1 + length $ret : 8)
272                            +length($n) + 1;
273            my $indent;
274            print $fh "\t$ret" . ($large_ret ? ' ' : "\t") . "$n(";
275            my $long_args;
276            for (@args) {
277                if ($indent_size + 2 + length > 79) {
278                    $long_args=1;
279                    $indent_size -= length($n) - 3;
280                    last;
281                }
282            }
283            my $args = '';
284            if ($flags !~ /T/ && ($p || ($flags =~ /m/ && $name =~ /^Perl_/))) {
285                $args = @args ? "pTHX_ " : "pTHX";
286                if ($long_args) { print $fh $args; $args = '' }
287            }
288            $long_args and print $fh "\n";
289            my $first = !$long_args;
290            while () {
291                if (!@args or
292                    length $args
293                    && $indent_size + 3 + length($args[0]) + length $args > 79
294                ) {
295                    print $fh
296                    $first ? '' : (
297                        $indent //=
298                        "\t".($large_ret ? " " x (1+length $ret) : "\t")
299                        ." "x($long_args ? 4 : 1 + length $n)
300                    ),
301                    $args, (","x($args ne 'pTHX_ ') . "\n")x!!@args;
302                    $args = $first = '';
303                }
304                @args or last;
305                $args .= ", "x!!(length $args && $args ne 'pTHX_ ')
306                    . shift @args;
307            }
308            if ($long_args) { print $fh "\n", substr $indent, 0, -4 }
309            print $fh ")";
310        }
311        print $fh ";" if $flags =~ /s/; # semicolon "dTHR;"
312        print $fh "\n\n";
313    }
314    print $fh "=for hackers\nFound in file $file\n\n";
315}
316
317sub sort_helper {
318    # Do a case-insensitive dictionary sort, with only alphabetics
319    # significant, falling back to using everything for determinancy
320    return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
321           || uc($a) cmp uc($b)
322           || $a cmp $b;
323}
324
325sub output {
326    my ($podname, $header, $dochash, $missing, $footer) = @_;
327    #
328    # strip leading '|' from each line which had been used to hide
329    # pod from pod checkers.
330    s/^\|//gm for $header, $footer;
331
332    my $fh = open_new("pod/$podname.pod", undef,
333                      {by => "$0 extracting documentation",
334                       from => 'the C source files'}, 1);
335
336    print $fh $header;
337
338    my $key;
339    for $key (sort sort_helper keys %$dochash) {
340        my $section = $dochash->{$key};
341        print $fh "\n=head1 $key\n\n";
342
343        # Output any heading-level documentation and delete so won't get in
344        # the way later
345        if (exists $section->{""}) {
346            print $fh $section->{""} . "\n";
347            delete $section->{""};
348        }
349        print $fh "=over 8\n\n";
350
351        for my $key (sort sort_helper keys %$section) {
352            docout($fh, $key, $section->{$key});
353        }
354        print $fh "\n=back\n";
355    }
356
357    if (@$missing) {
358        print $fh "\n=head1 Undocumented functions\n\n";
359    print $fh $podname eq 'perlapi' ? <<'_EOB_' : <<'_EOB_';
360The following functions have been flagged as part of the public API,
361but are currently undocumented.  Use them at your own risk, as the
362interfaces are subject to change.  Functions that are not listed in this
363document are not intended for public use, and should NOT be used under any
364circumstances.
365
366If you feel you need to use one of these functions, first send email to
367L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.  It may be
368that there is a good reason for the function not being documented, and it
369should be removed from this list; or it may just be that no one has gotten
370around to documenting it.  In the latter case, you will be asked to submit a
371patch to document the function.  Once your patch is accepted, it will indicate
372that the interface is stable (unless it is explicitly marked otherwise) and
373usable by you.
374_EOB_
375The following functions are currently undocumented.  If you use one of
376them, you may wish to consider creating and submitting documentation for
377it.
378_EOB_
379    print $fh "\n=over\n\n";
380
381    for my $missing (sort @$missing) {
382        print $fh "=item $missing\nX<$missing>\n\n";
383    }
384    print $fh "=back\n\n";
385}
386    print $fh $footer, "=cut\n";
387
388    read_only_bottom_close_and_rename($fh);
389}
390
391foreach (@{(setup_embed())[0]}) {
392    next if @$_ < 2;
393    my ($flags, $retval, $func, @args) = @$_;
394    s/\b(?:NN|NULLOK)\b\s+//g for @args;
395
396    $funcflags{$func} = {
397                         flags => $flags,
398                         retval => $retval,
399                         args => \@args,
400                        };
401}
402
403# glob() picks up docs from extra .c or .h files that may be in unclean
404# development trees.
405open my $fh, '<', 'MANIFEST'
406    or die "Can't open MANIFEST: $!";
407while (my $line = <$fh>) {
408    next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
409
410    open F, '<', $file or die "Cannot open $file for docs: $!\n";
411    $curheader = "Functions in file $file\n";
412    autodoc(\*F,$file);
413    close F or die "Error closing $file: $!\n";
414}
415close $fh or die "Error whilst reading MANIFEST: $!";
416
417for (sort keys %funcflags) {
418    next unless $funcflags{$_}{flags} =~ /d/;
419    next if $funcflags{$_}{flags} =~ /h/;
420    warn "no docs for $_\n"
421}
422
423foreach (sort keys %missing) {
424    warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
425}
426
427# walk table providing an array of components in each line to
428# subroutine, printing the result
429
430# List of funcs in the public API that aren't also marked as core-only,
431# experimental nor deprecated.
432my @missing_api = grep $funcflags{$_}{flags} =~ /A/
433                    && $funcflags{$_}{flags} !~ /[xD]/
434                    && !$docs{api}{$_}, keys %funcflags;
435output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
436|=encoding UTF-8
437|
438|=head1 NAME
439|
440|perlapi - autogenerated documentation for the perl public API
441|
442|=head1 DESCRIPTION
443|X<Perl API> X<API> X<api>
444|
445|This file contains most of the documentation of the perl public API, as
446|generated by F<embed.pl>.  Specifically, it is a listing of functions,
447|macros, flags, and variables that may be used by extension writers.  Some
448|specialized items are instead documented in $specialized_docs.
449|
450|L<At the end|/Undocumented functions> is a list of functions which have yet
451|to be documented.  Patches welcome!  The interfaces of these are subject to
452|change without notice.
453|
454|Anything not listed here is not part of the public API, and should not be
455|used by extension writers at all.  For these reasons, blindly using functions
456|listed in proto.h is to be avoided when writing extensions.
457|
458|In Perl, unlike C, a string of characters may generally contain embedded
459|C<NUL> characters.  Sometimes in the documentation a Perl string is referred
460|to as a "buffer" to distinguish it from a C string, but sometimes they are
461|both just referred to as strings.
462|
463|Note that all Perl API global variables must be referenced with the C<PL_>
464|prefix.  Again, those not listed here are not to be used by extension writers,
465|and can be changed or removed without notice; same with macros.
466|Some macros are provided for compatibility with the older,
467|unadorned names, but this support may be disabled in a future release.
468|
469|Perl was originally written to handle US-ASCII only (that is characters
470|whose ordinal numbers are in the range 0 - 127).
471|And documentation and comments may still use the term ASCII, when
472|sometimes in fact the entire range from 0 - 255 is meant.
473|
474|The non-ASCII characters below 256 can have various meanings, depending on
475|various things.  (See, most notably, L<perllocale>.)  But usually the whole
476|range can be referred to as ISO-8859-1.  Often, the term "Latin-1" (or
477|"Latin1") is used as an equivalent for ISO-8859-1.  But some people treat
478|"Latin1" as referring just to the characters in the range 128 through 255, or
479|somethimes from 160 through 255.
480|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
481|
482|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
483|L<perlebcdic>).  Most of the documentation (and even comments in the code)
484|ignore the EBCDIC possibility.
485|For almost all purposes the differences are transparent.
486|As an example, under EBCDIC,
487|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
488|whenever this documentation refers to C<utf8>
489|(and variants of that name, including in function names),
490|it also (essentially transparently) means C<UTF-EBCDIC>.
491|But the ordinals of characters differ between ASCII, EBCDIC, and
492|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
493|number of bytes than in UTF-8.
494|
495|The listing below is alphabetical, case insensitive.
496|
497_EOB_
498|
499|=head1 AUTHORS
500|
501|Until May 1997, this document was maintained by Jeff Okamoto
502|<okamoto\@corp.hp.com>.  It is now maintained as part of Perl itself.
503|
504|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
505|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
506|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
507|Stephen McCamant, and Gurusamy Sarathy.
508|
509|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
510|
511|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
512|
513|=head1 SEE ALSO
514|
515$other_places_api
516_EOE_
517
518# List of non-static internal functions
519my @missing_guts =
520 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
521
522output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
523|=head1 NAME
524|
525|perlintern - autogenerated documentation of purely B<internal>
526|Perl functions
527|
528|=head1 DESCRIPTION
529|X<internal Perl functions> X<interpreter functions>
530|
531|This file is the autogenerated documentation of functions in the
532|Perl interpreter that are documented using Perl's internal documentation
533|format but are not marked as part of the Perl API.  In other words,
534|B<they are not for use in extensions>!
535|
536_EOB_
537|
538|=head1 AUTHORS
539|
540|The autodocumentation system was originally added to the Perl core by
541|Benjamin Stuhl.  Documentation is by whoever was kind enough to
542|document their functions.
543|
544|=head1 SEE ALSO
545|
546$other_places_intern
547_EOE_
548