1#!/usr/bin/perl -w
2
3use Text::Tabs;
4#
5# Unconditionally regenerate:
6#
7#    pod/perlintern.pod
8#    pod/perlapi.pod
9#
10# from information stored in
11#
12#    embed.fnc
13#    plus all the core .c, .h, and .pod files listed in MANIFEST
14#
15# Has an optional arg, which is the directory to chdir to before reading
16# MANIFEST and the files
17#
18# This script is invoked as part of 'make all'
19#
20# The generated pod consists of sections of related elements, functions,
21# macros, and variables.  The keys of %valid_sections give the current legal
22# ones.  Just add a new key to add a section.
23#
24# Throughout the files read by this script are lines like
25#
26# =for apidoc_section Section Name
27# =for apidoc_section $section_name_variable
28#
29# "Section Name" (after having been stripped of leading space) must be one of
30# the legal section names, or an error is thrown.  $section_name_variable must
31# be one of the legal section name variables defined below; these expand to
32# legal section names.  This form is used so that minor wording changes in
33# these titles can be confied to this file.  All the names of the variables
34# end in '_scn'; this suffix is optional in the apidoc_section lines.
35#
36# All API elements defined between this line and the next 'apidoc_section'
37# line will go into the section "Section Name" (or $section_name_variable),
38# sorted by dictionary order within it.  perlintern and perlapi are parallel
39# documents, each potentially with a section "Section Name".  Each element is
40# marked as to which document it goes into.  If there are none for a
41# particular section in perlapi, that section is omitted.
42#
43# Also, in .[ch] files, there may be
44#
45# =head1 Section Name
46#
47# lines in comments.  These are also used by this program to switch to section
48# "Section Name".  The difference is that if there are any lines after the
49# =head1, inside the same comment, and before any =for apidoc-ish lines, they
50# are used as a heading for section "Section Name" (in both perlintern and
51# perlapi).  This includes any =head[2-5].  If more than one '=head1 Section
52# Name' line has content, they appear in the generated pod in an undefined
53# order.  Note that you can't use a $section_name_variable in =head1 lines
54#
55# The next =head1, =for apidoc_section, or file end terminates what goes into
56# the current section
57#
58# The %valid_sections hash below also can have header content, which will
59# appear before any =head1 content.  The hash can also have footer content
60# content, which will appear at the end of the section, after all the
61# elements.
62#
63# The lines that define the actual functions, etc are documented in embed.fnc,
64# because they have flags which must be kept in sync with that file.
65
66use strict;
67use warnings;
68
69# 80 column terminal - 2 for pager adding 2 columns; -4 for indent for
70# non-heading lines;
71my $max_width = 80 - 2 - 4;
72
73if (@ARGV) {
74    my $workdir = shift;
75    chdir $workdir
76        or die "Couldn't chdir to '$workdir': $!";
77}
78require './regen/regen_lib.pl';
79require './regen/embed_lib.pl';
80
81my %described_elsewhere;
82
83#
84# See database of global and static function prototypes in embed.fnc
85# This is used to generate prototype headers under various configurations,
86# export symbols lists for different platforms, and macros to provide an
87# implicit interpreter context argument.
88#
89
90my %docs;
91my %seen;
92my %funcflags;
93my %missing;
94my %missing_macros;
95
96my $link_text = "Described in";
97
98my $description_indent = 4;
99my $usage_indent = 3;   # + initial blank yields 4 total
100
101my $AV_scn = 'AV Handling';
102my $callback_scn = 'Callback Functions';
103my $casting_scn = 'Casting';
104my $casing_scn = 'Character case changing';
105my $classification_scn = 'Character classification';
106my $names_scn = 'Character names';
107my $scope_scn = 'Compile-time scope hooks';
108my $compiler_scn = 'Compiler and Preprocessor information';
109my $directives_scn = 'Compiler directives';
110my $concurrency_scn = 'Concurrency';
111my $COP_scn = 'COP Hint Hashes';
112my $CV_scn = 'CV Handling';
113my $custom_scn = 'Custom Operators';
114my $debugging_scn = 'Debugging';
115my $display_scn = 'Display functions';
116my $embedding_scn = 'Embedding and Interpreter Cloning';
117my $errno_scn = 'Errno';
118my $exceptions_scn = 'Exception Handling (simple) Macros';
119my $filesystem_scn = 'Filesystem configuration values';
120my $floating_scn = 'Floating point configuration values';
121my $formats_scn = 'Formats';
122my $genconfig_scn = 'General Configuration';
123my $globals_scn = 'Global Variables';
124my $GV_scn = 'GV Handling';
125my $hook_scn = 'Hook manipulation';
126my $HV_scn = 'HV Handling';
127my $io_scn = 'Input/Output';
128my $integer_scn = 'Integer configuration values';
129my $lexer_scn = 'Lexer interface';
130my $locale_scn = 'Locales';
131my $magic_scn = 'Magic';
132my $memory_scn = 'Memory Management';
133my $MRO_scn = 'MRO';
134my $multicall_scn = 'Multicall Functions';
135my $numeric_scn = 'Numeric Functions';
136my $optree_construction_scn = 'Optree construction';
137my $optree_manipulation_scn = 'Optree Manipulation Functions';
138my $pack_scn = 'Pack and Unpack';
139my $pad_scn = 'Pad Data Structures';
140my $password_scn = 'Password and Group access';
141my $paths_scn = 'Paths to system commands';
142my $prototypes_scn = 'Prototype information';
143my $regexp_scn = 'REGEXP Functions';
144my $signals_scn = 'Signals';
145my $site_scn = 'Site configuration';
146my $sockets_scn = 'Sockets configuration values';
147my $filters_scn = 'Source Filters';
148my $stack_scn = 'Stack Manipulation Macros';
149my $string_scn = 'String Handling';
150my $SV_flags_scn = 'SV Flags';
151my $SV_scn = 'SV Handling';
152my $time_scn = 'Time';
153my $typedefs_scn = 'Typedef names';
154my $unicode_scn = 'Unicode Support';
155my $utility_scn = 'Utility Functions';
156my $versioning_scn = 'Versioning';
157my $warning_scn = 'Warning and Dieing';
158my $XS_scn = 'XS';
159
160# Kept separate at end
161my $undocumented_scn = 'Undocumented elements';
162
163my %valid_sections = (
164    $AV_scn => {},
165    $callback_scn => {},
166    $casting_scn => {},
167    $casing_scn => {},
168    $classification_scn => {},
169    $scope_scn => {},
170    $compiler_scn => {},
171    $directives_scn => {},
172    $concurrency_scn => {},
173    $COP_scn => {},
174    $CV_scn => {
175        header => <<~'EOT',
176            This section documents functions to manipulate CVs which are
177            code-values, meaning subroutines.  For more information, see
178            L<perlguts>.
179            EOT
180    },
181
182    $custom_scn => {},
183    $debugging_scn => {},
184    $display_scn => {},
185    $embedding_scn => {},
186    $errno_scn => {},
187    $exceptions_scn => {},
188    $filesystem_scn => {
189        header => <<~'EOT',
190            Also see L</List of capability HAS_foo symbols>.
191            EOT
192        },
193    $floating_scn => {
194        header => <<~'EOT',
195            Also L</List of capability HAS_foo symbols> lists capabilities
196            that arent in this section.  For example C<HAS_ASINH>, for the
197            hyperbolic sine function.
198            EOT
199        },
200    $formats_scn => {
201        header => <<~'EOT',
202            These are used for formatting the corresponding type For example,
203            instead of saying
204
205             Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
206
207            use
208
209             Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
210
211            This keeps you from having to know if, say an IV, needs to be
212            printed as C<%d>, C<%ld>, or something else.
213            EOT
214      },
215    $genconfig_scn => {
216        header => <<~'EOT',
217            This section contains configuration information not otherwise
218            found in the more specialized sections of this document.  At the
219            end is a list of C<#defines> whose name should be enough to tell
220            you what they do, and a list of #defines which tell you if you
221            need to C<#include> files to get the corresponding functionality.
222            EOT
223
224        footer => <<~'EOT',
225
226            =head2 List of capability C<HAS_I<foo>> symbols
227
228            This is a list of those symbols that dont appear elsewhere in ths
229            document that indicate if the current platform has a certain
230            capability.  Their names all begin with C<HAS_>.  Only those
231            symbols whose capability is directly derived from the name are
232            listed here.  All others have their meaning expanded out elsewhere
233            in this document.  This (relatively) compact list is because we
234            think that the expansion would add little or no value and take up
235            a lot of space (because there are so many).  If you think certain
236            ones should be expanded, send email to
237            L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
238
239            Each symbol here will be C<#define>d if and only if the platform
240            has the capability.  If you need more detail, see the
241            corresponding entry in F<config.h>.  For convenience, the list is
242            split so that the ones that indicate there is a reentrant version
243            of a capability are listed separately
244
245            __HAS_LIST__
246
247            And, the reentrant capabilities:
248
249            __HAS_R_LIST__
250
251            Example usage:
252
253            =over
254
255             #ifdef HAS_STRNLEN
256               use strnlen()
257             #else
258               use an alternative implementation
259             #endif
260
261            =back
262
263            =head2 List of C<#include> needed symbols
264
265            This list contains symbols that indicate if certain C<#include>
266            files are present on the platform.  If your code accesses the
267            functionality that one of these is for, you will need to
268            C<#include> it if the symbol on this list is C<#define>d.  For
269            more detail, see the corresponding entry in F<config.h>.
270
271            __INCLUDE_LIST__
272
273            Example usage:
274
275            =over
276
277             #ifdef I_WCHAR
278               #include <wchar.h>
279             #endif
280
281            =back
282            EOT
283      },
284    $globals_scn => {},
285    $GV_scn => {},
286    $hook_scn => {},
287    $HV_scn => {},
288    $io_scn => {},
289    $integer_scn => {},
290    $lexer_scn => {},
291    $locale_scn => {},
292    $magic_scn => {},
293    $memory_scn => {},
294    $MRO_scn => {},
295    $multicall_scn => {},
296    $numeric_scn => {},
297    $optree_construction_scn => {},
298    $optree_manipulation_scn => {},
299    $pack_scn => {},
300    $pad_scn => {},
301    $password_scn => {},
302    $paths_scn => {},
303    $prototypes_scn => {},
304    $regexp_scn => {},
305    $signals_scn => {},
306    $site_scn => {
307        header => <<~'EOT',
308            These variables give details as to where various libraries,
309            installation destinations, I<etc.>, go, as well as what various
310            installation options were selected
311            EOT
312      },
313    $sockets_scn => {},
314    $filters_scn => {},
315    $stack_scn => {},
316    $string_scn => {
317        header => <<~EOT,
318            See also C<L</$unicode_scn>>.
319            EOT
320      },
321    $SV_flags_scn => {},
322    $SV_scn => {},
323    $time_scn => {},
324    $typedefs_scn => {},
325    $unicode_scn => {
326        header => <<~EOT,
327            L<perlguts/Unicode Support> has an introduction to this API.
328
329            See also C<L</$classification_scn>>,
330            C<L</$casing_scn>>,
331            and C<L</$string_scn>>.
332            Various functions outside this section also work specially with
333            Unicode.  Search for the string "utf8" in this document.
334            EOT
335      },
336    $utility_scn => {},
337    $versioning_scn => {},
338    $warning_scn => {},
339    $XS_scn => {},
340);
341
342# Somewhat loose match for an apidoc line so we can catch minor typos.
343# Parentheses are used to capture portions so that below we verify
344# that things are the actual correct syntax.
345my $apidoc_re = qr/ ^ (\s*)            # $1
346                      (=?)             # $2
347                      (\s*)            # $3
348                      for (\s*)        # $4
349                      apidoc (_item)?  # $5
350                      (\s*)            # $6
351                      (.*?)            # $7
352                      \s* \n /x;
353# Only certain flags, dealing with display, are acceptable for apidoc_item
354my $display_flags = "fFnDopsT";
355
356sub check_api_doc_line ($$) {
357    my ($file, $in) = @_;
358
359    return unless $in =~ $apidoc_re;
360
361    my $is_item = defined $5;
362    my $is_in_proper_form = length $1 == 0
363                         && length $2 > 0
364                         && length $3 == 0
365                         && length $4 > 0
366                         && length $6 > 0
367                         && length $7 > 0;
368    my $proto_in_file = $7;
369    my $proto = $proto_in_file;
370    $proto = "||$proto" if $proto !~ /\|/;
371    my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
372
373    $name && $is_in_proper_form or die <<EOS;
374Bad apidoc at $file line $.:
375  $in
376Expected:
377  =for apidoc flags|returntype|name|arg|arg|...
378  =for apidoc flags|returntype|name
379  =for apidoc name
380(or 'apidoc_item')
381EOS
382
383    die "Only [$display_flags] allowed in apidoc_item:\n$in"
384                            if $is_item && $flags =~ /[^$display_flags]/;
385
386    return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
387}
388
389sub embed_override($) {
390    my ($element_name) = shift;
391
392    # If the entry is also in embed.fnc, it should be defined
393    # completely there, but not here
394    my $embed_docref = delete $funcflags{$element_name};
395
396    return unless $embed_docref and %$embed_docref;
397
398    my $flags = $embed_docref->{'flags'};
399    warn "embed.fnc entry '$element_name' missing 'd' flag"
400                                            unless $flags =~ /d/;
401
402    return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
403}
404
405# The section that is in effect at the beginning of the given file.  If not
406# listed here, an apidoc_section line must precede any apidoc lines.
407# This allows the files listed here that generally are single-purpose, to not
408# have to worry about the autodoc section
409my %initial_file_section = (
410                            'av.c' => $AV_scn,
411                            'av.h' => $AV_scn,
412                            'cv.h' => $CV_scn,
413                            'doio.c' => $io_scn,
414                            'gv.c' => $GV_scn,
415                            'gv.h' => $GV_scn,
416                            'hv.h' => $HV_scn,
417                            'locale.c' => $locale_scn,
418                            'malloc.c' => $memory_scn,
419                            'numeric.c' => $numeric_scn,
420                            'opnames.h' => $optree_construction_scn,
421                            'pad.h'=> $pad_scn,
422                            'patchlevel.h' => $versioning_scn,
423                            'perlio.h' => $io_scn,
424                            'pod/perlapio.pod' => $io_scn,
425                            'pod/perlcall.pod' => $callback_scn,
426                            'pod/perlembed.pod' => $embedding_scn,
427                            'pod/perlfilter.pod' => $filters_scn,
428                            'pod/perliol.pod' => $io_scn,
429                            'pod/perlmroapi.pod' => $MRO_scn,
430                            'pod/perlreguts.pod' => $regexp_scn,
431                            'pp_pack.c' => $pack_scn,
432                            'pp_sort.c' => $SV_scn,
433                            'regcomp.c' => $regexp_scn,
434                            'regexp.h' => $regexp_scn,
435                            'unicode_constants.h' => $unicode_scn,
436                            'utf8.c' => $unicode_scn,
437                            'utf8.h' => $unicode_scn,
438                            'vutil.c' => $versioning_scn,
439                           );
440
441sub autodoc ($$) { # parse a file and extract documentation info
442    my($fh,$file) = @_;
443    my($in, $line_num, $header, $section);
444
445    $section = $initial_file_section{$file}
446                                    if defined $initial_file_section{$file};
447
448    my $file_is_C = $file =~ / \. [ch] $ /x;
449
450    # Count lines easier
451    my $get_next_line = sub { $line_num++; return <$fh> };
452
453    # Read the file
454    while ($in = $get_next_line->()) {
455        last unless defined $in;
456
457        next unless (    $in =~ / ^ =for [ ]+ apidoc /x
458                                      # =head1 lines only have effect in C files
459                     || ($file_is_C && $in =~ /^=head1/));
460
461        # Here, the line introduces a portion of the input that we care about.
462        # Either it is for an API element, or heading text which we expect
463        # will be used for elements later in the file
464
465        my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
466        my (@args, @items);
467
468        # If the line starts a new section ...
469        if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
470
471            $section = $1;
472            if ($section =~ / ^ \$ /x) {
473                $section .= '_scn' unless $section =~ / _scn $ /;
474                $section = eval "$section";
475                die "Unknown \$section variable '$section' in $file: $@" if $@;
476            }
477            die "Unknown section name '$section' in $file near line $.\n"
478                                    unless defined $valid_sections{$section};
479
480        }
481        elsif ($in=~ /^ =for [ ]+ apidoc \B /x) {   # Otherwise better be a
482                                                    # plain apidoc line
483            die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
484            die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
485        }
486        else {  # Plain apidoc
487
488            ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
489                                                = check_api_doc_line($file, $in);
490            # Override this line with any info in embed.fnc
491            my ($embed_flags, $embed_ret_type, @embed_args)
492                                                = embed_override($element_name);
493            if ($embed_ret_type) {
494                warn "embed.fnc entry overrides redundant information in"
495                    . " '$proto_in_file' in $file"
496                                               if $flags || $ret_type || @args;
497                $flags = $embed_flags;
498                $ret_type = $embed_ret_type;
499                @args = @embed_args;
500            }
501            elsif ($flags !~ /[my]/)  { # Not in embed.fnc, is missing if not
502                                        # a macro or typedef
503                $missing{$element_name} = $file;
504            }
505
506            die "flag '$1' is not legal (for function $element_name (from $file))"
507                        if $flags =~ / ( [^AabCDdEeFfGhiMmNnTOoPpRrSsUuWXxy] ) /x;
508
509            die "'u' flag must also have 'm' or 'y' flags' for $element_name"
510                                            if $flags =~ /u/ && $flags !~ /[my]/;
511            warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
512                        if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
513
514            if (exists $seen{$element_name} && $flags !~ /h/) {
515                die ("'$element_name' in $file was already documented in $seen{$element_name}");
516            }
517            else {
518                $seen{$element_name} = $file;
519            }
520        }
521
522        # Here we have processed the initial line in the heading text or API
523        # element, and have saved the important information from it into the
524        # corresponding variables.  Now accumulate the text that applies to it
525        # up to a terminating line, which is one of:
526        # 1) =cut
527        # 2) =head (in a C file only =head1)
528        # 3) an end comment line in a C file: m:^\s*\*/:
529        # 4) =for apidoc... (except apidoc_item lines)
530        $text = "";
531        my $head_ender_num = ($file_is_C) ? 1 : "";
532        while (defined($in = $get_next_line->())) {
533
534            last if $in =~ /^=cut/x;
535            last if $in =~ /^=head$head_ender_num/;
536
537            if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
538
539                # End of comment line in C files is a fall-back terminator,
540                # but warn only if there actually is some accumulated text
541                warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
542                last;
543            }
544
545            if ($in !~ / ^ =for [ ]+ apidoc /x) {
546                $text .= $in;
547                next;
548            }
549
550            # Here, the line is an apidoc line.  All but apidoc_item terminate
551            # the text being accumulated.
552            last if $in =~ / ^ =for [ ]+ apidoc_section /x;
553
554            my ($item_name, $item_flags, $item_ret_type, $is_item,
555                    $item_proto, @item_args) = check_api_doc_line($file, $in);
556            last unless $is_item;
557
558            # Here, is an apidoc_item_line; They can only come within apidoc
559            # paragraphs.
560            die "Unexpected api_doc_item line '$item_proto'"
561                                                        unless $element_name;
562
563            # We accept blank lines between these, but nothing else;
564            die "apidoc_item lines must immediately follow apidoc lines for "
565              . " '$element_name' in $file"
566                                                            if $text =~ /\S/;
567            # Override this line with any info in embed.fnc
568            my ($embed_flags, $embed_ret_type, @embed_args)
569                                                = embed_override($item_name);
570            if ($embed_ret_type) {
571                warn "embed.fnc entry overrides redundant information in"
572                    . " '$item_proto' in $file"
573                                if $item_flags || $item_ret_type || @item_args;
574
575                $item_flags = $embed_flags;
576                $item_ret_type = $embed_ret_type;
577                @item_args = @embed_args;
578            }
579
580            # Use the base entry flags if none for this item; otherwise add in
581            # any non-display base entry flags.
582            if ($item_flags) {
583                $item_flags .= $flags =~ s/[$display_flags]//rg;
584            }
585            else {
586                $item_flags = $flags;
587            }
588            $item_ret_type = $ret_type unless $item_ret_type;
589            @item_args = @args unless @item_args;
590            push @items, { name     => $item_name,
591                           ret_type => $item_ret_type,
592                           flags    => $item_flags,
593                           args     => [ @item_args ],
594                         };
595
596            # This line shows that this element is documented.
597            delete $funcflags{$item_name};
598        }
599
600        # Here, are done accumulating the text for this item.  Trim it
601        $text =~ s/ ^ \s* //x;
602        $text =~ s/ \s* $ //x;
603        $text .= "\n" if $text ne "";
604
605        # And treat all-spaces as nothing at all
606        undef $text unless $text =~ /\S/;
607
608        if ($element_name) {
609
610            # Here, we have accumulated into $text, the pod for $element_name
611            my $where = $flags =~ /A/ ? 'api' : 'guts';
612
613            $section = "Functions in file $file" unless defined $section;
614            die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
615                                                    unless defined $section;
616            if (exists $docs{$where}{$section}{$element_name}) {
617                warn "$0: duplicate API entry for '$element_name' in"
618                    . " $where/$section\n";
619                next;
620            }
621
622            # Override the text with just a link if the flags call for that
623            my $is_link_only = ($flags =~ /h/);
624            if ($is_link_only) {
625                if ($file_is_C) {
626                    die "Can't currently handle link with items to it:\n$in" if @items;
627                    redo;    # Don't put anything if C source
628                }
629
630                # Here, is an 'h' flag in pod.  We add a reference to the pod (and
631                # nothing else) to perlapi/intern.  (It would be better to add a
632                # reference to the correct =item,=header, but something that makes
633                # it harder is that it that might be a duplicate, like '=item *';
634                # so that is a future enhancement XXX.  Another complication is
635                # there might be more than one deserving candidates.)
636                my $podname = $file =~ s!.*/!!r;    # Rmv directory name(s)
637                $podname =~ s/\.pod//;
638                $text = "Described in L<$podname>.\n";
639
640                # Don't output a usage example for linked to documentation if
641                # it is trivial (has no arguments) and we aren't to add a
642                # semicolon
643                $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
644
645                # Keep track of all the pod files that we refer to.
646                push $described_elsewhere{$podname}->@*, $podname;
647            }
648
649            $docs{$where}{$section}{$element_name}{flags} = $flags;
650            $docs{$where}{$section}{$element_name}{pod} = $text;
651            $docs{$where}{$section}{$element_name}{file} = $file;
652            $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
653            push $docs{$where}{$section}{$element_name}{args}->@*, @args;
654            push $docs{$where}{$section}{$element_name}{items}->@*, @items;
655        }
656        elsif ($text) {
657            $valid_sections{$section}{header} = "" unless
658                                    defined $valid_sections{$section}{header};
659            $valid_sections{$section}{header} .= "\n$text";
660        }
661
662        # We already have the first line of what's to come in $in
663        redo;
664
665    } # End of loop through input
666}
667
668my %configs;
669my @has_defs;
670my @has_r_defs;     # Reentrant symbols
671my @include_defs;
672
673sub parse_config_h {
674    use re '/aa';   # Everthing is ASCII in this file
675
676    # Process config.h
677    my $config_h = 'config.h';
678    $config_h = 'win32/config.h' unless -e $config_h;
679    die "Can't find $config_h" unless -e $config_h;
680    open my $fh, '<', $config_h or die "Can't open $config_h: $!";
681    while (<$fh>) {
682
683        # Look for lines like /* FOO_BAR:
684        # By convention all config.h descriptions begin like that
685        if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
686            my $name = $1;
687
688            # Here we are starting the description for $name in config.h.  We
689            # accumulate the entire description for it into @description.
690            # Flowing text from one input line to another is appended into the
691            # same array element to make a single flowing line element, but
692            # verbatim lines are kept as separate elements in @description.
693            # This will facilitate later doing pattern matching without regard
694            # to line boundaries on non-verbatim text.
695
696            die "Multiple config.h entries for '$name'"
697                                        if defined $configs{$name}{description};
698
699            # Get first line of description
700            $_ = <$fh>;
701
702            # Each line in the description begins with blanks followed by '/*'
703            # and some spaces.
704            die "Unexpected config.h initial line for $name: '$_'"
705                                            unless s/ ^ ( \s* \* \s* ) //x;
706            my $initial_text = $1;
707
708            # Initialize the description with this first line (after having
709            # stripped the prefix text)
710            my @description = $_;
711
712            # The first line is used as a template for how much indentation
713            # each normal succeeding line has.  Lines indented further
714            # will be considered as intended to be verbatim.  But, empty lines
715            # likely won't have trailing blanks, so just strip the whole thing
716            # for them.
717            my $strip_initial_qr = qr!   \s* \* \s* $
718                                    | \Q$initial_text\E
719                                    !x;
720            $configs{$name}{verbatim} = 0;
721
722            # Read in the remainder of the description
723            while (<$fh>) {
724                last if s| ^ \s* \* / ||x;  # A '*/' ends it
725
726                die "Unexpected config.h description line for $name: '$_'"
727                                                unless s/$strip_initial_qr//;
728
729                # Fix up the few flawed lines in config.h wherein a new
730                # sentence begins with a tab (and maybe a space after that).
731                # Although none of them currently do, let it recognize
732                # something like
733                #
734                #   "... text").  The next sentence ...
735                #
736                s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1  $2/xg;
737
738                # If this line has extra indentation or looks to have columns,
739                # it should be treated as verbatim.  Columns are indicated by
740                # use of interior: tabs, 3 spaces in a row, or even 2 spaces
741                # not preceded by punctuation.
742                if ($_ !~ m/  ^ \s
743                              | \S (?:                    \t
744                                    |                     \s{3}
745                                    |  (*nlb:[[:punct:]]) \s{2}
746                                   )
747                           /x)
748                {
749                    # But here, is not a verbatim line.  Add an empty line if
750                    # this is the first non-verbatim after a run of verbatims
751                    if ($description[-1] =~ /^\s/) {
752                        push @description, "\n", $_;
753                    }
754                    else {  # Otherwise, append this flowing line to the
755                            # current flowing line
756                        $description[-1] .= $_;
757                    }
758                }
759                else {
760                    $configs{$name}{verbatim} = 1;
761
762                    # The first verbatim line in a run of them is separated by an
763                    # empty line from the flowing lines above it
764                    push @description, "\n" if $description[-1] =~ /^\S/;
765
766                    $_ = Text::Tabs::expand($_);
767
768                    # Only a single space so less likely to wrap
769                    s/ ^ \s* / /x;
770
771                    push @description, $_;
772                }
773            }
774
775            push $configs{$name}{description}->@*, @description
776
777        }   # Not a description; see if it is a macro definition.
778        elsif (m! ^
779                  (?: / \* )?                   # Optional commented-out
780                                                # indication
781                      \# \s* define \s+ ( \w+ ) # $1 is the name
782                  (   \s* )                     # $2 indicates if args or not
783                  (   .*? )                     # $3 is any definition
784                  (?: / \s* \* \* / )?          # Optional trailing /**/ or / **/
785                  $
786                !x)
787        {
788            my $name = $1;
789
790            # There can be multiple definitions for a name.  We want to know
791            # if any of them has arguments, and if any has a body.
792            $configs{$name}{has_args} //= $2 eq "";
793            $configs{$name}{has_args} ||= $2 eq "";
794            $configs{$name}{has_defn} //= $3 ne "";
795            $configs{$name}{has_defn} ||= $3 ne "";
796        }
797    }
798
799    # We now have stored the description and information about every #define
800    # in the file.  The description is in a form convenient to operate on to
801    # convert to pod.  Do that now.
802    foreach my $name (keys %configs) {
803        next unless defined $configs{$name}{description};
804
805        # All adjacent non-verbatim lines of the description are appended
806        # together in a single element in the array.  This allows the patterns
807        # to work across input line boundaries.
808
809        my $pod = "";
810        while (defined ($_ = shift $configs{$name}{description}->@*)) {
811            chomp;
812
813            if (/ ^ \S /x) {  # Don't edit verbatim lines
814
815                # Enclose known file/path names not already so enclosed
816                # with <...>.  (Some entries in config.h are already
817                # '<path/to/file>')
818                my $file_name_qr = qr! [ \w / ]+ \.
819                                    (?: c | h | xs | p [lm] | pmc | PL
820                                        | sh | SH | exe ) \b
821                                    !xx;
822                my $path_name_qr = qr! (?: / \w+ )+ !x;
823                for my $re ($file_name_qr, $path_name_qr) {
824                    s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
825                }
826
827                # Enclose <... file/path names with F<...> (but no double
828                # angle brackets)
829                for my $re ($file_name_qr, $path_name_qr) {
830                    s! < ( $re ) > !F<$1>!gxx;
831                }
832
833                # Explain metaconfig units
834                s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
835
836                # Convert "See foo" to "See C<L</foo>>" if foo is described in
837                # this file.  Also create a link to the known file INSTALL.
838                # And, to be more general, handle "See also foo and bar", and
839                # "See also foo, bar, and baz"
840                while (m/ \b [Ss]ee \s+
841                         (?: also \s+ )?    ( \w+ )
842                         (?: ,  \s+         ( \w+ ) )?
843                         (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
844                    my @links = $1;
845                    push @links, $2 if defined $2;
846                    push @links, $3 if defined $3;
847                    foreach my $link (@links) {
848                        if ($link eq 'INSTALL') {
849                            s/ \b INSTALL \b /C<L<INSTALL>>/xg;
850                        }
851                        elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
852                            s| \b $link \b |C<L</$link>>|xg;
853                            $configs{$link}{linked} = 1;
854                            $configs{$name}{linked} = 1;
855                        }
856                    }
857                }
858
859                # Enclose what we think are symbols with C<...>.
860                no warnings 'experimental::vlb';
861                s/ (*nlb:<)
862                   (
863                        # Any word followed immediately with parens or
864                        # brackets
865                        \b \w+ (?: \( [^)]* \)    # parameter list
866                                 | \[ [^]]* \]    # or array reference
867                               )
868                    | (*plb: ^ | \s ) -D \w+    # Also -Dsymbols.
869                    | \b (?: struct | union ) \s \w+
870
871                        # Words that contain underscores (which are
872                        # definitely not text) or three uppercase letters in
873                        # a row.  Length two ones, like IV, aren't enclosed,
874                        # because they often don't look as nice.
875                    | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
876                   )
877                    (*nla:>)
878                 /C<$1>/xg;
879
880                # These include foo when the name is HAS_foo.  This is a
881                # heuristic which works in most cases.
882                if ($name =~ / ^ HAS_ (.*) /x) {
883                    my $symbol = lc $1;
884
885                    # Don't include path components, nor things already in
886                    # <>, or with trailing '(', '['
887                    s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
888                }
889            }
890
891            $pod .=  "$_\n";
892        }
893        delete $configs{$name}{description};
894
895        $configs{$name}{pod} = $pod;
896    }
897
898    # Now have converted the description to pod.  We also now have enough
899    # information that we can do cross checking to find definitions without
900    # corresponding pod, and see if they are mentioned in some description;
901    # otherwise they aren't documented.
902  NAME:
903    foreach my $name (keys %configs) {
904
905        # A definition without pod
906        if (! defined $configs{$name}{pod}) {
907
908            # Leading/trailing underscore means internal to config.h, e.g.,
909            # _GNU_SOURCE
910            next if $name =~ / ^ _ /x;
911            next if $name =~ / _ $ /x;
912
913            # MiXeD case names are internal to config.h; the first 4
914            # characters are sufficient to determine this
915            next if $name =~ / ^ [[:upper:]] [[:lower:]]
916                                 [[:upper:]] [[:lower:]]
917                            /x;
918
919            # Here, not internal to config.h.  Look to see if this symbol is
920            # mentioned in the pod of some other.  If so, assume it is
921            # documented.
922            foreach my $check_name (keys %configs) {
923                my $this_element = $configs{$check_name};
924                my $this_pod = $this_element->{pod};
925                if (defined $this_pod) {
926                    next NAME if $this_pod =~ / \b $name \b /x;
927                }
928            }
929
930            warn "$name has no documentation\n";
931            $missing_macros{$name} = 'config.h';
932
933            next;
934        }
935
936        my $has_defn = $configs{$name}{has_defn};
937        my $has_args = $configs{$name}{has_args};
938
939        # Check if any section already has an entry for this element.
940        # If so, it better be a placeholder, in which case we replace it
941        # with this entry.
942        foreach my $section (keys $docs{'api'}->%*) {
943            if (exists $docs{'api'}{$section}{$name}) {
944                my $was = $docs{'api'}{$section}{$name}->{pod};
945                $was = "" unless $was;
946                chomp $was;
947                if ($was ne "" && $was !~ m/$link_text/) {
948                    die "Multiple descriptions for $name\n"
949                        . "$section contained '$was'";
950                }
951                $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
952                $configs{$name}{section} = $section;
953                last;
954            }
955        }
956
957        my $handled = 0;    # Haven't handled this yet
958
959        if (defined $configs{$name}{'section'}) {
960            # This has been taken care of elsewhere.
961            $handled = 1;
962        }
963        else {
964            my $flags = "";
965            if ($has_defn && ! $has_args) {
966                $configs{$name}{args} = 1;
967            }
968
969            # Symbols of the form I_FOO are for #include files.  They have
970            # special usage information
971            if ($name =~ / ^ I_ ( .* ) /x) {
972                my $file = lc $1 . '.h';
973                $configs{$name}{usage} = <<~"EOT";
974                    #ifdef $name
975                        #include <$file>
976                    #endif
977                    EOT
978            }
979
980            # Compute what section this variable should go into.  This
981            # heuristic was determined by manually inspecting the current
982            # things in config.h, and should be adjusted as necessary as
983            # deficiencies are found.
984            #
985            # This is the default section for macros with a definiton but
986            # no arguments, meaning it is replaced unconditionally
987            #
988            my $sb = qr/ _ | \b /x; # segment boundary
989            my $dash_or_spaces = qr/ - | \s+ /x;
990            my $pod = $configs{$name}{pod};
991            if ($name =~ / ^ USE_ /x) {
992                $configs{$name}{'section'} = $site_scn;
993            }
994            elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
995            {
996                $configs{$name}{'section'} = $time_scn;
997            }
998            elsif (   $name =~ / ^ [[:alpha:]]+ f $ /x
999                   && $configs{$name}{pod} =~ m/ \b format \b /ix)
1000            {
1001                $configs{$name}{'section'} = $formats_scn;
1002            }
1003            elsif ($name =~ /  DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1004                            | $sb CASTFLAGS $sb
1005                            | QUADMATH
1006                            | $sb (?: IS )? NAN
1007                            | $sb (?: IS )? FINITE
1008                            /x)
1009            {
1010                $configs{$name}{'section'} =
1011                                    $floating_scn;
1012            }
1013            elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
1014                $configs{$name}{'section'} = $filesystem_scn;
1015            }
1016            elsif (   $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1017                   || $configs{$name}{pod} =~ m/ \b align /x)
1018            {
1019                $configs{$name}{'section'} = $compiler_scn;
1020            }
1021            elsif ($name =~ / ^ [IU] [ \d V ]
1022                            | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1023            {
1024                $configs{$name}{'section'} = $integer_scn;
1025            }
1026            elsif ($name =~ / $sb t $sb /x) {
1027                $configs{$name}{'section'} = $typedefs_scn;
1028                $flags .= 'y';
1029            }
1030            elsif (   $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1031                    && $configs{$name}{pod} =~ m/ \b format \b /ix)
1032            {
1033                $configs{$name}{'section'} = $formats_scn;
1034            }
1035            elsif ($name =~ / BACKTRACE /x) {
1036                $configs{$name}{'section'} = $debugging_scn;
1037            }
1038            elsif ($name =~ / ALLOC $sb /x) {
1039                $configs{$name}{'section'} = $memory_scn;
1040            }
1041            elsif (   $name =~ /   STDIO | FCNTL | EOF | FFLUSH
1042                                | $sb FILE $sb
1043                                | $sb DIR $sb
1044                                | $sb LSEEK
1045                                | $sb INO $sb
1046                                | $sb OPEN
1047                                | $sb CLOSE
1048                                | ^ DIR
1049                                | ^ INO $sb
1050                                | DIR $
1051                                | FILENAMES
1052                                /x
1053                    || $configs{$name}{pod} =~ m!  I/O | stdio
1054                                                | file \s+ descriptor
1055                                                | file \s* system
1056                                                | statfs
1057                                                !x)
1058            {
1059                $configs{$name}{'section'} = $filesystem_scn;
1060            }
1061            elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
1062                $configs{$name}{'section'} = $signals_scn;
1063            }
1064            elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
1065                $configs{$name}{'section'} = $prototypes_scn;
1066            }
1067            elsif (   $name =~ / ^ LOC_ /x
1068                    || $configs{$name}{pod} =~ /full path/i)
1069            {
1070                $configs{$name}{'section'} = $paths_scn;
1071            }
1072            elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
1073                $configs{$name}{'section'} = $locale_scn;
1074            }
1075            elsif ($configs{$name}{pod} =~ /  GCC | C99 | C\+\+ /xi) {
1076                $configs{$name}{'section'} = $compiler_scn;
1077            }
1078            elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1079            {
1080                $configs{$name}{'section'} = $password_scn;
1081            }
1082            elsif ($name =~ /  SOCKET | $sb SOCK /x) {
1083                $configs{$name}{'section'} = $sockets_scn;
1084            }
1085            elsif (   $name =~ / THREAD | MULTIPLICITY /x
1086                    || $configs{$name}{pod} =~ m/ \b pthread /ix)
1087            {
1088                $configs{$name}{'section'} = $concurrency_scn;
1089            }
1090            elsif ($name =~ /  PERL | ^ PRIV | SITE | ARCH | BIN
1091                                | VENDOR | ^ USE
1092                            /x)
1093            {
1094                $configs{$name}{'section'} = $site_scn;
1095            }
1096            elsif (   $pod =~ / \b floating $dash_or_spaces point \b /ix
1097                   || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1098                   || $pod =~ / \b doubles \b /ix
1099                   || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
1100            {
1101                $configs{$name}{'section'} =
1102                                    $floating_scn;
1103            }
1104            else {
1105                # Above are the specific sections.  The rest go into a
1106                # grab-bag of general configuration values.  However, we put
1107                # two classes of them into lists of their names, without their
1108                # descriptions, when we think that the description doesn't add
1109                # any real value.  One list contains the #include variables:
1110                # the description is basically boiler plate for each of these.
1111                # The other list contains the very many things that are of the
1112                # form HAS_foo, and \bfoo\b is contained in its description,
1113                # and there is no verbatim text in the pod or links to/from it
1114                # (which would add value).  That means that it is likely the
1115                # intent of the variable can be gleaned from just its name,
1116                # and unlikely the description adds signficant value, so just
1117                # listing them suffices.  Giving their descriptions would
1118                # expand this pod significantly with little added value.
1119                if (   ! $has_defn
1120                    && ! $configs{$name}{verbatim}
1121                    && ! $configs{$name}{linked})
1122                {
1123                    if ($name =~ / ^ I_ ( .* ) /x) {
1124                        push @include_defs, $name;
1125                        next;
1126                    }
1127                    elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1128                        my $canonical_name = $1;
1129                        $canonical_name =~ s/_//g;
1130
1131                        my $canonical_pod = $configs{$name}{pod};
1132                        $canonical_pod =~ s/_//g;
1133
1134                        if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1135                            if ($name =~ / $sb R $sb /x) {
1136                                push @has_r_defs, $name;
1137                            }
1138                            else {
1139                                push @has_defs, $name;
1140                            }
1141                            next;
1142                        }
1143                    }
1144                }
1145
1146                $configs{$name}{'section'} = $genconfig_scn;
1147            }
1148
1149            my $section = $configs{$name}{'section'};
1150            die "Internal error: '$section' not in \%valid_sections"
1151                            unless grep { $_ eq $section } keys %valid_sections;
1152            $flags .= 'AdmnT';
1153            $flags .= 'U' unless defined $configs{$name}{usage};
1154
1155            # All the information has been gathered; save it
1156            $docs{'api'}{$section}{$name}{flags} = $flags;
1157            $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1158            $docs{'api'}{$section}{$name}{ret_type} = "";
1159            $docs{'api'}{$section}{$name}{file} = 'config.h';
1160            $docs{'api'}{$section}{$name}{usage}
1161                = $configs{$name}{usage} if defined $configs{$name}{usage};
1162            push $docs{'api'}{$section}{$name}{args}->@*, ();
1163            push $docs{'api'}{$section}{$name}{items}->@*, ();
1164        }
1165    }
1166}
1167
1168sub format_pod_indexes($) {
1169    my $entries_ref = shift;
1170
1171    # Output the X<> references to the names, packed since they don't get
1172    # displayed, but not too many per line so that when someone is editing the
1173    # file, it doesn't run on
1174
1175    my $text ="";
1176    my $line_length = 0;
1177    for my $name (sort dictionary_order $entries_ref->@*) {
1178        my $entry = "X<$name>";
1179        my $entry_length = length $entry;
1180
1181        # Don't loop forever if we have a verrry long name, and don't go too
1182        # far to the right.
1183        if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1184            $text .= "\n";
1185            $line_length = 0;
1186        }
1187
1188        $text .= $entry;
1189        $line_length += $entry_length;
1190    }
1191
1192    return $text;
1193}
1194
1195sub docout ($$$) { # output the docs for one function
1196    my($fh, $element_name, $docref) = @_;
1197
1198    # Trim trailing space
1199    $element_name =~ s/\s*$//;
1200
1201    my $flags = $docref->{flags};
1202    my $pod = $docref->{pod} // "";
1203    my $file = $docref->{file};
1204
1205    my @items = $docref->{items}->@*;
1206
1207    # Make the main element the first of the items.  This allows uniform
1208    # treatment below
1209    unshift @items, {   name => $element_name,
1210                        flags => $flags,
1211                        ret_type => $docref->{ret_type},
1212                        args => [ $docref->{args}->@* ],
1213                    };
1214
1215    warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1216
1217    print $fh "\n=over $description_indent\n";
1218    print $fh "\n=item C<$_->{name}>\n" for @items;
1219
1220    # If we're printing only a link to an element, this isn't the major entry,
1221    # so no X<> here.
1222    if ($flags !~ /h/) {
1223        print $fh "X<$_->{name}>" for @items;
1224        print $fh "\n";
1225    }
1226
1227    for my $item (@items) {
1228        if ($item->{flags} =~ /D/) {
1229            print $fh <<~"EOT";
1230
1231                C<B<DEPRECATED!>>  It is planned to remove C<$item->{name}> from a
1232                future release of Perl.  Do not use it for new code; remove it from
1233                existing code.
1234                EOT
1235        }
1236        elsif ($item->{flags} =~ /x/) {
1237            print $fh <<~"EOT";
1238
1239                NOTE: C<$item->{name}> is B<experimental> and may change or be
1240                removed without notice.
1241                EOT
1242        }
1243    }
1244
1245    chomp $pod;     # Make sure prints pod with a single trailing \n
1246    print $fh "\n", $pod, "\n";
1247
1248    for my $item (@items) {
1249        my $item_flags = $item->{flags};
1250        my $item_name = $item->{name};
1251
1252        print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1253                                                    if $item_flags =~ /O/;
1254        # Is Perl_, but no #define foo # Perl_foo
1255        if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1256
1257             # Can't handle threaded varargs
1258         || ($item_flags =~ /f/ && $item_flags !~ /T/))
1259        {
1260            $item->{name} = "Perl_$item_name";
1261            print $fh <<~"EOT";
1262
1263                NOTE: C<$item_name> must be explicitly called as
1264                C<$item->{name}>
1265                EOT
1266            print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1267            print $fh ".\n";
1268        }
1269    }
1270
1271    if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1272                            # to never warrant a usage line
1273        warn("U and s flags are incompatible")
1274                                            if $flags =~ /U/ && $flags =~ /s/;
1275        # nothing
1276    } else {
1277
1278        print $fh "\n=over $usage_indent\n";
1279
1280        if (defined $docref->{usage}) {     # An override of the usage section
1281            print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1282        }
1283        else {
1284
1285            # Add the thread context formal parameter on expanded-out names
1286            for my $item (@items) {
1287                unshift $item->{args}->@*, (($item->{args}->@*)
1288                                            ? "pTHX_"
1289                                            : "pTHX")
1290                                                   if $item->{flags} !~ /T/
1291                                                   && $item->{name} =~ /^Perl_/;
1292            }
1293
1294            # Look through all the items in this entry.  If all have the same
1295            # return type and arguments, only the main entry is displayed.
1296            # Also, find the longest return type and longest name so that if
1297            # multiple ones are shown, they can be vertically aligned nicely
1298            my $need_individual_usage = 0;
1299            my $longest_name_length = length $items[0]->{name};
1300            my $base_ret_type = $items[0]->{ret_type};
1301            my $longest_ret = length $base_ret_type;
1302            my @base_args = $items[0]->{args}->@*;
1303            for (my $i = 1; $i < @items; $i++) {
1304                no warnings 'experimental::smartmatch';
1305                my $item = $items[$i];
1306                $need_individual_usage = 1
1307                                    if    $item->{ret_type} ne $base_ret_type
1308                                    || ! ($item->{args}->@* ~~ @base_args);
1309                my $ret_length = length $item->{ret_type};
1310                $longest_ret = $ret_length if $ret_length > $longest_ret;
1311                my $name_length = length $item->{name};
1312                $longest_name_length = $name_length
1313                                        if $name_length > $longest_name_length;
1314            }
1315
1316            # If we're only showing one entry, only its length matters.
1317            $longest_name_length = length($items[0]->{name})
1318                                                unless $need_individual_usage;
1319            print $fh "\n";
1320
1321            my $indent = 1;     # 1 is sufficient for verbatim; =over is used
1322                                # for more
1323            my $ret_name_sep_length = 2; # spaces between return type and name
1324            my $name_indent = $indent + $longest_ret;
1325            $name_indent += $ret_name_sep_length if $longest_ret;
1326
1327            my $this_max_width =
1328                               $max_width - $description_indent - $usage_indent;
1329
1330            for my $item (@items) {
1331                my $ret_type = $item->{ret_type};
1332                my @args = $item->{args}->@*;
1333                my $name = $item->{name};
1334                my $item_flags = $item->{flags};
1335
1336                # The return type
1337                print $fh (" " x $indent), $ret_type;
1338
1339                print $fh " " x (  $ret_name_sep_length
1340                                 + $longest_ret - length $ret_type);
1341                print $fh $name;
1342
1343                if ($item_flags =~ /n/) { # no args
1344                    warn("$file: $element_name: n flag without m")
1345                                                    unless $item_flags =~ /m/;
1346                    warn("$file: $name: n flag but apparently has args")
1347                                                                    if @args;
1348                }
1349                else {
1350                    # +1 for the '('
1351                    my $arg_indent = $name_indent + $longest_name_length + 1;
1352
1353                    # Align the argument lists of the items
1354                    print $fh " " x ($longest_name_length - length($name));
1355                    print $fh "(";
1356
1357                    # Display as many of the arguments on the same line as
1358                    # will fit.
1359                    my $total_length = $arg_indent;
1360                    my $first_line = 1;
1361                    for (my $i = 0; $i < @args; $i++) {
1362                        my $arg = $args[$i];
1363                        my $arg_length = length($arg);
1364
1365                        # All but the first arg are preceded by a blank
1366                        my $use_blank = $i > 0;
1367
1368                        # +1 here and below because either the argument has a
1369                        # trailing comma or trailing ')'
1370                        $total_length += $arg_length + $use_blank + 1;
1371
1372                        # We want none of the arguments to be positioned so
1373                        # they extend too far to the right.  Ideally, they
1374                        # should all start in the same column as the arguments
1375                        # on the first line of the function display do.  But, if
1376                        # necessary, outdent them so that they all start in
1377                        # another column, with the longest ending at the right
1378                        # margin, like so:
1379                        #                   void  function_name(pTHX_ short1,
1380                        #                                    short2,
1381                        #                                    very_long_argument,
1382                        #                                    short3)
1383                        if ($total_length > $this_max_width) {
1384
1385                            # If this is the first continuation line,
1386                            # calculate the longest argument; this will be the
1387                            # one we may have to outdent for.
1388                            if ($first_line) {
1389                                $first_line = 0;
1390
1391                                # We will need at least as much as the current
1392                                # argument
1393                                my $longest_arg_length = $arg_length
1394                                                       + $use_blank + 1;
1395
1396                                # Look through the rest of the args to see if
1397                                # any are longer than this one.
1398                                for (my $j = $i + 1; $j < @args; $j++) {
1399
1400                                    # Include the trailing ',' or ')' in the
1401                                    # length.  No need to concern ourselves
1402                                    # with a leading blank, as the argument
1403                                    # would be positioned first on the next
1404                                    # line
1405                                    my $peek_arg_length = length ($args[$j])
1406                                                        + 1;
1407                                    $longest_arg_length = $peek_arg_length
1408                                      if $peek_arg_length > $longest_arg_length;
1409                                }
1410
1411                                # Calculate the new indent if necessary.
1412                                $arg_indent =
1413                                        $this_max_width - $longest_arg_length
1414                                        if $arg_indent + $longest_arg_length
1415                                                            > $this_max_width;
1416                            }
1417
1418                            print $fh "\n", (" " x $arg_indent);
1419                            $total_length = $arg_indent + $arg_length + 1;
1420                            $use_blank = 0;
1421                        }
1422
1423                        # Display this argument
1424                        print $fh " " if $use_blank;
1425                        print $fh $arg;
1426                        print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1427
1428                    } # End of loop through args
1429
1430                    print $fh ")";
1431                }
1432
1433                print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1434                print $fh "\n";
1435
1436                # Only the first entry is normally displayed
1437                last unless $need_individual_usage;
1438            }
1439        }
1440
1441        print $fh "\n=back\n";
1442    }
1443
1444    print $fh "\n=back\n";
1445    print $fh "\n=for hackers\nFound in file $file\n";
1446}
1447
1448sub construct_missings_section {
1449    my ($pod_name, $missings_ref) = @_;
1450    my $text = "";
1451
1452    return $text unless $missings_ref->@*;
1453
1454    $text .= <<~EOT;
1455
1456        =head1 $undocumented_scn
1457
1458        EOT
1459    if ($pod_name eq 'perlapi') {
1460        $text .= <<~'EOT';
1461            The following functions have been flagged as part of the public
1462            API, but are currently undocumented.  Use them at your own risk,
1463            as the interfaces are subject to change.  Functions that are not
1464            listed in this document are not intended for public use, and
1465            should NOT be used under any circumstances.
1466
1467            If you feel you need to use one of these functions, first send
1468            email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1469            It may be that there is a good reason for the function not being
1470            documented, and it should be removed from this list; or it may
1471            just be that no one has gotten around to documenting it.  In the
1472            latter case, you will be asked to submit a patch to document the
1473            function.  Once your patch is accepted, it will indicate that the
1474            interface is stable (unless it is explicitly marked otherwise) and
1475            usable by you.
1476            EOT
1477    }
1478    else {
1479        $text .= <<~'EOT';
1480            The following functions are currently undocumented.  If you use
1481            one of them, you may wish to consider creating and submitting
1482            documentation for it.
1483            EOT
1484    }
1485
1486    $text .= "\n" . format_pod_indexes($missings_ref);
1487
1488    # Sort the elements.
1489    my @missings = sort dictionary_order $missings_ref->@*;
1490
1491
1492    $text .= "\n";
1493
1494    use integer;
1495
1496    # Look through all the elements in the list and see how many columns we
1497    # could place them in the output what will fit in the available width.
1498    my $min_spacer = 2;     # Need this much space between columns
1499    my $columns;
1500    my $rows;
1501    my @col_widths;
1502
1503  COLUMN:
1504    # We start with more columns, and work down until we find a number that
1505    # can accommodate all the data.  This algorithm doesn't require the
1506    # resulting columns to all have the same width.  This can allow for
1507    # as tight of packing as the data will possibly allow.
1508    for ($columns = 7; $columns > 1; $columns--) {
1509
1510        # For this many columns, we will need this many rows (final row might
1511        # not be completely filled)
1512        $rows = (@missings + $columns - 1) / $columns;
1513
1514        my $row_width = 0;
1515        my $i = 0;  # Which missing element
1516
1517        # For each column ...
1518        for my $col (0 .. $columns - 1) {
1519
1520            # Calculate how wide the column needs to be, which is based on the
1521            # widest element in it
1522            $col_widths[$col] = 0;
1523
1524            # Look through all the rows to find the widest element
1525            for my $row (0 .. $rows - 1) {
1526
1527                # Skip if this row doesn't have an entry for this column
1528                last if $i >= @missings;
1529
1530                # This entry occupies this many bytes.
1531                my $this_width = length $missings[$i];
1532
1533                # All but the final column need a spacer between it and the
1534                # next column over.
1535                $this_width += $min_spacer if $col < $columns - 1;
1536
1537
1538                # This column will need to have enough width to accommodate
1539                # this element
1540                if ($this_width > $col_widths[$col]) {
1541
1542                    # We can't have this many columns if the total width
1543                    # exceeds the available; bail now and try fewer columns
1544                    next COLUMN if $row_width + $this_width > $max_width;
1545
1546                    $col_widths[$col] = $this_width;
1547                }
1548
1549                $i++;   # The next row will contain the next item
1550            }
1551
1552            $row_width += $col_widths[$col];
1553            next COLUMN if $row_width > $max_width;
1554        }
1555
1556        # If we get this far, this many columns works
1557        last;
1558    }
1559
1560    # Here, have calculated the number of rows ($rows) and columns ($columns)
1561    # required to list the elements.  @col_widths contains the width of each
1562    # column.
1563
1564    $text .= "\n\n=over $description_indent\n\n";
1565
1566    # Assemble the output
1567    for my $row (0 .. $rows - 1) {
1568        for my $col (0 .. $columns - 1) {
1569            $text .= " " if $col == 0;  # Indent one to mark as verbatim
1570
1571            my $index = $row + $rows * $col;  # Convert 2 dimensions to 1
1572
1573            # Skip if this row doesn't have an entry for this column
1574            next if $index >= @missings;
1575
1576            my $element = $missings[$index];
1577            $text .= $element;
1578
1579            # Add alignment spaces for all but final column
1580            $text .= " " x ($col_widths[$col] - length $element)
1581                                                        if $col < $columns - 1;
1582        }
1583
1584        $text .= "\n";  # End of row
1585    }
1586
1587    $text .= "\n=back\n";
1588
1589    return $text;
1590}
1591
1592sub dictionary_order {
1593    # Do a case-insensitive dictionary sort, with only alphabetics
1594    # significant, falling back to using everything for determinancy
1595    return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
1596           || uc($a) cmp uc($b)
1597           || $a cmp $b;
1598}
1599
1600sub output {
1601    my ($podname, $header, $dochash, $missings_ref, $footer) = @_;
1602    #
1603    # strip leading '|' from each line which had been used to hide
1604    # pod from pod checkers.
1605    s/^\|//gm for $header, $footer;
1606
1607    my $fh = open_new("pod/$podname.pod", undef,
1608                      {by => "$0 extracting documentation",
1609                       from => 'the C source files'}, 1);
1610
1611    print $fh $header, "\n";
1612
1613    for my $section_name (sort dictionary_order keys %valid_sections) {
1614        my $section_info = $dochash->{$section_name};
1615
1616        # We allow empty sections in perlintern.
1617        if (! $section_info && $podname eq 'perlapi') {
1618            warn "Empty section '$section_name'; skipped";
1619            next;
1620        }
1621
1622        print $fh "\n=head1 $section_name\n";
1623
1624        if ($podname eq 'perlapi') {
1625            print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1626                                if defined $valid_sections{$section_name}{header};
1627
1628            # Output any heading-level documentation and delete so won't get in
1629            # the way later
1630            if (exists $section_info->{""}) {
1631                print $fh "\n", $section_info->{""}, "\n";
1632                delete $section_info->{""};
1633            }
1634        }
1635
1636        if ($section_info && keys $section_info->%*) {
1637            for my $function_name (sort dictionary_order keys %$section_info) {
1638                docout($fh, $function_name, $section_info->{$function_name});
1639            }
1640        }
1641        else {
1642            print $fh "\nThere are only public API items currently in $section_name\n";
1643        }
1644
1645        print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1646                            if $podname eq 'perlapi'
1647                            && defined $valid_sections{$section_name}{footer};
1648    }
1649
1650    print $fh construct_missings_section($podname, $missings_ref);
1651
1652    print $fh "\n$footer\n=cut\n";
1653
1654    read_only_bottom_close_and_rename($fh);
1655}
1656
1657foreach (@{(setup_embed())[0]}) {
1658    next if @$_ < 2;
1659    my ($flags, $ret_type, $func, @args) = @$_;
1660    s/\b(?:NN|NULLOK)\b\s+//g for @args;
1661
1662    $funcflags{$func} = {
1663                         flags => $flags,
1664                         ret_type => $ret_type,
1665                         args => \@args,
1666                        };
1667}
1668
1669# glob() picks up docs from extra .c or .h files that may be in unclean
1670# development trees.
1671open my $fh, '<', 'MANIFEST'
1672    or die "Can't open MANIFEST: $!";
1673while (my $line = <$fh>) {
1674    next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1675
1676    # Don't pick up pods from these.  (We may pick up generated stuff from
1677    # /lib though)
1678    next if $file =~ m! ^ ( cpan | dist | ext ) / !x;
1679
1680    open F, '<', $file or die "Cannot open $file for docs: $!\n";
1681    autodoc(\*F,$file);
1682    close F or die "Error closing $file: $!\n";
1683}
1684close $fh or die "Error whilst reading MANIFEST: $!";
1685
1686parse_config_h();
1687
1688for (sort keys %funcflags) {
1689    next unless $funcflags{$_}{flags} =~ /d/;
1690    next if $funcflags{$_}{flags} =~ /h/;
1691    warn "no docs for $_\n";
1692}
1693
1694foreach (sort keys %missing) {
1695    warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1696}
1697
1698# List of funcs in the public API that aren't also marked as core-only,
1699# experimental nor deprecated.
1700my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1701                    && $funcflags{$_}{flags} !~ /[xD]/
1702                    && !$docs{api}{$_}, keys %funcflags;
1703push @missing_api, keys %missing_macros;
1704
1705my @other_places = ( qw(perlclib perlxs), keys %described_elsewhere );
1706my $places_other_than_intern = join ", ",
1707            map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1708my $places_other_than_api = join ", ",
1709            map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
1710
1711# The S< > makes things less densely packed, hence more readable
1712my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1713my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
1714$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1715$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1716
1717my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
1718$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1719
1720my $section_list = join "\n\n", map { "=item L</$_>" }
1721                                sort(dictionary_order keys %valid_sections),
1722                                $undocumented_scn;  # Keep last
1723
1724output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
1725|=encoding UTF-8
1726|
1727|=head1 NAME
1728|
1729|perlapi - autogenerated documentation for the perl public API
1730|
1731|=head1 DESCRIPTION
1732|X<Perl API> X<API> X<api>
1733|
1734|This file contains most of the documentation of the perl public API, as
1735|generated by F<embed.pl>.  Specifically, it is a listing of functions,
1736|macros, flags, and variables that may be used by extension writers.  Besides
1737|L<perlintern> and F<config.h>, some items are listed here as being actually
1738|documented in another pod.
1739|
1740|L<At the end|/$undocumented_scn> is a list of functions which have yet
1741|to be documented.  Patches welcome!  The interfaces of these are subject to
1742|change without notice.
1743|
1744|Some of the functions documented here are consolidated so that a single entry
1745|serves for multiple functions which all do basically the same thing, but have
1746|some slight differences.  For example, one form might process magic, while
1747|another doesn't.  The name of each variation is listed at the top of the
1748|single entry.  But if all have the same signature (arguments and return type)
1749|except for their names, only the usage for the base form is shown.  If any
1750|one of the forms has a different signature (such as returning C<const> or
1751|not) every function's signature is explicitly displayed.
1752|
1753|Anything not listed here or in the other mentioned pods is not part of the
1754|public API, and should not be used by extension writers at all.  For these
1755|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1756|writing extensions.
1757|
1758|In Perl, unlike C, a string of characters may generally contain embedded
1759|C<NUL> characters.  Sometimes in the documentation a Perl string is referred
1760|to as a "buffer" to distinguish it from a C string, but sometimes they are
1761|both just referred to as strings.
1762|
1763|Note that all Perl API global variables must be referenced with the C<PL_>
1764|prefix.  Again, those not listed here are not to be used by extension writers,
1765|and can be changed or removed without notice; same with macros.
1766|Some macros are provided for compatibility with the older,
1767|unadorned names, but this support may be disabled in a future release.
1768|
1769|Perl was originally written to handle US-ASCII only (that is characters
1770|whose ordinal numbers are in the range 0 - 127).
1771|And documentation and comments may still use the term ASCII, when
1772|sometimes in fact the entire range from 0 - 255 is meant.
1773|
1774|The non-ASCII characters below 256 can have various meanings, depending on
1775|various things.  (See, most notably, L<perllocale>.)  But usually the whole
1776|range can be referred to as ISO-8859-1.  Often, the term "Latin-1" (or
1777|"Latin1") is used as an equivalent for ISO-8859-1.  But some people treat
1778|"Latin1" as referring just to the characters in the range 128 through 255, or
1779|sometimes from 160 through 255.
1780|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1781|
1782|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1783|L<perlebcdic>).  Most of the documentation (and even comments in the code)
1784|ignore the EBCDIC possibility.
1785|For almost all purposes the differences are transparent.
1786|As an example, under EBCDIC,
1787|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1788|whenever this documentation refers to C<utf8>
1789|(and variants of that name, including in function names),
1790|it also (essentially transparently) means C<UTF-EBCDIC>.
1791|But the ordinals of characters differ between ASCII, EBCDIC, and
1792|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1793|number of bytes than in UTF-8.
1794|
1795|The organization of this document is tentative and subject to change.
1796|Suggestions and patches welcome
1797|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1798|
1799|The sections in this document currently are
1800|
1801|=over
1802
1803|$section_list
1804|
1805|=back
1806|
1807|The listing below is alphabetical, case insensitive.
1808_EOB_
1809|=head1 AUTHORS
1810|
1811|Until May 1997, this document was maintained by Jeff Okamoto
1812|<okamoto\@corp.hp.com>.  It is now maintained as part of Perl itself.
1813|
1814|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1815|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1816|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1817|Stephen McCamant, and Gurusamy Sarathy.
1818|
1819|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1820|
1821|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1822|
1823|=head1 SEE ALSO
1824|
1825|F<config.h>, $places_other_than_api
1826_EOE_
1827
1828# List of non-static internal functions
1829my @missing_guts =
1830 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
1831
1832output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
1833|=head1 NAME
1834|
1835|perlintern - autogenerated documentation of purely B<internal>
1836|Perl functions
1837|
1838|=head1 DESCRIPTION
1839|X<internal Perl functions> X<interpreter functions>
1840|
1841|This file is the autogenerated documentation of functions in the
1842|Perl interpreter that are documented using Perl's internal documentation
1843|format but are not marked as part of the Perl API.  In other words,
1844|B<they are not for use in extensions>!
1845
1846|It has the same sections as L<perlapi>, though some may be empty.
1847|
1848_EOB_
1849|
1850|=head1 AUTHORS
1851|
1852|The autodocumentation system was originally added to the Perl core by
1853|Benjamin Stuhl.  Documentation is by whoever was kind enough to
1854|document their functions.
1855|
1856|=head1 SEE ALSO
1857|
1858|F<config.h>, $places_other_than_intern
1859_EOE_
1860