1#!/usr/bin/perl -w 2 3use Text::Tabs; 4# 5# Unconditionally regenerate: 6# 7# pod/perlintern.pod 8# pod/perlapi.pod 9# 10# from information stored in 11# 12# embed.fnc 13# plus all the core .c, .h, and .pod files listed in MANIFEST 14# 15# Has an optional arg, which is the directory to chdir to before reading 16# MANIFEST and the files 17# 18# This script is invoked as part of 'make all' 19# 20# The generated pod consists of sections of related elements, functions, 21# macros, and variables. The keys of %valid_sections give the current legal 22# ones. Just add a new key to add a section. 23# 24# Throughout the files read by this script are lines like 25# 26# =for apidoc_section Section Name 27# =for apidoc_section $section_name_variable 28# 29# "Section Name" (after having been stripped of leading space) must be one of 30# the legal section names, or an error is thrown. $section_name_variable must 31# be one of the legal section name variables defined below; these expand to 32# legal section names. This form is used so that minor wording changes in 33# these titles can be confied to this file. All the names of the variables 34# end in '_scn'; this suffix is optional in the apidoc_section lines. 35# 36# All API elements defined between this line and the next 'apidoc_section' 37# line will go into the section "Section Name" (or $section_name_variable), 38# sorted by dictionary order within it. perlintern and perlapi are parallel 39# documents, each potentially with a section "Section Name". Each element is 40# marked as to which document it goes into. If there are none for a 41# particular section in perlapi, that section is omitted. 42# 43# Also, in .[ch] files, there may be 44# 45# =head1 Section Name 46# 47# lines in comments. These are also used by this program to switch to section 48# "Section Name". The difference is that if there are any lines after the 49# =head1, inside the same comment, and before any =for apidoc-ish lines, they 50# are used as a heading for section "Section Name" (in both perlintern and 51# perlapi). This includes any =head[2-5]. If more than one '=head1 Section 52# Name' line has content, they appear in the generated pod in an undefined 53# order. Note that you can't use a $section_name_variable in =head1 lines 54# 55# The next =head1, =for apidoc_section, or file end terminates what goes into 56# the current section 57# 58# The %valid_sections hash below also can have header content, which will 59# appear before any =head1 content. The hash can also have footer content 60# content, which will appear at the end of the section, after all the 61# elements. 62# 63# The lines that define the actual functions, etc are documented in embed.fnc, 64# because they have flags which must be kept in sync with that file. 65 66use strict; 67use warnings; 68 69# 80 column terminal - 2 for pager adding 2 columns; -4 for indent for 70# non-heading lines; 71my $max_width = 80 - 2 - 4; 72 73if (@ARGV) { 74 my $workdir = shift; 75 chdir $workdir 76 or die "Couldn't chdir to '$workdir': $!"; 77} 78require './regen/regen_lib.pl'; 79require './regen/embed_lib.pl'; 80 81my %described_elsewhere; 82 83# 84# See database of global and static function prototypes in embed.fnc 85# This is used to generate prototype headers under various configurations, 86# export symbols lists for different platforms, and macros to provide an 87# implicit interpreter context argument. 88# 89 90my %docs; 91my %seen; 92my %funcflags; 93my %missing; 94my %missing_macros; 95 96my $link_text = "Described in"; 97 98my $description_indent = 4; 99my $usage_indent = 3; # + initial blank yields 4 total 100 101my $AV_scn = 'AV Handling'; 102my $callback_scn = 'Callback Functions'; 103my $casting_scn = 'Casting'; 104my $casing_scn = 'Character case changing'; 105my $classification_scn = 'Character classification'; 106my $names_scn = 'Character names'; 107my $scope_scn = 'Compile-time scope hooks'; 108my $compiler_scn = 'Compiler and Preprocessor information'; 109my $directives_scn = 'Compiler directives'; 110my $concurrency_scn = 'Concurrency'; 111my $COP_scn = 'COP Hint Hashes'; 112my $CV_scn = 'CV Handling'; 113my $custom_scn = 'Custom Operators'; 114my $debugging_scn = 'Debugging'; 115my $display_scn = 'Display functions'; 116my $embedding_scn = 'Embedding and Interpreter Cloning'; 117my $errno_scn = 'Errno'; 118my $exceptions_scn = 'Exception Handling (simple) Macros'; 119my $filesystem_scn = 'Filesystem configuration values'; 120my $floating_scn = 'Floating point configuration values'; 121my $formats_scn = 'Formats'; 122my $genconfig_scn = 'General Configuration'; 123my $globals_scn = 'Global Variables'; 124my $GV_scn = 'GV Handling'; 125my $hook_scn = 'Hook manipulation'; 126my $HV_scn = 'HV Handling'; 127my $io_scn = 'Input/Output'; 128my $integer_scn = 'Integer configuration values'; 129my $lexer_scn = 'Lexer interface'; 130my $locale_scn = 'Locales'; 131my $magic_scn = 'Magic'; 132my $memory_scn = 'Memory Management'; 133my $MRO_scn = 'MRO'; 134my $multicall_scn = 'Multicall Functions'; 135my $numeric_scn = 'Numeric Functions'; 136my $optree_construction_scn = 'Optree construction'; 137my $optree_manipulation_scn = 'Optree Manipulation Functions'; 138my $pack_scn = 'Pack and Unpack'; 139my $pad_scn = 'Pad Data Structures'; 140my $password_scn = 'Password and Group access'; 141my $paths_scn = 'Paths to system commands'; 142my $prototypes_scn = 'Prototype information'; 143my $regexp_scn = 'REGEXP Functions'; 144my $signals_scn = 'Signals'; 145my $site_scn = 'Site configuration'; 146my $sockets_scn = 'Sockets configuration values'; 147my $filters_scn = 'Source Filters'; 148my $stack_scn = 'Stack Manipulation Macros'; 149my $string_scn = 'String Handling'; 150my $SV_flags_scn = 'SV Flags'; 151my $SV_scn = 'SV Handling'; 152my $time_scn = 'Time'; 153my $typedefs_scn = 'Typedef names'; 154my $unicode_scn = 'Unicode Support'; 155my $utility_scn = 'Utility Functions'; 156my $versioning_scn = 'Versioning'; 157my $warning_scn = 'Warning and Dieing'; 158my $XS_scn = 'XS'; 159 160# Kept separate at end 161my $undocumented_scn = 'Undocumented elements'; 162 163my %valid_sections = ( 164 $AV_scn => {}, 165 $callback_scn => {}, 166 $casting_scn => {}, 167 $casing_scn => {}, 168 $classification_scn => {}, 169 $scope_scn => {}, 170 $compiler_scn => {}, 171 $directives_scn => {}, 172 $concurrency_scn => {}, 173 $COP_scn => {}, 174 $CV_scn => { 175 header => <<~'EOT', 176 This section documents functions to manipulate CVs which are 177 code-values, meaning subroutines. For more information, see 178 L<perlguts>. 179 EOT 180 }, 181 182 $custom_scn => {}, 183 $debugging_scn => {}, 184 $display_scn => {}, 185 $embedding_scn => {}, 186 $errno_scn => {}, 187 $exceptions_scn => {}, 188 $filesystem_scn => { 189 header => <<~'EOT', 190 Also see L</List of capability HAS_foo symbols>. 191 EOT 192 }, 193 $floating_scn => { 194 header => <<~'EOT', 195 Also L</List of capability HAS_foo symbols> lists capabilities 196 that arent in this section. For example C<HAS_ASINH>, for the 197 hyperbolic sine function. 198 EOT 199 }, 200 $formats_scn => { 201 header => <<~'EOT', 202 These are used for formatting the corresponding type For example, 203 instead of saying 204 205 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv); 206 207 use 208 209 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv); 210 211 This keeps you from having to know if, say an IV, needs to be 212 printed as C<%d>, C<%ld>, or something else. 213 EOT 214 }, 215 $genconfig_scn => { 216 header => <<~'EOT', 217 This section contains configuration information not otherwise 218 found in the more specialized sections of this document. At the 219 end is a list of C<#defines> whose name should be enough to tell 220 you what they do, and a list of #defines which tell you if you 221 need to C<#include> files to get the corresponding functionality. 222 EOT 223 224 footer => <<~'EOT', 225 226 =head2 List of capability C<HAS_I<foo>> symbols 227 228 This is a list of those symbols that dont appear elsewhere in ths 229 document that indicate if the current platform has a certain 230 capability. Their names all begin with C<HAS_>. Only those 231 symbols whose capability is directly derived from the name are 232 listed here. All others have their meaning expanded out elsewhere 233 in this document. This (relatively) compact list is because we 234 think that the expansion would add little or no value and take up 235 a lot of space (because there are so many). If you think certain 236 ones should be expanded, send email to 237 L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. 238 239 Each symbol here will be C<#define>d if and only if the platform 240 has the capability. If you need more detail, see the 241 corresponding entry in F<config.h>. For convenience, the list is 242 split so that the ones that indicate there is a reentrant version 243 of a capability are listed separately 244 245 __HAS_LIST__ 246 247 And, the reentrant capabilities: 248 249 __HAS_R_LIST__ 250 251 Example usage: 252 253 =over 254 255 #ifdef HAS_STRNLEN 256 use strnlen() 257 #else 258 use an alternative implementation 259 #endif 260 261 =back 262 263 =head2 List of C<#include> needed symbols 264 265 This list contains symbols that indicate if certain C<#include> 266 files are present on the platform. If your code accesses the 267 functionality that one of these is for, you will need to 268 C<#include> it if the symbol on this list is C<#define>d. For 269 more detail, see the corresponding entry in F<config.h>. 270 271 __INCLUDE_LIST__ 272 273 Example usage: 274 275 =over 276 277 #ifdef I_WCHAR 278 #include <wchar.h> 279 #endif 280 281 =back 282 EOT 283 }, 284 $globals_scn => {}, 285 $GV_scn => {}, 286 $hook_scn => {}, 287 $HV_scn => {}, 288 $io_scn => {}, 289 $integer_scn => {}, 290 $lexer_scn => {}, 291 $locale_scn => {}, 292 $magic_scn => {}, 293 $memory_scn => {}, 294 $MRO_scn => {}, 295 $multicall_scn => {}, 296 $numeric_scn => {}, 297 $optree_construction_scn => {}, 298 $optree_manipulation_scn => {}, 299 $pack_scn => {}, 300 $pad_scn => {}, 301 $password_scn => {}, 302 $paths_scn => {}, 303 $prototypes_scn => {}, 304 $regexp_scn => {}, 305 $signals_scn => {}, 306 $site_scn => { 307 header => <<~'EOT', 308 These variables give details as to where various libraries, 309 installation destinations, I<etc.>, go, as well as what various 310 installation options were selected 311 EOT 312 }, 313 $sockets_scn => {}, 314 $filters_scn => {}, 315 $stack_scn => {}, 316 $string_scn => { 317 header => <<~EOT, 318 See also C<L</$unicode_scn>>. 319 EOT 320 }, 321 $SV_flags_scn => {}, 322 $SV_scn => {}, 323 $time_scn => {}, 324 $typedefs_scn => {}, 325 $unicode_scn => { 326 header => <<~EOT, 327 L<perlguts/Unicode Support> has an introduction to this API. 328 329 See also C<L</$classification_scn>>, 330 C<L</$casing_scn>>, 331 and C<L</$string_scn>>. 332 Various functions outside this section also work specially with 333 Unicode. Search for the string "utf8" in this document. 334 EOT 335 }, 336 $utility_scn => {}, 337 $versioning_scn => {}, 338 $warning_scn => {}, 339 $XS_scn => {}, 340); 341 342# Somewhat loose match for an apidoc line so we can catch minor typos. 343# Parentheses are used to capture portions so that below we verify 344# that things are the actual correct syntax. 345my $apidoc_re = qr/ ^ (\s*) # $1 346 (=?) # $2 347 (\s*) # $3 348 for (\s*) # $4 349 apidoc (_item)? # $5 350 (\s*) # $6 351 (.*?) # $7 352 \s* \n /x; 353# Only certain flags, dealing with display, are acceptable for apidoc_item 354my $display_flags = "fFnDopsT"; 355 356sub check_api_doc_line ($$) { 357 my ($file, $in) = @_; 358 359 return unless $in =~ $apidoc_re; 360 361 my $is_item = defined $5; 362 my $is_in_proper_form = length $1 == 0 363 && length $2 > 0 364 && length $3 == 0 365 && length $4 > 0 366 && length $6 > 0 367 && length $7 > 0; 368 my $proto_in_file = $7; 369 my $proto = $proto_in_file; 370 $proto = "||$proto" if $proto !~ /\|/; 371 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto; 372 373 $name && $is_in_proper_form or die <<EOS; 374Bad apidoc at $file line $.: 375 $in 376Expected: 377 =for apidoc flags|returntype|name|arg|arg|... 378 =for apidoc flags|returntype|name 379 =for apidoc name 380(or 'apidoc_item') 381EOS 382 383 die "Only [$display_flags] allowed in apidoc_item:\n$in" 384 if $is_item && $flags =~ /[^$display_flags]/; 385 386 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args); 387} 388 389sub embed_override($) { 390 my ($element_name) = shift; 391 392 # If the entry is also in embed.fnc, it should be defined 393 # completely there, but not here 394 my $embed_docref = delete $funcflags{$element_name}; 395 396 return unless $embed_docref and %$embed_docref; 397 398 my $flags = $embed_docref->{'flags'}; 399 warn "embed.fnc entry '$element_name' missing 'd' flag" 400 unless $flags =~ /d/; 401 402 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*); 403} 404 405# The section that is in effect at the beginning of the given file. If not 406# listed here, an apidoc_section line must precede any apidoc lines. 407# This allows the files listed here that generally are single-purpose, to not 408# have to worry about the autodoc section 409my %initial_file_section = ( 410 'av.c' => $AV_scn, 411 'av.h' => $AV_scn, 412 'cv.h' => $CV_scn, 413 'doio.c' => $io_scn, 414 'gv.c' => $GV_scn, 415 'gv.h' => $GV_scn, 416 'hv.h' => $HV_scn, 417 'locale.c' => $locale_scn, 418 'malloc.c' => $memory_scn, 419 'numeric.c' => $numeric_scn, 420 'opnames.h' => $optree_construction_scn, 421 'pad.h'=> $pad_scn, 422 'patchlevel.h' => $versioning_scn, 423 'perlio.h' => $io_scn, 424 'pod/perlapio.pod' => $io_scn, 425 'pod/perlcall.pod' => $callback_scn, 426 'pod/perlembed.pod' => $embedding_scn, 427 'pod/perlfilter.pod' => $filters_scn, 428 'pod/perliol.pod' => $io_scn, 429 'pod/perlmroapi.pod' => $MRO_scn, 430 'pod/perlreguts.pod' => $regexp_scn, 431 'pp_pack.c' => $pack_scn, 432 'pp_sort.c' => $SV_scn, 433 'regcomp.c' => $regexp_scn, 434 'regexp.h' => $regexp_scn, 435 'unicode_constants.h' => $unicode_scn, 436 'utf8.c' => $unicode_scn, 437 'utf8.h' => $unicode_scn, 438 'vutil.c' => $versioning_scn, 439 ); 440 441sub autodoc ($$) { # parse a file and extract documentation info 442 my($fh,$file) = @_; 443 my($in, $line_num, $header, $section); 444 445 $section = $initial_file_section{$file} 446 if defined $initial_file_section{$file}; 447 448 my $file_is_C = $file =~ / \. [ch] $ /x; 449 450 # Count lines easier 451 my $get_next_line = sub { $line_num++; return <$fh> }; 452 453 # Read the file 454 while ($in = $get_next_line->()) { 455 last unless defined $in; 456 457 next unless ( $in =~ / ^ =for [ ]+ apidoc /x 458 # =head1 lines only have effect in C files 459 || ($file_is_C && $in =~ /^=head1/)); 460 461 # Here, the line introduces a portion of the input that we care about. 462 # Either it is for an API element, or heading text which we expect 463 # will be used for elements later in the file 464 465 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file); 466 my (@args, @items); 467 468 # If the line starts a new section ... 469 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) { 470 471 $section = $1; 472 if ($section =~ / ^ \$ /x) { 473 $section .= '_scn' unless $section =~ / _scn $ /; 474 $section = eval "$section"; 475 die "Unknown \$section variable '$section' in $file: $@" if $@; 476 } 477 die "Unknown section name '$section' in $file near line $.\n" 478 unless defined $valid_sections{$section}; 479 480 } 481 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a 482 # plain apidoc line 483 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/; 484 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'"; 485 } 486 else { # Plain apidoc 487 488 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args) 489 = check_api_doc_line($file, $in); 490 # Override this line with any info in embed.fnc 491 my ($embed_flags, $embed_ret_type, @embed_args) 492 = embed_override($element_name); 493 if ($embed_ret_type) { 494 warn "embed.fnc entry overrides redundant information in" 495 . " '$proto_in_file' in $file" 496 if $flags || $ret_type || @args; 497 $flags = $embed_flags; 498 $ret_type = $embed_ret_type; 499 @args = @embed_args; 500 } 501 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not 502 # a macro or typedef 503 $missing{$element_name} = $file; 504 } 505 506 die "flag '$1' is not legal (for function $element_name (from $file))" 507 if $flags =~ / ( [^AabCDdEeFfGhiMmNnTOoPpRrSsUuWXxy] ) /x; 508 509 die "'u' flag must also have 'm' or 'y' flags' for $element_name" 510 if $flags =~ /u/ && $flags !~ /[my]/; 511 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file") 512 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x; 513 514 if (exists $seen{$element_name} && $flags !~ /h/) { 515 die ("'$element_name' in $file was already documented in $seen{$element_name}"); 516 } 517 else { 518 $seen{$element_name} = $file; 519 } 520 } 521 522 # Here we have processed the initial line in the heading text or API 523 # element, and have saved the important information from it into the 524 # corresponding variables. Now accumulate the text that applies to it 525 # up to a terminating line, which is one of: 526 # 1) =cut 527 # 2) =head (in a C file only =head1) 528 # 3) an end comment line in a C file: m:^\s*\*/: 529 # 4) =for apidoc... (except apidoc_item lines) 530 $text = ""; 531 my $head_ender_num = ($file_is_C) ? 1 : ""; 532 while (defined($in = $get_next_line->())) { 533 534 last if $in =~ /^=cut/x; 535 last if $in =~ /^=head$head_ender_num/; 536 537 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) { 538 539 # End of comment line in C files is a fall-back terminator, 540 # but warn only if there actually is some accumulated text 541 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/; 542 last; 543 } 544 545 if ($in !~ / ^ =for [ ]+ apidoc /x) { 546 $text .= $in; 547 next; 548 } 549 550 # Here, the line is an apidoc line. All but apidoc_item terminate 551 # the text being accumulated. 552 last if $in =~ / ^ =for [ ]+ apidoc_section /x; 553 554 my ($item_name, $item_flags, $item_ret_type, $is_item, 555 $item_proto, @item_args) = check_api_doc_line($file, $in); 556 last unless $is_item; 557 558 # Here, is an apidoc_item_line; They can only come within apidoc 559 # paragraphs. 560 die "Unexpected api_doc_item line '$item_proto'" 561 unless $element_name; 562 563 # We accept blank lines between these, but nothing else; 564 die "apidoc_item lines must immediately follow apidoc lines for " 565 . " '$element_name' in $file" 566 if $text =~ /\S/; 567 # Override this line with any info in embed.fnc 568 my ($embed_flags, $embed_ret_type, @embed_args) 569 = embed_override($item_name); 570 if ($embed_ret_type) { 571 warn "embed.fnc entry overrides redundant information in" 572 . " '$item_proto' in $file" 573 if $item_flags || $item_ret_type || @item_args; 574 575 $item_flags = $embed_flags; 576 $item_ret_type = $embed_ret_type; 577 @item_args = @embed_args; 578 } 579 580 # Use the base entry flags if none for this item; otherwise add in 581 # any non-display base entry flags. 582 if ($item_flags) { 583 $item_flags .= $flags =~ s/[$display_flags]//rg; 584 } 585 else { 586 $item_flags = $flags; 587 } 588 $item_ret_type = $ret_type unless $item_ret_type; 589 @item_args = @args unless @item_args; 590 push @items, { name => $item_name, 591 ret_type => $item_ret_type, 592 flags => $item_flags, 593 args => [ @item_args ], 594 }; 595 596 # This line shows that this element is documented. 597 delete $funcflags{$item_name}; 598 } 599 600 # Here, are done accumulating the text for this item. Trim it 601 $text =~ s/ ^ \s* //x; 602 $text =~ s/ \s* $ //x; 603 $text .= "\n" if $text ne ""; 604 605 # And treat all-spaces as nothing at all 606 undef $text unless $text =~ /\S/; 607 608 if ($element_name) { 609 610 # Here, we have accumulated into $text, the pod for $element_name 611 my $where = $flags =~ /A/ ? 'api' : 'guts'; 612 613 $section = "Functions in file $file" unless defined $section; 614 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n" 615 unless defined $section; 616 if (exists $docs{$where}{$section}{$element_name}) { 617 warn "$0: duplicate API entry for '$element_name' in" 618 . " $where/$section\n"; 619 next; 620 } 621 622 # Override the text with just a link if the flags call for that 623 my $is_link_only = ($flags =~ /h/); 624 if ($is_link_only) { 625 if ($file_is_C) { 626 die "Can't currently handle link with items to it:\n$in" if @items; 627 redo; # Don't put anything if C source 628 } 629 630 # Here, is an 'h' flag in pod. We add a reference to the pod (and 631 # nothing else) to perlapi/intern. (It would be better to add a 632 # reference to the correct =item,=header, but something that makes 633 # it harder is that it that might be a duplicate, like '=item *'; 634 # so that is a future enhancement XXX. Another complication is 635 # there might be more than one deserving candidates.) 636 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s) 637 $podname =~ s/\.pod//; 638 $text = "Described in L<$podname>.\n"; 639 640 # Don't output a usage example for linked to documentation if 641 # it is trivial (has no arguments) and we aren't to add a 642 # semicolon 643 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/; 644 645 # Keep track of all the pod files that we refer to. 646 push $described_elsewhere{$podname}->@*, $podname; 647 } 648 649 $docs{$where}{$section}{$element_name}{flags} = $flags; 650 $docs{$where}{$section}{$element_name}{pod} = $text; 651 $docs{$where}{$section}{$element_name}{file} = $file; 652 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type; 653 push $docs{$where}{$section}{$element_name}{args}->@*, @args; 654 push $docs{$where}{$section}{$element_name}{items}->@*, @items; 655 } 656 elsif ($text) { 657 $valid_sections{$section}{header} = "" unless 658 defined $valid_sections{$section}{header}; 659 $valid_sections{$section}{header} .= "\n$text"; 660 } 661 662 # We already have the first line of what's to come in $in 663 redo; 664 665 } # End of loop through input 666} 667 668my %configs; 669my @has_defs; 670my @has_r_defs; # Reentrant symbols 671my @include_defs; 672 673sub parse_config_h { 674 use re '/aa'; # Everthing is ASCII in this file 675 676 # Process config.h 677 my $config_h = 'config.h'; 678 $config_h = 'win32/config.h' unless -e $config_h; 679 die "Can't find $config_h" unless -e $config_h; 680 open my $fh, '<', $config_h or die "Can't open $config_h: $!"; 681 while (<$fh>) { 682 683 # Look for lines like /* FOO_BAR: 684 # By convention all config.h descriptions begin like that 685 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) { 686 my $name = $1; 687 688 # Here we are starting the description for $name in config.h. We 689 # accumulate the entire description for it into @description. 690 # Flowing text from one input line to another is appended into the 691 # same array element to make a single flowing line element, but 692 # verbatim lines are kept as separate elements in @description. 693 # This will facilitate later doing pattern matching without regard 694 # to line boundaries on non-verbatim text. 695 696 die "Multiple config.h entries for '$name'" 697 if defined $configs{$name}{description}; 698 699 # Get first line of description 700 $_ = <$fh>; 701 702 # Each line in the description begins with blanks followed by '/*' 703 # and some spaces. 704 die "Unexpected config.h initial line for $name: '$_'" 705 unless s/ ^ ( \s* \* \s* ) //x; 706 my $initial_text = $1; 707 708 # Initialize the description with this first line (after having 709 # stripped the prefix text) 710 my @description = $_; 711 712 # The first line is used as a template for how much indentation 713 # each normal succeeding line has. Lines indented further 714 # will be considered as intended to be verbatim. But, empty lines 715 # likely won't have trailing blanks, so just strip the whole thing 716 # for them. 717 my $strip_initial_qr = qr! \s* \* \s* $ 718 | \Q$initial_text\E 719 !x; 720 $configs{$name}{verbatim} = 0; 721 722 # Read in the remainder of the description 723 while (<$fh>) { 724 last if s| ^ \s* \* / ||x; # A '*/' ends it 725 726 die "Unexpected config.h description line for $name: '$_'" 727 unless s/$strip_initial_qr//; 728 729 # Fix up the few flawed lines in config.h wherein a new 730 # sentence begins with a tab (and maybe a space after that). 731 # Although none of them currently do, let it recognize 732 # something like 733 # 734 # "... text"). The next sentence ... 735 # 736 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg; 737 738 # If this line has extra indentation or looks to have columns, 739 # it should be treated as verbatim. Columns are indicated by 740 # use of interior: tabs, 3 spaces in a row, or even 2 spaces 741 # not preceded by punctuation. 742 if ($_ !~ m/ ^ \s 743 | \S (?: \t 744 | \s{3} 745 | (*nlb:[[:punct:]]) \s{2} 746 ) 747 /x) 748 { 749 # But here, is not a verbatim line. Add an empty line if 750 # this is the first non-verbatim after a run of verbatims 751 if ($description[-1] =~ /^\s/) { 752 push @description, "\n", $_; 753 } 754 else { # Otherwise, append this flowing line to the 755 # current flowing line 756 $description[-1] .= $_; 757 } 758 } 759 else { 760 $configs{$name}{verbatim} = 1; 761 762 # The first verbatim line in a run of them is separated by an 763 # empty line from the flowing lines above it 764 push @description, "\n" if $description[-1] =~ /^\S/; 765 766 $_ = Text::Tabs::expand($_); 767 768 # Only a single space so less likely to wrap 769 s/ ^ \s* / /x; 770 771 push @description, $_; 772 } 773 } 774 775 push $configs{$name}{description}->@*, @description 776 777 } # Not a description; see if it is a macro definition. 778 elsif (m! ^ 779 (?: / \* )? # Optional commented-out 780 # indication 781 \# \s* define \s+ ( \w+ ) # $1 is the name 782 ( \s* ) # $2 indicates if args or not 783 ( .*? ) # $3 is any definition 784 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/ 785 $ 786 !x) 787 { 788 my $name = $1; 789 790 # There can be multiple definitions for a name. We want to know 791 # if any of them has arguments, and if any has a body. 792 $configs{$name}{has_args} //= $2 eq ""; 793 $configs{$name}{has_args} ||= $2 eq ""; 794 $configs{$name}{has_defn} //= $3 ne ""; 795 $configs{$name}{has_defn} ||= $3 ne ""; 796 } 797 } 798 799 # We now have stored the description and information about every #define 800 # in the file. The description is in a form convenient to operate on to 801 # convert to pod. Do that now. 802 foreach my $name (keys %configs) { 803 next unless defined $configs{$name}{description}; 804 805 # All adjacent non-verbatim lines of the description are appended 806 # together in a single element in the array. This allows the patterns 807 # to work across input line boundaries. 808 809 my $pod = ""; 810 while (defined ($_ = shift $configs{$name}{description}->@*)) { 811 chomp; 812 813 if (/ ^ \S /x) { # Don't edit verbatim lines 814 815 # Enclose known file/path names not already so enclosed 816 # with <...>. (Some entries in config.h are already 817 # '<path/to/file>') 818 my $file_name_qr = qr! [ \w / ]+ \. 819 (?: c | h | xs | p [lm] | pmc | PL 820 | sh | SH | exe ) \b 821 !xx; 822 my $path_name_qr = qr! (?: / \w+ )+ !x; 823 for my $re ($file_name_qr, $path_name_qr) { 824 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx; 825 } 826 827 # Enclose <... file/path names with F<...> (but no double 828 # angle brackets) 829 for my $re ($file_name_qr, $path_name_qr) { 830 s! < ( $re ) > !F<$1>!gxx; 831 } 832 833 # Explain metaconfig units 834 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx; 835 836 # Convert "See foo" to "See C<L</foo>>" if foo is described in 837 # this file. Also create a link to the known file INSTALL. 838 # And, to be more general, handle "See also foo and bar", and 839 # "See also foo, bar, and baz" 840 while (m/ \b [Ss]ee \s+ 841 (?: also \s+ )? ( \w+ ) 842 (?: , \s+ ( \w+ ) )? 843 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) { 844 my @links = $1; 845 push @links, $2 if defined $2; 846 push @links, $3 if defined $3; 847 foreach my $link (@links) { 848 if ($link eq 'INSTALL') { 849 s/ \b INSTALL \b /C<L<INSTALL>>/xg; 850 } 851 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) { 852 s| \b $link \b |C<L</$link>>|xg; 853 $configs{$link}{linked} = 1; 854 $configs{$name}{linked} = 1; 855 } 856 } 857 } 858 859 # Enclose what we think are symbols with C<...>. 860 no warnings 'experimental::vlb'; 861 s/ (*nlb:<) 862 ( 863 # Any word followed immediately with parens or 864 # brackets 865 \b \w+ (?: \( [^)]* \) # parameter list 866 | \[ [^]]* \] # or array reference 867 ) 868 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols. 869 | \b (?: struct | union ) \s \w+ 870 871 # Words that contain underscores (which are 872 # definitely not text) or three uppercase letters in 873 # a row. Length two ones, like IV, aren't enclosed, 874 # because they often don't look as nice. 875 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b 876 ) 877 (*nla:>) 878 /C<$1>/xg; 879 880 # These include foo when the name is HAS_foo. This is a 881 # heuristic which works in most cases. 882 if ($name =~ / ^ HAS_ (.*) /x) { 883 my $symbol = lc $1; 884 885 # Don't include path components, nor things already in 886 # <>, or with trailing '(', '[' 887 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg; 888 } 889 } 890 891 $pod .= "$_\n"; 892 } 893 delete $configs{$name}{description}; 894 895 $configs{$name}{pod} = $pod; 896 } 897 898 # Now have converted the description to pod. We also now have enough 899 # information that we can do cross checking to find definitions without 900 # corresponding pod, and see if they are mentioned in some description; 901 # otherwise they aren't documented. 902 NAME: 903 foreach my $name (keys %configs) { 904 905 # A definition without pod 906 if (! defined $configs{$name}{pod}) { 907 908 # Leading/trailing underscore means internal to config.h, e.g., 909 # _GNU_SOURCE 910 next if $name =~ / ^ _ /x; 911 next if $name =~ / _ $ /x; 912 913 # MiXeD case names are internal to config.h; the first 4 914 # characters are sufficient to determine this 915 next if $name =~ / ^ [[:upper:]] [[:lower:]] 916 [[:upper:]] [[:lower:]] 917 /x; 918 919 # Here, not internal to config.h. Look to see if this symbol is 920 # mentioned in the pod of some other. If so, assume it is 921 # documented. 922 foreach my $check_name (keys %configs) { 923 my $this_element = $configs{$check_name}; 924 my $this_pod = $this_element->{pod}; 925 if (defined $this_pod) { 926 next NAME if $this_pod =~ / \b $name \b /x; 927 } 928 } 929 930 warn "$name has no documentation\n"; 931 $missing_macros{$name} = 'config.h'; 932 933 next; 934 } 935 936 my $has_defn = $configs{$name}{has_defn}; 937 my $has_args = $configs{$name}{has_args}; 938 939 # Check if any section already has an entry for this element. 940 # If so, it better be a placeholder, in which case we replace it 941 # with this entry. 942 foreach my $section (keys $docs{'api'}->%*) { 943 if (exists $docs{'api'}{$section}{$name}) { 944 my $was = $docs{'api'}{$section}{$name}->{pod}; 945 $was = "" unless $was; 946 chomp $was; 947 if ($was ne "" && $was !~ m/$link_text/) { 948 die "Multiple descriptions for $name\n" 949 . "$section contained '$was'"; 950 } 951 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod}; 952 $configs{$name}{section} = $section; 953 last; 954 } 955 } 956 957 my $handled = 0; # Haven't handled this yet 958 959 if (defined $configs{$name}{'section'}) { 960 # This has been taken care of elsewhere. 961 $handled = 1; 962 } 963 else { 964 my $flags = ""; 965 if ($has_defn && ! $has_args) { 966 $configs{$name}{args} = 1; 967 } 968 969 # Symbols of the form I_FOO are for #include files. They have 970 # special usage information 971 if ($name =~ / ^ I_ ( .* ) /x) { 972 my $file = lc $1 . '.h'; 973 $configs{$name}{usage} = <<~"EOT"; 974 #ifdef $name 975 #include <$file> 976 #endif 977 EOT 978 } 979 980 # Compute what section this variable should go into. This 981 # heuristic was determined by manually inspecting the current 982 # things in config.h, and should be adjusted as necessary as 983 # deficiencies are found. 984 # 985 # This is the default section for macros with a definiton but 986 # no arguments, meaning it is replaced unconditionally 987 # 988 my $sb = qr/ _ | \b /x; # segment boundary 989 my $dash_or_spaces = qr/ - | \s+ /x; 990 my $pod = $configs{$name}{pod}; 991 if ($name =~ / ^ USE_ /x) { 992 $configs{$name}{'section'} = $site_scn; 993 } 994 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x) 995 { 996 $configs{$name}{'section'} = $time_scn; 997 } 998 elsif ( $name =~ / ^ [[:alpha:]]+ f $ /x 999 && $configs{$name}{pod} =~ m/ \b format \b /ix) 1000 { 1001 $configs{$name}{'section'} = $formats_scn; 1002 } 1003 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV 1004 | $sb CASTFLAGS $sb 1005 | QUADMATH 1006 | $sb (?: IS )? NAN 1007 | $sb (?: IS )? FINITE 1008 /x) 1009 { 1010 $configs{$name}{'section'} = 1011 $floating_scn; 1012 } 1013 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) { 1014 $configs{$name}{'section'} = $filesystem_scn; 1015 } 1016 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x 1017 || $configs{$name}{pod} =~ m/ \b align /x) 1018 { 1019 $configs{$name}{'section'} = $compiler_scn; 1020 } 1021 elsif ($name =~ / ^ [IU] [ \d V ] 1022 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx) 1023 { 1024 $configs{$name}{'section'} = $integer_scn; 1025 } 1026 elsif ($name =~ / $sb t $sb /x) { 1027 $configs{$name}{'section'} = $typedefs_scn; 1028 $flags .= 'y'; 1029 } 1030 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x 1031 && $configs{$name}{pod} =~ m/ \b format \b /ix) 1032 { 1033 $configs{$name}{'section'} = $formats_scn; 1034 } 1035 elsif ($name =~ / BACKTRACE /x) { 1036 $configs{$name}{'section'} = $debugging_scn; 1037 } 1038 elsif ($name =~ / ALLOC $sb /x) { 1039 $configs{$name}{'section'} = $memory_scn; 1040 } 1041 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH 1042 | $sb FILE $sb 1043 | $sb DIR $sb 1044 | $sb LSEEK 1045 | $sb INO $sb 1046 | $sb OPEN 1047 | $sb CLOSE 1048 | ^ DIR 1049 | ^ INO $sb 1050 | DIR $ 1051 | FILENAMES 1052 /x 1053 || $configs{$name}{pod} =~ m! I/O | stdio 1054 | file \s+ descriptor 1055 | file \s* system 1056 | statfs 1057 !x) 1058 { 1059 $configs{$name}{'section'} = $filesystem_scn; 1060 } 1061 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) { 1062 $configs{$name}{'section'} = $signals_scn; 1063 } 1064 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) { 1065 $configs{$name}{'section'} = $prototypes_scn; 1066 } 1067 elsif ( $name =~ / ^ LOC_ /x 1068 || $configs{$name}{pod} =~ /full path/i) 1069 { 1070 $configs{$name}{'section'} = $paths_scn; 1071 } 1072 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) { 1073 $configs{$name}{'section'} = $locale_scn; 1074 } 1075 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) { 1076 $configs{$name}{'section'} = $compiler_scn; 1077 } 1078 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x) 1079 { 1080 $configs{$name}{'section'} = $password_scn; 1081 } 1082 elsif ($name =~ / SOCKET | $sb SOCK /x) { 1083 $configs{$name}{'section'} = $sockets_scn; 1084 } 1085 elsif ( $name =~ / THREAD | MULTIPLICITY /x 1086 || $configs{$name}{pod} =~ m/ \b pthread /ix) 1087 { 1088 $configs{$name}{'section'} = $concurrency_scn; 1089 } 1090 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN 1091 | VENDOR | ^ USE 1092 /x) 1093 { 1094 $configs{$name}{'section'} = $site_scn; 1095 } 1096 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix 1097 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix 1098 || $pod =~ / \b doubles \b /ix 1099 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix) 1100 { 1101 $configs{$name}{'section'} = 1102 $floating_scn; 1103 } 1104 else { 1105 # Above are the specific sections. The rest go into a 1106 # grab-bag of general configuration values. However, we put 1107 # two classes of them into lists of their names, without their 1108 # descriptions, when we think that the description doesn't add 1109 # any real value. One list contains the #include variables: 1110 # the description is basically boiler plate for each of these. 1111 # The other list contains the very many things that are of the 1112 # form HAS_foo, and \bfoo\b is contained in its description, 1113 # and there is no verbatim text in the pod or links to/from it 1114 # (which would add value). That means that it is likely the 1115 # intent of the variable can be gleaned from just its name, 1116 # and unlikely the description adds signficant value, so just 1117 # listing them suffices. Giving their descriptions would 1118 # expand this pod significantly with little added value. 1119 if ( ! $has_defn 1120 && ! $configs{$name}{verbatim} 1121 && ! $configs{$name}{linked}) 1122 { 1123 if ($name =~ / ^ I_ ( .* ) /x) { 1124 push @include_defs, $name; 1125 next; 1126 } 1127 elsif ($name =~ / ^ HAS_ ( .* ) /x) { 1128 my $canonical_name = $1; 1129 $canonical_name =~ s/_//g; 1130 1131 my $canonical_pod = $configs{$name}{pod}; 1132 $canonical_pod =~ s/_//g; 1133 1134 if ($canonical_pod =~ / \b $canonical_name \b /xi) { 1135 if ($name =~ / $sb R $sb /x) { 1136 push @has_r_defs, $name; 1137 } 1138 else { 1139 push @has_defs, $name; 1140 } 1141 next; 1142 } 1143 } 1144 } 1145 1146 $configs{$name}{'section'} = $genconfig_scn; 1147 } 1148 1149 my $section = $configs{$name}{'section'}; 1150 die "Internal error: '$section' not in \%valid_sections" 1151 unless grep { $_ eq $section } keys %valid_sections; 1152 $flags .= 'AdmnT'; 1153 $flags .= 'U' unless defined $configs{$name}{usage}; 1154 1155 # All the information has been gathered; save it 1156 $docs{'api'}{$section}{$name}{flags} = $flags; 1157 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod}; 1158 $docs{'api'}{$section}{$name}{ret_type} = ""; 1159 $docs{'api'}{$section}{$name}{file} = 'config.h'; 1160 $docs{'api'}{$section}{$name}{usage} 1161 = $configs{$name}{usage} if defined $configs{$name}{usage}; 1162 push $docs{'api'}{$section}{$name}{args}->@*, (); 1163 push $docs{'api'}{$section}{$name}{items}->@*, (); 1164 } 1165 } 1166} 1167 1168sub format_pod_indexes($) { 1169 my $entries_ref = shift; 1170 1171 # Output the X<> references to the names, packed since they don't get 1172 # displayed, but not too many per line so that when someone is editing the 1173 # file, it doesn't run on 1174 1175 my $text =""; 1176 my $line_length = 0; 1177 for my $name (sort dictionary_order $entries_ref->@*) { 1178 my $entry = "X<$name>"; 1179 my $entry_length = length $entry; 1180 1181 # Don't loop forever if we have a verrry long name, and don't go too 1182 # far to the right. 1183 if ($line_length > 0 && $line_length + $entry_length > $max_width) { 1184 $text .= "\n"; 1185 $line_length = 0; 1186 } 1187 1188 $text .= $entry; 1189 $line_length += $entry_length; 1190 } 1191 1192 return $text; 1193} 1194 1195sub docout ($$$) { # output the docs for one function 1196 my($fh, $element_name, $docref) = @_; 1197 1198 # Trim trailing space 1199 $element_name =~ s/\s*$//; 1200 1201 my $flags = $docref->{flags}; 1202 my $pod = $docref->{pod} // ""; 1203 my $file = $docref->{file}; 1204 1205 my @items = $docref->{items}->@*; 1206 1207 # Make the main element the first of the items. This allows uniform 1208 # treatment below 1209 unshift @items, { name => $element_name, 1210 flags => $flags, 1211 ret_type => $docref->{ret_type}, 1212 args => [ $docref->{args}->@* ], 1213 }; 1214 1215 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/; 1216 1217 print $fh "\n=over $description_indent\n"; 1218 print $fh "\n=item C<$_->{name}>\n" for @items; 1219 1220 # If we're printing only a link to an element, this isn't the major entry, 1221 # so no X<> here. 1222 if ($flags !~ /h/) { 1223 print $fh "X<$_->{name}>" for @items; 1224 print $fh "\n"; 1225 } 1226 1227 for my $item (@items) { 1228 if ($item->{flags} =~ /D/) { 1229 print $fh <<~"EOT"; 1230 1231 C<B<DEPRECATED!>> It is planned to remove C<$item->{name}> from a 1232 future release of Perl. Do not use it for new code; remove it from 1233 existing code. 1234 EOT 1235 } 1236 elsif ($item->{flags} =~ /x/) { 1237 print $fh <<~"EOT"; 1238 1239 NOTE: C<$item->{name}> is B<experimental> and may change or be 1240 removed without notice. 1241 EOT 1242 } 1243 } 1244 1245 chomp $pod; # Make sure prints pod with a single trailing \n 1246 print $fh "\n", $pod, "\n"; 1247 1248 for my $item (@items) { 1249 my $item_flags = $item->{flags}; 1250 my $item_name = $item->{name}; 1251 1252 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n" 1253 if $item_flags =~ /O/; 1254 # Is Perl_, but no #define foo # Perl_foo 1255 if (($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/) 1256 1257 # Can't handle threaded varargs 1258 || ($item_flags =~ /f/ && $item_flags !~ /T/)) 1259 { 1260 $item->{name} = "Perl_$item_name"; 1261 print $fh <<~"EOT"; 1262 1263 NOTE: C<$item_name> must be explicitly called as 1264 C<$item->{name}> 1265 EOT 1266 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/; 1267 print $fh ".\n"; 1268 } 1269 } 1270 1271 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough 1272 # to never warrant a usage line 1273 warn("U and s flags are incompatible") 1274 if $flags =~ /U/ && $flags =~ /s/; 1275 # nothing 1276 } else { 1277 1278 print $fh "\n=over $usage_indent\n"; 1279 1280 if (defined $docref->{usage}) { # An override of the usage section 1281 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n"; 1282 } 1283 else { 1284 1285 # Add the thread context formal parameter on expanded-out names 1286 for my $item (@items) { 1287 unshift $item->{args}->@*, (($item->{args}->@*) 1288 ? "pTHX_" 1289 : "pTHX") 1290 if $item->{flags} !~ /T/ 1291 && $item->{name} =~ /^Perl_/; 1292 } 1293 1294 # Look through all the items in this entry. If all have the same 1295 # return type and arguments, only the main entry is displayed. 1296 # Also, find the longest return type and longest name so that if 1297 # multiple ones are shown, they can be vertically aligned nicely 1298 my $need_individual_usage = 0; 1299 my $longest_name_length = length $items[0]->{name}; 1300 my $base_ret_type = $items[0]->{ret_type}; 1301 my $longest_ret = length $base_ret_type; 1302 my @base_args = $items[0]->{args}->@*; 1303 for (my $i = 1; $i < @items; $i++) { 1304 no warnings 'experimental::smartmatch'; 1305 my $item = $items[$i]; 1306 $need_individual_usage = 1 1307 if $item->{ret_type} ne $base_ret_type 1308 || ! ($item->{args}->@* ~~ @base_args); 1309 my $ret_length = length $item->{ret_type}; 1310 $longest_ret = $ret_length if $ret_length > $longest_ret; 1311 my $name_length = length $item->{name}; 1312 $longest_name_length = $name_length 1313 if $name_length > $longest_name_length; 1314 } 1315 1316 # If we're only showing one entry, only its length matters. 1317 $longest_name_length = length($items[0]->{name}) 1318 unless $need_individual_usage; 1319 print $fh "\n"; 1320 1321 my $indent = 1; # 1 is sufficient for verbatim; =over is used 1322 # for more 1323 my $ret_name_sep_length = 2; # spaces between return type and name 1324 my $name_indent = $indent + $longest_ret; 1325 $name_indent += $ret_name_sep_length if $longest_ret; 1326 1327 my $this_max_width = 1328 $max_width - $description_indent - $usage_indent; 1329 1330 for my $item (@items) { 1331 my $ret_type = $item->{ret_type}; 1332 my @args = $item->{args}->@*; 1333 my $name = $item->{name}; 1334 my $item_flags = $item->{flags}; 1335 1336 # The return type 1337 print $fh (" " x $indent), $ret_type; 1338 1339 print $fh " " x ( $ret_name_sep_length 1340 + $longest_ret - length $ret_type); 1341 print $fh $name; 1342 1343 if ($item_flags =~ /n/) { # no args 1344 warn("$file: $element_name: n flag without m") 1345 unless $item_flags =~ /m/; 1346 warn("$file: $name: n flag but apparently has args") 1347 if @args; 1348 } 1349 else { 1350 # +1 for the '(' 1351 my $arg_indent = $name_indent + $longest_name_length + 1; 1352 1353 # Align the argument lists of the items 1354 print $fh " " x ($longest_name_length - length($name)); 1355 print $fh "("; 1356 1357 # Display as many of the arguments on the same line as 1358 # will fit. 1359 my $total_length = $arg_indent; 1360 my $first_line = 1; 1361 for (my $i = 0; $i < @args; $i++) { 1362 my $arg = $args[$i]; 1363 my $arg_length = length($arg); 1364 1365 # All but the first arg are preceded by a blank 1366 my $use_blank = $i > 0; 1367 1368 # +1 here and below because either the argument has a 1369 # trailing comma or trailing ')' 1370 $total_length += $arg_length + $use_blank + 1; 1371 1372 # We want none of the arguments to be positioned so 1373 # they extend too far to the right. Ideally, they 1374 # should all start in the same column as the arguments 1375 # on the first line of the function display do. But, if 1376 # necessary, outdent them so that they all start in 1377 # another column, with the longest ending at the right 1378 # margin, like so: 1379 # void function_name(pTHX_ short1, 1380 # short2, 1381 # very_long_argument, 1382 # short3) 1383 if ($total_length > $this_max_width) { 1384 1385 # If this is the first continuation line, 1386 # calculate the longest argument; this will be the 1387 # one we may have to outdent for. 1388 if ($first_line) { 1389 $first_line = 0; 1390 1391 # We will need at least as much as the current 1392 # argument 1393 my $longest_arg_length = $arg_length 1394 + $use_blank + 1; 1395 1396 # Look through the rest of the args to see if 1397 # any are longer than this one. 1398 for (my $j = $i + 1; $j < @args; $j++) { 1399 1400 # Include the trailing ',' or ')' in the 1401 # length. No need to concern ourselves 1402 # with a leading blank, as the argument 1403 # would be positioned first on the next 1404 # line 1405 my $peek_arg_length = length ($args[$j]) 1406 + 1; 1407 $longest_arg_length = $peek_arg_length 1408 if $peek_arg_length > $longest_arg_length; 1409 } 1410 1411 # Calculate the new indent if necessary. 1412 $arg_indent = 1413 $this_max_width - $longest_arg_length 1414 if $arg_indent + $longest_arg_length 1415 > $this_max_width; 1416 } 1417 1418 print $fh "\n", (" " x $arg_indent); 1419 $total_length = $arg_indent + $arg_length + 1; 1420 $use_blank = 0; 1421 } 1422 1423 # Display this argument 1424 print $fh " " if $use_blank; 1425 print $fh $arg; 1426 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_'; 1427 1428 } # End of loop through args 1429 1430 print $fh ")"; 1431 } 1432 1433 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;" 1434 print $fh "\n"; 1435 1436 # Only the first entry is normally displayed 1437 last unless $need_individual_usage; 1438 } 1439 } 1440 1441 print $fh "\n=back\n"; 1442 } 1443 1444 print $fh "\n=back\n"; 1445 print $fh "\n=for hackers\nFound in file $file\n"; 1446} 1447 1448sub construct_missings_section { 1449 my ($pod_name, $missings_ref) = @_; 1450 my $text = ""; 1451 1452 return $text unless $missings_ref->@*; 1453 1454 $text .= <<~EOT; 1455 1456 =head1 $undocumented_scn 1457 1458 EOT 1459 if ($pod_name eq 'perlapi') { 1460 $text .= <<~'EOT'; 1461 The following functions have been flagged as part of the public 1462 API, but are currently undocumented. Use them at your own risk, 1463 as the interfaces are subject to change. Functions that are not 1464 listed in this document are not intended for public use, and 1465 should NOT be used under any circumstances. 1466 1467 If you feel you need to use one of these functions, first send 1468 email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. 1469 It may be that there is a good reason for the function not being 1470 documented, and it should be removed from this list; or it may 1471 just be that no one has gotten around to documenting it. In the 1472 latter case, you will be asked to submit a patch to document the 1473 function. Once your patch is accepted, it will indicate that the 1474 interface is stable (unless it is explicitly marked otherwise) and 1475 usable by you. 1476 EOT 1477 } 1478 else { 1479 $text .= <<~'EOT'; 1480 The following functions are currently undocumented. If you use 1481 one of them, you may wish to consider creating and submitting 1482 documentation for it. 1483 EOT 1484 } 1485 1486 $text .= "\n" . format_pod_indexes($missings_ref); 1487 1488 # Sort the elements. 1489 my @missings = sort dictionary_order $missings_ref->@*; 1490 1491 1492 $text .= "\n"; 1493 1494 use integer; 1495 1496 # Look through all the elements in the list and see how many columns we 1497 # could place them in the output what will fit in the available width. 1498 my $min_spacer = 2; # Need this much space between columns 1499 my $columns; 1500 my $rows; 1501 my @col_widths; 1502 1503 COLUMN: 1504 # We start with more columns, and work down until we find a number that 1505 # can accommodate all the data. This algorithm doesn't require the 1506 # resulting columns to all have the same width. This can allow for 1507 # as tight of packing as the data will possibly allow. 1508 for ($columns = 7; $columns > 1; $columns--) { 1509 1510 # For this many columns, we will need this many rows (final row might 1511 # not be completely filled) 1512 $rows = (@missings + $columns - 1) / $columns; 1513 1514 my $row_width = 0; 1515 my $i = 0; # Which missing element 1516 1517 # For each column ... 1518 for my $col (0 .. $columns - 1) { 1519 1520 # Calculate how wide the column needs to be, which is based on the 1521 # widest element in it 1522 $col_widths[$col] = 0; 1523 1524 # Look through all the rows to find the widest element 1525 for my $row (0 .. $rows - 1) { 1526 1527 # Skip if this row doesn't have an entry for this column 1528 last if $i >= @missings; 1529 1530 # This entry occupies this many bytes. 1531 my $this_width = length $missings[$i]; 1532 1533 # All but the final column need a spacer between it and the 1534 # next column over. 1535 $this_width += $min_spacer if $col < $columns - 1; 1536 1537 1538 # This column will need to have enough width to accommodate 1539 # this element 1540 if ($this_width > $col_widths[$col]) { 1541 1542 # We can't have this many columns if the total width 1543 # exceeds the available; bail now and try fewer columns 1544 next COLUMN if $row_width + $this_width > $max_width; 1545 1546 $col_widths[$col] = $this_width; 1547 } 1548 1549 $i++; # The next row will contain the next item 1550 } 1551 1552 $row_width += $col_widths[$col]; 1553 next COLUMN if $row_width > $max_width; 1554 } 1555 1556 # If we get this far, this many columns works 1557 last; 1558 } 1559 1560 # Here, have calculated the number of rows ($rows) and columns ($columns) 1561 # required to list the elements. @col_widths contains the width of each 1562 # column. 1563 1564 $text .= "\n\n=over $description_indent\n\n"; 1565 1566 # Assemble the output 1567 for my $row (0 .. $rows - 1) { 1568 for my $col (0 .. $columns - 1) { 1569 $text .= " " if $col == 0; # Indent one to mark as verbatim 1570 1571 my $index = $row + $rows * $col; # Convert 2 dimensions to 1 1572 1573 # Skip if this row doesn't have an entry for this column 1574 next if $index >= @missings; 1575 1576 my $element = $missings[$index]; 1577 $text .= $element; 1578 1579 # Add alignment spaces for all but final column 1580 $text .= " " x ($col_widths[$col] - length $element) 1581 if $col < $columns - 1; 1582 } 1583 1584 $text .= "\n"; # End of row 1585 } 1586 1587 $text .= "\n=back\n"; 1588 1589 return $text; 1590} 1591 1592sub dictionary_order { 1593 # Do a case-insensitive dictionary sort, with only alphabetics 1594 # significant, falling back to using everything for determinancy 1595 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r)) 1596 || uc($a) cmp uc($b) 1597 || $a cmp $b; 1598} 1599 1600sub output { 1601 my ($podname, $header, $dochash, $missings_ref, $footer) = @_; 1602 # 1603 # strip leading '|' from each line which had been used to hide 1604 # pod from pod checkers. 1605 s/^\|//gm for $header, $footer; 1606 1607 my $fh = open_new("pod/$podname.pod", undef, 1608 {by => "$0 extracting documentation", 1609 from => 'the C source files'}, 1); 1610 1611 print $fh $header, "\n"; 1612 1613 for my $section_name (sort dictionary_order keys %valid_sections) { 1614 my $section_info = $dochash->{$section_name}; 1615 1616 # We allow empty sections in perlintern. 1617 if (! $section_info && $podname eq 'perlapi') { 1618 warn "Empty section '$section_name'; skipped"; 1619 next; 1620 } 1621 1622 print $fh "\n=head1 $section_name\n"; 1623 1624 if ($podname eq 'perlapi') { 1625 print $fh "\n", $valid_sections{$section_name}{header}, "\n" 1626 if defined $valid_sections{$section_name}{header}; 1627 1628 # Output any heading-level documentation and delete so won't get in 1629 # the way later 1630 if (exists $section_info->{""}) { 1631 print $fh "\n", $section_info->{""}, "\n"; 1632 delete $section_info->{""}; 1633 } 1634 } 1635 1636 if ($section_info && keys $section_info->%*) { 1637 for my $function_name (sort dictionary_order keys %$section_info) { 1638 docout($fh, $function_name, $section_info->{$function_name}); 1639 } 1640 } 1641 else { 1642 print $fh "\nThere are only public API items currently in $section_name\n"; 1643 } 1644 1645 print $fh "\n", $valid_sections{$section_name}{footer}, "\n" 1646 if $podname eq 'perlapi' 1647 && defined $valid_sections{$section_name}{footer}; 1648 } 1649 1650 print $fh construct_missings_section($podname, $missings_ref); 1651 1652 print $fh "\n$footer\n=cut\n"; 1653 1654 read_only_bottom_close_and_rename($fh); 1655} 1656 1657foreach (@{(setup_embed())[0]}) { 1658 next if @$_ < 2; 1659 my ($flags, $ret_type, $func, @args) = @$_; 1660 s/\b(?:NN|NULLOK)\b\s+//g for @args; 1661 1662 $funcflags{$func} = { 1663 flags => $flags, 1664 ret_type => $ret_type, 1665 args => \@args, 1666 }; 1667} 1668 1669# glob() picks up docs from extra .c or .h files that may be in unclean 1670# development trees. 1671open my $fh, '<', 'MANIFEST' 1672 or die "Can't open MANIFEST: $!"; 1673while (my $line = <$fh>) { 1674 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/; 1675 1676 # Don't pick up pods from these. (We may pick up generated stuff from 1677 # /lib though) 1678 next if $file =~ m! ^ ( cpan | dist | ext ) / !x; 1679 1680 open F, '<', $file or die "Cannot open $file for docs: $!\n"; 1681 autodoc(\*F,$file); 1682 close F or die "Error closing $file: $!\n"; 1683} 1684close $fh or die "Error whilst reading MANIFEST: $!"; 1685 1686parse_config_h(); 1687 1688for (sort keys %funcflags) { 1689 next unless $funcflags{$_}{flags} =~ /d/; 1690 next if $funcflags{$_}{flags} =~ /h/; 1691 warn "no docs for $_\n"; 1692} 1693 1694foreach (sort keys %missing) { 1695 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc"; 1696} 1697 1698# List of funcs in the public API that aren't also marked as core-only, 1699# experimental nor deprecated. 1700my @missing_api = grep $funcflags{$_}{flags} =~ /A/ 1701 && $funcflags{$_}{flags} !~ /[xD]/ 1702 && !$docs{api}{$_}, keys %funcflags; 1703push @missing_api, keys %missing_macros; 1704 1705my @other_places = ( qw(perlclib perlxs), keys %described_elsewhere ); 1706my $places_other_than_intern = join ", ", 1707 map { "L<$_>" } sort dictionary_order 'perlapi', @other_places; 1708my $places_other_than_api = join ", ", 1709 map { "L<$_>" } sort dictionary_order 'perlintern', @other_places; 1710 1711# The S< > makes things less densely packed, hence more readable 1712my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs; 1713my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs; 1714$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/; 1715$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/; 1716 1717my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs; 1718$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/; 1719 1720my $section_list = join "\n\n", map { "=item L</$_>" } 1721 sort(dictionary_order keys %valid_sections), 1722 $undocumented_scn; # Keep last 1723 1724output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_"); 1725|=encoding UTF-8 1726| 1727|=head1 NAME 1728| 1729|perlapi - autogenerated documentation for the perl public API 1730| 1731|=head1 DESCRIPTION 1732|X<Perl API> X<API> X<api> 1733| 1734|This file contains most of the documentation of the perl public API, as 1735|generated by F<embed.pl>. Specifically, it is a listing of functions, 1736|macros, flags, and variables that may be used by extension writers. Besides 1737|L<perlintern> and F<config.h>, some items are listed here as being actually 1738|documented in another pod. 1739| 1740|L<At the end|/$undocumented_scn> is a list of functions which have yet 1741|to be documented. Patches welcome! The interfaces of these are subject to 1742|change without notice. 1743| 1744|Some of the functions documented here are consolidated so that a single entry 1745|serves for multiple functions which all do basically the same thing, but have 1746|some slight differences. For example, one form might process magic, while 1747|another doesn't. The name of each variation is listed at the top of the 1748|single entry. But if all have the same signature (arguments and return type) 1749|except for their names, only the usage for the base form is shown. If any 1750|one of the forms has a different signature (such as returning C<const> or 1751|not) every function's signature is explicitly displayed. 1752| 1753|Anything not listed here or in the other mentioned pods is not part of the 1754|public API, and should not be used by extension writers at all. For these 1755|reasons, blindly using functions listed in F<proto.h> is to be avoided when 1756|writing extensions. 1757| 1758|In Perl, unlike C, a string of characters may generally contain embedded 1759|C<NUL> characters. Sometimes in the documentation a Perl string is referred 1760|to as a "buffer" to distinguish it from a C string, but sometimes they are 1761|both just referred to as strings. 1762| 1763|Note that all Perl API global variables must be referenced with the C<PL_> 1764|prefix. Again, those not listed here are not to be used by extension writers, 1765|and can be changed or removed without notice; same with macros. 1766|Some macros are provided for compatibility with the older, 1767|unadorned names, but this support may be disabled in a future release. 1768| 1769|Perl was originally written to handle US-ASCII only (that is characters 1770|whose ordinal numbers are in the range 0 - 127). 1771|And documentation and comments may still use the term ASCII, when 1772|sometimes in fact the entire range from 0 - 255 is meant. 1773| 1774|The non-ASCII characters below 256 can have various meanings, depending on 1775|various things. (See, most notably, L<perllocale>.) But usually the whole 1776|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or 1777|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat 1778|"Latin1" as referring just to the characters in the range 128 through 255, or 1779|sometimes from 160 through 255. 1780|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters. 1781| 1782|Note that Perl can be compiled and run under either ASCII or EBCDIC (See 1783|L<perlebcdic>). Most of the documentation (and even comments in the code) 1784|ignore the EBCDIC possibility. 1785|For almost all purposes the differences are transparent. 1786|As an example, under EBCDIC, 1787|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so 1788|whenever this documentation refers to C<utf8> 1789|(and variants of that name, including in function names), 1790|it also (essentially transparently) means C<UTF-EBCDIC>. 1791|But the ordinals of characters differ between ASCII, EBCDIC, and 1792|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different 1793|number of bytes than in UTF-8. 1794| 1795|The organization of this document is tentative and subject to change. 1796|Suggestions and patches welcome 1797|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>. 1798| 1799|The sections in this document currently are 1800| 1801|=over 1802 1803|$section_list 1804| 1805|=back 1806| 1807|The listing below is alphabetical, case insensitive. 1808_EOB_ 1809|=head1 AUTHORS 1810| 1811|Until May 1997, this document was maintained by Jeff Okamoto 1812|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself. 1813| 1814|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie, 1815|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil 1816|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer, 1817|Stephen McCamant, and Gurusamy Sarathy. 1818| 1819|API Listing originally by Dean Roehrich <roehrich\@cray.com>. 1820| 1821|Updated to be autogenerated from comments in the source by Benjamin Stuhl. 1822| 1823|=head1 SEE ALSO 1824| 1825|F<config.h>, $places_other_than_api 1826_EOE_ 1827 1828# List of non-static internal functions 1829my @missing_guts = 1830 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags; 1831 1832output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_"); 1833|=head1 NAME 1834| 1835|perlintern - autogenerated documentation of purely B<internal> 1836|Perl functions 1837| 1838|=head1 DESCRIPTION 1839|X<internal Perl functions> X<interpreter functions> 1840| 1841|This file is the autogenerated documentation of functions in the 1842|Perl interpreter that are documented using Perl's internal documentation 1843|format but are not marked as part of the Perl API. In other words, 1844|B<they are not for use in extensions>! 1845 1846|It has the same sections as L<perlapi>, though some may be empty. 1847| 1848_EOB_ 1849| 1850|=head1 AUTHORS 1851| 1852|The autodocumentation system was originally added to the Perl core by 1853|Benjamin Stuhl. Documentation is by whoever was kind enough to 1854|document their functions. 1855| 1856|=head1 SEE ALSO 1857| 1858|F<config.h>, $places_other_than_intern 1859_EOE_ 1860