1package Pod::Html; 2use strict; 3require Exporter; 4 5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); 6$VERSION = 1.11; 7@ISA = qw(Exporter); 8@EXPORT = qw(pod2html htmlify); 9@EXPORT_OK = qw(anchorify); 10 11use Carp; 12use Config; 13use Cwd; 14use File::Spec; 15use File::Spec::Unix; 16use Getopt::Long; 17 18use locale; # make \w work right in non-ASCII lands 19 20=head1 NAME 21 22Pod::Html - module to convert pod files to HTML 23 24=head1 SYNOPSIS 25 26 use Pod::Html; 27 pod2html([options]); 28 29=head1 DESCRIPTION 30 31Converts files from pod format (see L<perlpod>) to HTML format. It 32can automatically generate indexes and cross-references, and it keeps 33a cache of things it knows how to cross-reference. 34 35=head1 FUNCTIONS 36 37=head2 pod2html 38 39 pod2html("pod2html", 40 "--podpath=lib:ext:pod:vms", 41 "--podroot=/usr/src/perl", 42 "--htmlroot=/perl/nmanual", 43 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop", 44 "--recurse", 45 "--infile=foo.pod", 46 "--outfile=/perl/nmanual/foo.html"); 47 48pod2html takes the following arguments: 49 50=over 4 51 52=item backlink 53 54 --backlink="Back to Top" 55 56Adds "Back to Top" links in front of every C<head1> heading (except for 57the first). By default, no backlinks are generated. 58 59=item cachedir 60 61 --cachedir=name 62 63Creates the item and directory caches in the given directory. 64 65=item css 66 67 --css=stylesheet 68 69Specify the URL of a cascading style sheet. Also disables all HTML/CSS 70C<style> attributes that are output by default (to avoid conflicts). 71 72=item flush 73 74 --flush 75 76Flushes the item and directory caches. 77 78=item header 79 80 --header 81 --noheader 82 83Creates header and footer blocks containing the text of the C<NAME> 84section. By default, no headers are generated. 85 86=item help 87 88 --help 89 90Displays the usage message. 91 92=item hiddendirs 93 94 --hiddendirs 95 --nohiddendirs 96 97Include hidden directories in the search for POD's in podpath if recurse 98is set. 99The default is not to traverse any directory whose name begins with C<.>. 100See L</"podpath"> and L</"recurse">. 101 102[This option is for backward compatibility only. 103It's hard to imagine that one would usefully create a module with a 104name component beginning with C<.>.] 105 106=item htmldir 107 108 --htmldir=name 109 110Sets the directory in which the resulting HTML file is placed. This 111is used to generate relative links to other files. Not passing this 112causes all links to be absolute, since this is the value that tells 113Pod::Html the root of the documentation tree. 114 115=item htmlroot 116 117 --htmlroot=name 118 119Sets the base URL for the HTML files. When cross-references are made, 120the HTML root is prepended to the URL. 121 122=item index 123 124 --index 125 --noindex 126 127Generate an index at the top of the HTML file. This is the default 128behaviour. 129 130=item infile 131 132 --infile=name 133 134Specify the pod file to convert. Input is taken from STDIN if no 135infile is specified. 136 137=item libpods 138 139 --libpods=name:...:name 140 141List of page names (eg, "perlfunc") which contain linkable C<=item>s. 142 143=item netscape 144 145 --netscape 146 --nonetscape 147 148B<Deprecated>, has no effect. For backwards compatibility only. 149 150=item outfile 151 152 --outfile=name 153 154Specify the HTML file to create. Output goes to STDOUT if no outfile 155is specified. 156 157=item podpath 158 159 --podpath=name:...:name 160 161Specify which subdirectories of the podroot contain pod files whose 162HTML converted forms can be linked to in cross references. 163 164=item podroot 165 166 --podroot=name 167 168Specify the base directory for finding library pods. 169 170=item quiet 171 172 --quiet 173 --noquiet 174 175Don't display I<mostly harmless> warning messages. These messages 176will be displayed by default. But this is not the same as C<verbose> 177mode. 178 179=item recurse 180 181 --recurse 182 --norecurse 183 184Recurse into subdirectories specified in podpath (default behaviour). 185 186=item title 187 188 --title=title 189 190Specify the title of the resulting HTML file. 191 192=item verbose 193 194 --verbose 195 --noverbose 196 197Display progress messages. By default, they won't be displayed. 198 199=back 200 201=head2 htmlify 202 203 htmlify($heading); 204 205Converts a pod section specification to a suitable section specification 206for HTML. Note that we keep spaces and special characters except 207C<", ?> (Netscape problem) and the hyphen (writer's problem...). 208 209=head2 anchorify 210 211 anchorify(@heading); 212 213Similar to C<htmlify()>, but turns non-alphanumerics into underscores. Note 214that C<anchorify()> is not exported by default. 215 216=head1 ENVIRONMENT 217 218Uses C<$Config{pod2html}> to setup default options. 219 220=head1 AUTHOR 221 222Tom Christiansen, E<lt>tchrist@perl.comE<gt>. 223 224=head1 SEE ALSO 225 226L<perlpod> 227 228=head1 COPYRIGHT 229 230This program is distributed under the Artistic License. 231 232=cut 233 234my($Cachedir); 235my($Dircache, $Itemcache); 236my @Begin_Stack; 237my @Libpods; 238my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl); 239my($Podfile, @Podpath, $Podroot); 240my $Css; 241 242my $Recurse; 243my $Quiet; 244my $HiddenDirs; 245my $Verbose; 246my $Doindex; 247 248my $Backlink; 249my($Listlevel, @Listtype); 250my $ListNewTerm; 251use vars qw($Ignore); # need to localize it later. 252 253my(%Items_Named, @Items_Seen); 254my($Title, $Header); 255 256my $Top; 257my $Paragraph; 258 259my %Sections; 260 261# Caches 262my %Pages = (); # associative array used to find the location 263 # of pages referenced by L<> links. 264my %Items = (); # associative array used to find the location 265 # of =item directives referenced by C<> links 266 267my %Local_Items; 268my $Is83; 269 270my $Curdir = File::Spec->curdir; 271 272init_globals(); 273 274sub init_globals { 275 $Cachedir = "."; # The directory to which item and directory 276 # caches will be written. 277 278 $Dircache = "pod2htmd.tmp"; 279 $Itemcache = "pod2htmi.tmp"; 280 281 @Begin_Stack = (); # begin/end stack 282 283 @Libpods = (); # files to search for links from C<> directives 284 $Htmlroot = "/"; # http-server base directory from which all 285 # relative paths in $podpath stem. 286 $Htmldir = ""; # The directory to which the html pages 287 # will (eventually) be written. 288 $Htmlfile = ""; # write to stdout by default 289 $Htmlfileurl = ""; # The url that other files would use to 290 # refer to this file. This is only used 291 # to make relative urls that point to 292 # other files. 293 294 $Podfile = ""; # read from stdin by default 295 @Podpath = (); # list of directories containing library pods. 296 $Podroot = $Curdir; # filesystem base directory from which all 297 # relative paths in $podpath stem. 298 $Css = ''; # Cascading style sheet 299 $Recurse = 1; # recurse on subdirectories in $podpath. 300 $Quiet = 0; # not quiet by default 301 $Verbose = 0; # not verbose by default 302 $Doindex = 1; # non-zero if we should generate an index 303 $Backlink = ''; # text for "back to top" links 304 $Listlevel = 0; # current list depth 305 @Listtype = (); # list types for open lists 306 $ListNewTerm = 0; # indicates new term in definition list; used 307 # to correctly open/close <dd> tags 308 $Ignore = 1; # whether or not to format text. we don't 309 # format text until we hit our first pod 310 # directive. 311 312 @Items_Seen = (); # for multiples of the same item in perlfunc 313 %Items_Named = (); 314 $Header = 0; # produce block header/footer 315 $Title = ''; # title to give the pod(s) 316 $Top = 1; # true if we are at the top of the doc. used 317 # to prevent the first <hr /> directive. 318 $Paragraph = ''; # which paragraph we're processing (used 319 # for error messages) 320 %Sections = (); # sections within this page 321 322 %Local_Items = (); 323 $Is83 = $^O eq 'dos'; # Is it an 8.3 filesystem? 324} 325 326# 327# clean_data: global clean-up of pod data 328# 329sub clean_data($){ 330 my( $dataref ) = @_; 331 for my $i ( 0..$#{$dataref} ) { 332 ${$dataref}[$i] =~ s/\s+\Z//; 333 334 # have a look for all-space lines 335 if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){ 336 my @chunks = split( /^\s+$/m, ${$dataref}[$i] ); 337 splice( @$dataref, $i, 1, @chunks ); 338 } 339 } 340} 341 342 343sub pod2html { 344 local(@ARGV) = @_; 345 local($/); 346 local $_; 347 348 init_globals(); 349 350 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN()); 351 352 # cache of %Pages and %Items from last time we ran pod2html 353 354 #undef $opt_help if defined $opt_help; 355 356 # parse the command-line parameters 357 parse_command_line(); 358 359 # escape the backlink argument (same goes for title but is done later...) 360 $Backlink = html_escape($Backlink) if defined $Backlink; 361 362 # set some variables to their default values if necessary 363 my $pod; 364 unless (@ARGV && $ARGV[0]) { 365 if ($Podfile and $Podfile ne '-') { 366 open $pod, '<', $Podfile 367 or die "$0: cannot open $Podfile file for input: $!\n"; 368 } else { 369 open $pod, '-'; 370 } 371 } else { 372 $Podfile = $ARGV[0]; # XXX: might be more filenames 373 $pod = *ARGV; 374 } 375 $Htmlfile = "-" unless $Htmlfile; # stdout 376 $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a // 377 $Htmldir =~ s#/\z## ; # so we don't get a // 378 if ( $Htmlroot eq '' 379 && defined( $Htmldir ) 380 && $Htmldir ne '' 381 && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir 382 ) 383 { 384 # Set the 'base' url for this file, so that we can use it 385 # as the location from which to calculate relative links 386 # to other files. If this is '', then absolute links will 387 # be used throughout. 388 $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1); 389 } 390 391 # read the pod a paragraph at a time 392 warn "Scanning for sections in input file(s)\n" if $Verbose; 393 $/ = ""; 394 my @poddata = <$pod>; 395 close $pod; 396 397 # be eol agnostic 398 for (@poddata) { 399 if (/\r/) { 400 if (/\r\n/) { 401 @poddata = map { s/\r\n/\n/g; 402 /\n\n/ ? 403 map { "$_\n\n" } split /\n\n/ : 404 $_ } @poddata; 405 } else { 406 @poddata = map { s/\r/\n/g; 407 /\n\n/ ? 408 map { "$_\n\n" } split /\n\n/ : 409 $_ } @poddata; 410 } 411 last; 412 } 413 } 414 415 clean_data( \@poddata ); 416 417 # scan the pod for =head[1-6] directives and build an index 418 my $index = scan_headings(\%Sections, @poddata); 419 420 unless($index) { 421 warn "No headings in $Podfile\n" if $Verbose; 422 } 423 424 # open the output file 425 my $html; 426 if($Htmlfile and $Htmlfile ne '-') { 427 open $html, ">", $Htmlfile 428 or die "$0: cannot open $Htmlfile file for output: $!\n"; 429 } else { 430 open $html, ">-"; 431 } 432 433 # put a title in the HTML file if one wasn't specified 434 if ($Title eq '') { 435 TITLE_SEARCH: { 436 for (my $i = 0; $i < @poddata; $i++) { 437 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) { 438 for my $para ( @poddata[$i, $i+1] ) { 439 last TITLE_SEARCH 440 if ($Title) = $para =~ /(\S+\s+-+.*\S)/s; 441 } 442 } 443 444 } 445 } 446 } 447 if (!$Title and $Podfile =~ /\.pod\z/) { 448 # probably a split pod so take first =head[12] as title 449 for (my $i = 0; $i < @poddata; $i++) { 450 last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/; 451 } 452 warn "adopted '$Title' as title for $Podfile\n" 453 if $Verbose and $Title; 454 } 455 if ($Title) { 456 $Title =~ s/\s*\(.*\)//; 457 } else { 458 warn "$0: no title for $Podfile.\n" unless $Quiet; 459 $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s; 460 $Title = ($Podfile eq "-" ? 'No Title' : $1); 461 warn "using $Title" if $Verbose; 462 } 463 $Title = html_escape($Title); 464 465 my $csslink = ''; 466 my $bodystyle = ' style="background-color: white"'; 467 my $tdstyle = ' style="background-color: #cccccc"'; 468 469 if ($Css) { 470 $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />); 471 $csslink =~ s,\\,/,g; 472 $csslink =~ s,(/.):,$1|,; 473 $bodystyle = ''; 474 $tdstyle = ''; 475 } 476 477 my $block = $Header ? <<END_OF_BLOCK : ''; 478<table border="0" width="100%" cellspacing="0" cellpadding="3"> 479<tr><td class="block"$tdstyle valign="middle"> 480<big><strong><span class="block"> $Title</span></strong></big> 481</td></tr> 482</table> 483END_OF_BLOCK 484 485 print $html <<END_OF_HEAD; 486<?xml version="1.0" ?> 487<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 488<html xmlns="http://www.w3.org/1999/xhtml"> 489<head> 490<title>$Title</title>$csslink 491<meta http-equiv="content-type" content="text/html; charset=utf-8" /> 492<link rev="made" href="mailto:$Config{perladmin}" /> 493</head> 494 495<body$bodystyle> 496$block 497END_OF_HEAD 498 499 # load/reload/validate/cache %Pages and %Items 500 get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse); 501 502 # scan the pod for =item directives 503 scan_items( \%Local_Items, "", @poddata); 504 505 # put an index at the top of the file. note, if $Doindex is 0 we 506 # still generate an index, but surround it with an html comment. 507 # that way some other program can extract it if desired. 508 $index =~ s/--+/-/g; 509 510 my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : ""; 511 512 unless ($Doindex) 513 { 514 $index = qq(<!--\n$index\n-->\n); 515 } 516 517 print $html <<"END_OF_INDEX"; 518 519<!-- INDEX BEGIN --> 520<div name="index"> 521<p><a name=\"__index__\"></a></p> 522$index 523$hr 524</div> 525<!-- INDEX END --> 526 527END_OF_INDEX 528 529 # now convert this file 530 my $after_item; # set to true after an =item 531 warn "Converting input file $Podfile\n" if $Verbose; 532 foreach my $i (0..$#poddata){ 533 $_ = $poddata[$i]; 534 $Paragraph = $i+1; 535 if (/^(=.*)/s) { # is it a pod directive? 536 $Ignore = 0; 537 $after_item = 0; 538 $_ = $1; 539 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin 540 process_begin($html, $1, $2); 541 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end 542 process_end($1, $2); 543 } elsif (/^=cut/) { # =cut 544 process_cut(); 545 } elsif (/^=pod/) { # =pod 546 process_pod(); 547 } else { 548 next if @Begin_Stack && $Begin_Stack[-1] ne 'html'; 549 550 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading 551 process_head( $html, $1, $2, $Doindex && $index ); 552 } elsif (/^=item\s*(.*\S)?/sm) { # =item text 553 process_item( $html, $1 ); 554 $after_item = 1; 555 } elsif (/^=over\s*(.*)/) { # =over N 556 process_over(); 557 } elsif (/^=back/) { # =back 558 process_back( $html ); 559 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for 560 process_for( $html, $1, $2 ); 561 } else { 562 /^=(\S*)\s*/; 563 warn "$0: $Podfile: unknown pod directive '$1' in " 564 . "paragraph $Paragraph. ignoring.\n" unless $Quiet; 565 } 566 } 567 $Top = 0; 568 } 569 else { 570 next if $Ignore; 571 if (@Begin_Stack) { 572 print $html $_ if $Begin_Stack[-1] eq 'html'; 573 next; 574 } 575 my $text = $_; 576 577 # Open tag for definition list as we have something to put in it 578 if( $ListNewTerm ){ 579 print $html "<dd>\n"; 580 $ListNewTerm = 0; 581 } 582 583 if( $text =~ /\A\s+/ ){ 584 process_pre( \$text ); 585 print $html "<pre>\n$text</pre>\n"; 586 587 } else { 588 process_text( \$text ); 589 590 # experimental: check for a paragraph where all lines 591 # have some ...\t...\t...\n pattern 592 if( $text =~ /\t/ ){ 593 my @lines = split( "\n", $text ); 594 if( @lines > 1 ){ 595 my $all = 2; 596 foreach my $line ( @lines ){ 597 if( $line =~ /\S/ && $line !~ /\t/ ){ 598 $all--; 599 last if $all == 0; 600 } 601 } 602 if( $all > 0 ){ 603 $text =~ s/\t+/<td>/g; 604 $text =~ s/^/<tr><td>/gm; 605 $text = '<table cellspacing="0" cellpadding="0">' . 606 $text . '</table>'; 607 } 608 } 609 } 610 ## end of experimental 611 612 print $html "<p>$text</p>\n"; 613 } 614 $after_item = 0; 615 } 616 } 617 618 # finish off any pending directives 619 finish_list( $html ); 620 621 # link to page index 622 print $html "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n" 623 if $Doindex and $index and $Backlink; 624 625 print $html <<END_OF_TAIL; 626$block 627</body> 628 629</html> 630END_OF_TAIL 631 632 # close the html file 633 close $html or die "Failed to close $Htmlfile: $!"; 634 635 warn "Finished\n" if $Verbose; 636} 637 638############################################################################## 639 640sub usage { 641 my $podfile = shift; 642 warn "$0: $podfile: @_\n" if @_; 643 die <<END_OF_USAGE; 644Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name> 645 --podpath=<name>:...:<name> --podroot=<name> 646 --libpods=<name>:...:<name> --recurse --verbose --index 647 --netscape --norecurse --noindex --cachedir=<name> 648 649 --backlink - set text for "back to top" links (default: none). 650 --cachedir - directory for the item and directory cache files. 651 --css - stylesheet URL 652 --flush - flushes the item and directory caches. 653 --[no]header - produce block header/footer (default is no headers). 654 --help - prints this message. 655 --hiddendirs - search hidden directories in podpath 656 --htmldir - directory for resulting HTML files. 657 --htmlroot - http-server base directory from which all relative paths 658 in podpath stem (default is /). 659 --[no]index - generate an index at the top of the resulting html 660 (default behaviour). 661 --infile - filename for the pod to convert (input taken from stdin 662 by default). 663 --libpods - colon-separated list of pages to search for =item pod 664 directives in as targets of C<> and implicit links (empty 665 by default). note, these are not filenames, but rather 666 page names like those that appear in L<> links. 667 --outfile - filename for the resulting html file (output sent to 668 stdout by default). 669 --podpath - colon-separated list of directories containing library 670 pods (empty by default). 671 --podroot - filesystem base directory from which all relative paths 672 in podpath stem (default is .). 673 --[no]quiet - suppress some benign warning messages (default is off). 674 --[no]recurse - recurse on those subdirectories listed in podpath 675 (default behaviour). 676 --title - title that will appear in resulting html file. 677 --[no]verbose - self-explanatory (off by default). 678 --[no]netscape - deprecated, has no effect. for backwards compatibility only. 679 680END_OF_USAGE 681 682} 683 684sub parse_command_line { 685 my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help, 686 $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods, 687 $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet, 688 $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs); 689 690 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html}; 691 my $result = GetOptions( 692 'backlink=s' => \$opt_backlink, 693 'cachedir=s' => \$opt_cachedir, 694 'css=s' => \$opt_css, 695 'flush' => \$opt_flush, 696 'header!' => \$opt_header, 697 'help' => \$opt_help, 698 'hiddendirs!'=> \$opt_hiddendirs, 699 'htmldir=s' => \$opt_htmldir, 700 'htmlroot=s' => \$opt_htmlroot, 701 'index!' => \$opt_index, 702 'infile=s' => \$opt_infile, 703 'libpods=s' => \$opt_libpods, 704 'netscape!' => \$opt_netscape, 705 'outfile=s' => \$opt_outfile, 706 'podpath=s' => \$opt_podpath, 707 'podroot=s' => \$opt_podroot, 708 'quiet!' => \$opt_quiet, 709 'recurse!' => \$opt_recurse, 710 'title=s' => \$opt_title, 711 'verbose!' => \$opt_verbose, 712 ); 713 usage("-", "invalid parameters") if not $result; 714 715 usage("-") if defined $opt_help; # see if the user asked for help 716 $opt_help = ""; # just to make -w shut-up. 717 718 @Podpath = split(":", $opt_podpath) if defined $opt_podpath; 719 @Libpods = split(":", $opt_libpods) if defined $opt_libpods; 720 721 $Backlink = $opt_backlink if defined $opt_backlink; 722 $Cachedir = $opt_cachedir if defined $opt_cachedir; 723 $Css = $opt_css if defined $opt_css; 724 $Header = $opt_header if defined $opt_header; 725 $Htmldir = $opt_htmldir if defined $opt_htmldir; 726 $Htmlroot = $opt_htmlroot if defined $opt_htmlroot; 727 $Doindex = $opt_index if defined $opt_index; 728 $Podfile = $opt_infile if defined $opt_infile; 729 $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs; 730 $Htmlfile = $opt_outfile if defined $opt_outfile; 731 $Podroot = $opt_podroot if defined $opt_podroot; 732 $Quiet = $opt_quiet if defined $opt_quiet; 733 $Recurse = $opt_recurse if defined $opt_recurse; 734 $Title = $opt_title if defined $opt_title; 735 $Verbose = $opt_verbose if defined $opt_verbose; 736 737 warn "Flushing item and directory caches\n" 738 if $opt_verbose && defined $opt_flush; 739 $Dircache = "$Cachedir/pod2htmd.tmp"; 740 $Itemcache = "$Cachedir/pod2htmi.tmp"; 741 if (defined $opt_flush) { 742 1 while unlink($Dircache, $Itemcache); 743 } 744} 745 746 747my $Saved_Cache_Key; 748 749sub get_cache { 750 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; 751 my @cache_key_args = @_; 752 753 # A first-level cache: 754 # Don't bother reading the cache files if they still apply 755 # and haven't changed since we last read them. 756 757 my $this_cache_key = cache_key(@cache_key_args); 758 759 return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key; 760 761 # load the cache of %Pages and %Items if possible. $tests will be 762 # non-zero if successful. 763 my $tests = 0; 764 if (-f $dircache && -f $itemcache) { 765 warn "scanning for item cache\n" if $Verbose; 766 $tests = load_cache($dircache, $itemcache, $podpath, $podroot); 767 } 768 769 # if we didn't succeed in loading the cache then we must (re)build 770 # %Pages and %Items. 771 if (!$tests) { 772 warn "scanning directories in pod-path\n" if $Verbose; 773 scan_podpath($podroot, $recurse, 0); 774 } 775 $Saved_Cache_Key = cache_key(@cache_key_args); 776} 777 778sub cache_key { 779 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_; 780 return join('!', $dircache, $itemcache, $recurse, 781 @$podpath, $podroot, stat($dircache), stat($itemcache)); 782} 783 784# 785# load_cache - tries to find if the caches stored in $dircache and $itemcache 786# are valid caches of %Pages and %Items. if they are valid then it loads 787# them and returns a non-zero value. 788# 789sub load_cache { 790 my($dircache, $itemcache, $podpath, $podroot) = @_; 791 my($tests); 792 local $_; 793 794 $tests = 0; 795 796 open(CACHE, "<$itemcache") || 797 die "$0: error opening $itemcache for reading: $!\n"; 798 $/ = "\n"; 799 800 # is it the same podpath? 801 $_ = <CACHE>; 802 chomp($_); 803 $tests++ if (join(":", @$podpath) eq $_); 804 805 # is it the same podroot? 806 $_ = <CACHE>; 807 chomp($_); 808 $tests++ if ($podroot eq $_); 809 810 # load the cache if its good 811 if ($tests != 2) { 812 close(CACHE); 813 return 0; 814 } 815 816 warn "loading item cache\n" if $Verbose; 817 while (<CACHE>) { 818 /(.*?) (.*)$/; 819 $Items{$1} = $2; 820 } 821 close(CACHE); 822 823 warn "scanning for directory cache\n" if $Verbose; 824 open(CACHE, "<$dircache") || 825 die "$0: error opening $dircache for reading: $!\n"; 826 $/ = "\n"; 827 $tests = 0; 828 829 # is it the same podpath? 830 $_ = <CACHE>; 831 chomp($_); 832 $tests++ if (join(":", @$podpath) eq $_); 833 834 # is it the same podroot? 835 $_ = <CACHE>; 836 chomp($_); 837 $tests++ if ($podroot eq $_); 838 839 # load the cache if its good 840 if ($tests != 2) { 841 close(CACHE); 842 return 0; 843 } 844 845 warn "loading directory cache\n" if $Verbose; 846 while (<CACHE>) { 847 /(.*?) (.*)$/; 848 $Pages{$1} = $2; 849 } 850 851 close(CACHE); 852 853 return 1; 854} 855 856# 857# scan_podpath - scans the directories specified in @podpath for directories, 858# .pod files, and .pm files. it also scans the pod files specified in 859# @Libpods for =item directives. 860# 861sub scan_podpath { 862 my($podroot, $recurse, $append) = @_; 863 my($pwd, $dir); 864 my($libpod, $dirname, $pod, @files, @poddata); 865 866 unless($append) { 867 %Items = (); 868 %Pages = (); 869 } 870 871 # scan each directory listed in @Podpath 872 $pwd = getcwd(); 873 chdir($podroot) 874 || die "$0: error changing to directory $podroot: $!\n"; 875 foreach $dir (@Podpath) { 876 scan_dir($dir, $recurse); 877 } 878 879 # scan the pods listed in @Libpods for =item directives 880 foreach $libpod (@Libpods) { 881 # if the page isn't defined then we won't know where to find it 882 # on the system. 883 next unless defined $Pages{$libpod} && $Pages{$libpod}; 884 885 # if there is a directory then use the .pod and .pm files within it. 886 # NOTE: Only finds the first so-named directory in the tree. 887# if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) { 888 if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { 889 # find all the .pod and .pm files within the directory 890 $dirname = $1; 891 opendir(DIR, $dirname) || 892 die "$0: error opening directory $dirname: $!\n"; 893 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR)); 894 closedir(DIR); 895 896 # scan each .pod and .pm file for =item directives 897 foreach $pod (@files) { 898 open my $fh, '<', "$dirname/$pod" 899 or die "$0: error opening $dirname/$pod for input: $!\n"; 900 @poddata = <$fh>; 901 close $fh; 902 clean_data( \@poddata ); 903 904 scan_items( \%Items, "$dirname/$pod", @poddata); 905 } 906 907 # use the names of files as =item directives too. 908### Don't think this should be done this way - confuses issues.(WL) 909### foreach $pod (@files) { 910### $pod =~ /^(.*)(\.pod|\.pm)$/; 911### $Items{$1} = "$dirname/$1.html" if $1; 912### } 913 } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ || 914 $Pages{$libpod} =~ /([^:]*\.pm):/) { 915 # scan the .pod or .pm file for =item directives 916 $pod = $1; 917 open my $fh, '<', $pod 918 or die "$0: error opening $pod for input: $!\n"; 919 @poddata = <$fh>; 920 close $fh; 921 clean_data( \@poddata ); 922 923 scan_items( \%Items, "$pod", @poddata); 924 } else { 925 warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet; 926 } 927 } 928 @poddata = (); # clean-up a bit 929 930 chdir($pwd) 931 || die "$0: error changing to directory $pwd: $!\n"; 932 933 # cache the item list for later use 934 warn "caching items for later use\n" if $Verbose; 935 open my $cache, '>', $Itemcache 936 or die "$0: error open $Itemcache for writing: $!\n"; 937 938 print $cache join(":", @Podpath) . "\n$podroot\n"; 939 foreach my $key (keys %Items) { 940 print $cache "$key $Items{$key}\n"; 941 } 942 943 close $cache or die "error closing $Itemcache: $!"; 944 945 # cache the directory list for later use 946 warn "caching directories for later use\n" if $Verbose; 947 open $cache, '>', $Dircache 948 or die "$0: error open $Dircache for writing: $!\n"; 949 950 print $cache join(":", @Podpath) . "\n$podroot\n"; 951 foreach my $key (keys %Pages) { 952 print $cache "$key $Pages{$key}\n"; 953 } 954 955 close $cache or die "error closing $Dircache: $!"; 956} 957 958# 959# scan_dir - scans the directory specified in $dir for subdirectories, .pod 960# files, and .pm files. notes those that it finds. this information will 961# be used later in order to figure out where the pages specified in L<> 962# links are on the filesystem. 963# 964sub scan_dir { 965 my($dir, $recurse) = @_; 966 my($t, @subdirs, @pods, $pod, $dirname, @dirs); 967 local $_; 968 969 @subdirs = (); 970 @pods = (); 971 972 opendir(DIR, $dir) || 973 die "$0: error opening directory $dir: $!\n"; 974 while (defined($_ = readdir(DIR))) { 975 if (-d "$dir/$_" && $_ ne "." && $_ ne ".." 976 && ($HiddenDirs || !/^\./) 977 ) { # directory 978 $Pages{$_} = "" unless defined $Pages{$_}; 979 $Pages{$_} .= "$dir/$_:"; 980 push(@subdirs, $_); 981 } elsif (/\.pod\z/) { # .pod 982 s/\.pod\z//; 983 $Pages{$_} = "" unless defined $Pages{$_}; 984 $Pages{$_} .= "$dir/$_.pod:"; 985 push(@pods, "$dir/$_.pod"); 986 } elsif (/\.html\z/) { # .html 987 s/\.html\z//; 988 $Pages{$_} = "" unless defined $Pages{$_}; 989 $Pages{$_} .= "$dir/$_.pod:"; 990 } elsif (/\.pm\z/) { # .pm 991 s/\.pm\z//; 992 $Pages{$_} = "" unless defined $Pages{$_}; 993 $Pages{$_} .= "$dir/$_.pm:"; 994 push(@pods, "$dir/$_.pm"); 995 } elsif (-T "$dir/$_") { # script(?) 996 local *F; 997 if (open(F, "$dir/$_")) { 998 my $line; 999 while (defined($line = <F>)) { 1000 if ($line =~ /^=(?:pod|head1)/) { 1001 $Pages{$_} = "" unless defined $Pages{$_}; 1002 $Pages{$_} .= "$dir/$_.pod:"; 1003 last; 1004 } 1005 } 1006 close(F); 1007 } 1008 } 1009 } 1010 closedir(DIR); 1011 1012 # recurse on the subdirectories if necessary 1013 if ($recurse) { 1014 foreach my $subdir (@subdirs) { 1015 scan_dir("$dir/$subdir", $recurse); 1016 } 1017 } 1018} 1019 1020# 1021# scan_headings - scan a pod file for head[1-6] tags, note the tags, and 1022# build an index. 1023# 1024sub scan_headings { 1025 my($sections, @data) = @_; 1026 my($tag, $which_head, $otitle, $listdepth, $index); 1027 1028 local $Ignore = 0; 1029 1030 $listdepth = 0; 1031 $index = ""; 1032 1033 # scan for =head directives, note their name, and build an index 1034 # pointing to each of them. 1035 foreach my $line (@data) { 1036 if ($line =~ /^=(head)([1-6])\s+(.*)/) { 1037 ($tag, $which_head, $otitle) = ($1,$2,$3); 1038 1039 my $title = depod( $otitle ); 1040 my $name = anchorify( $title ); 1041 $$sections{$name} = 1; 1042 $title = process_text( \$otitle ); 1043 1044 while ($which_head != $listdepth) { 1045 if ($which_head > $listdepth) { 1046 $index .= "\n" . ("\t" x $listdepth) . "<ul>\n"; 1047 $listdepth++; 1048 } elsif ($which_head < $listdepth) { 1049 $listdepth--; 1050 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n"; 1051 } 1052 } 1053 1054 $index .= "\n" . ("\t" x $listdepth) . "<li>" . 1055 "<a href=\"#" . $name . "\">" . 1056 $title . "</a></li>"; 1057 } 1058 } 1059 1060 # finish off the lists 1061 while ($listdepth--) { 1062 $index .= "\n" . ("\t" x $listdepth) . "</ul>\n"; 1063 } 1064 1065 # get rid of bogus lists 1066 $index =~ s,\t*<ul>\s*</ul>\n,,g; 1067 1068 return $index; 1069} 1070 1071# 1072# scan_items - scans the pod specified by $pod for =item directives. we 1073# will use this information later on in resolving C<> links. 1074# 1075sub scan_items { 1076 my( $itemref, $pod, @poddata ) = @_; 1077 my($i, $item); 1078 local $_; 1079 1080 $pod =~ s/\.pod\z//; 1081 $pod .= ".html" if $pod; 1082 1083 foreach $i (0..$#poddata) { 1084 my $txt = depod( $poddata[$i] ); 1085 1086 # figure out what kind of item it is. 1087 # Build string for referencing this item. 1088 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list 1089 next unless $1; 1090 $item = $1; 1091 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list 1092 $item = $1; 1093 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list 1094 $item = $1; 1095 } else { 1096 next; 1097 } 1098 my $fid = fragment_id( $item ); 1099 $$itemref{$fid} = "$pod" if $fid; 1100 } 1101} 1102 1103# 1104# process_head - convert a pod head[1-6] tag and convert it to HTML format. 1105# 1106sub process_head { 1107 my($fh, $tag, $heading, $hasindex) = @_; 1108 1109 # figure out the level of the =head 1110 $tag =~ /head([1-6])/; 1111 my $level = $1; 1112 1113 finish_list( $fh ); 1114 1115 print $fh "<p>\n"; 1116 if( $level == 1 && ! $Top ){ 1117 print $fh "<a href=\"#__index__\"><small>$Backlink</small></a>\n" 1118 if $hasindex and $Backlink; 1119 print $fh "</p>\n<hr />\n" 1120 } else { 1121 print $fh "</p>\n"; 1122 } 1123 1124 my $name = anchorify( depod( $heading ) ); 1125 my $convert = process_text( \$heading ); 1126 print $fh "<h$level><a name=\"$name\">$convert</a></h$level>\n"; 1127} 1128 1129 1130# 1131# emit_item_tag - print an =item's text 1132# Note: The global $EmittedItem is used for inhibiting self-references. 1133# 1134my $EmittedItem; 1135 1136sub emit_item_tag { 1137 my( $fh, $otext, $text, $compact ) = @_; 1138 my $item = fragment_id( depod($text) , -generate); 1139 Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile") 1140 if !defined $item; 1141 $EmittedItem = $item; 1142 ### print STDERR "emit_item_tag=$item ($text)\n"; 1143 1144 print $fh '<strong>'; 1145 if ($Items_Named{$item}++) { 1146 print $fh process_text( \$otext ); 1147 } else { 1148 my $name = $item; 1149 $name = anchorify($name); 1150 print $fh qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>'; 1151 } 1152 print $fh "</strong>"; 1153 undef( $EmittedItem ); 1154} 1155 1156sub new_listitem { 1157 my ($fh, $tag) = @_; 1158 # Open tag for definition list as we have something to put in it 1159 if( ($tag ne 'dl') && ($ListNewTerm) ){ 1160 print $fh "<dd>\n"; 1161 $ListNewTerm = 0; 1162 } 1163 1164 if( $Items_Seen[$Listlevel]++ == 0 ){ 1165 # start of new list 1166 push( @Listtype, "$tag" ); 1167 print $fh "<$tag>\n"; 1168 } else { 1169 # if this is not the first item, close the previous one 1170 if ( $tag eq 'dl' ){ 1171 print $fh "</dd>\n" unless $ListNewTerm; 1172 } else { 1173 print $fh "</li>\n"; 1174 } 1175 } 1176 my $opentag = $tag eq 'dl' ? 'dt' : 'li'; 1177 print $fh "<$opentag>"; 1178} 1179 1180# 1181# process_item - convert a pod item tag and convert it to HTML format. 1182# 1183sub process_item { 1184 my ($fh, $otext) = @_; 1185 1186 # lots of documents start a list without doing an =over. this is 1187 # bad! but, the proper thing to do seems to be to just assume 1188 # they did do an =over. so warn them once and then continue. 1189 if( $Listlevel == 0 ){ 1190 warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph. ignoring.\n" unless $Quiet; 1191 process_over(); 1192 } 1193 1194 # remove formatting instructions from the text 1195 my $text = depod( $otext ); 1196 1197 # all the list variants: 1198 if( $text =~ /\A\*/ ){ # bullet 1199 new_listitem( $fh, 'ul' ); 1200 if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text 1201 my $tag = $1; 1202 $otext =~ s/\A\*\s+//; 1203 emit_item_tag( $fh, $otext, $tag, 1 ); 1204 print $fh "\n"; 1205 } 1206 1207 } elsif( $text =~ /\A\d+/ ){ # numbered list 1208 new_listitem( $fh, 'ol' ); 1209 if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text 1210 my $tag = $1; 1211 $otext =~ s/\A\d+\.?\s*//; 1212 emit_item_tag( $fh, $otext, $tag, 1 ); 1213 print $fh "\n"; 1214 } 1215 1216 } else { # definition list 1217 # new_listitem takes care of opening the <dt> tag 1218 new_listitem( $fh, 'dl' ); 1219 if ($text =~ /\A(.+)\Z/s ){ # should have text 1220 emit_item_tag( $fh, $otext, $text, 1 ); 1221 # write the definition term and close <dt> tag 1222 print $fh "</dt>\n"; 1223 } 1224 # trigger opening a <dd> tag for the actual definition; will not 1225 # happen if next paragraph is also a definition term (=item) 1226 $ListNewTerm = 1; 1227 } 1228 print $fh "\n"; 1229} 1230 1231# 1232# process_over - process a pod over tag and start a corresponding HTML list. 1233# 1234sub process_over { 1235 # start a new list 1236 $Listlevel++; 1237 push( @Items_Seen, 0 ); 1238} 1239 1240# 1241# process_back - process a pod back tag and convert it to HTML format. 1242# 1243sub process_back { 1244 my $fh = shift; 1245 if( $Listlevel == 0 ){ 1246 warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph. ignoring.\n" unless $Quiet; 1247 return; 1248 } 1249 1250 # close off the list. note, I check to see if $Listtype[$Listlevel] is 1251 # defined because an =item directive may have never appeared and thus 1252 # $Listtype[$Listlevel] may have never been initialized. 1253 $Listlevel--; 1254 if( defined $Listtype[$Listlevel] ){ 1255 if ( $Listtype[$Listlevel] eq 'dl' ){ 1256 print $fh "</dd>\n" unless $ListNewTerm; 1257 } else { 1258 print $fh "</li>\n"; 1259 } 1260 print $fh "</$Listtype[$Listlevel]>\n"; 1261 pop( @Listtype ); 1262 $ListNewTerm = 0; 1263 } 1264 1265 # clean up item count 1266 pop( @Items_Seen ); 1267} 1268 1269# 1270# process_cut - process a pod cut tag, thus start ignoring pod directives. 1271# 1272sub process_cut { 1273 $Ignore = 1; 1274} 1275 1276# 1277# process_pod - process a pod tag, thus stop ignoring pod directives 1278# until we see a corresponding cut. 1279# 1280sub process_pod { 1281 # no need to set $Ignore to 0 cause the main loop did it 1282} 1283 1284# 1285# process_for - process a =for pod tag. if it's for html, spit 1286# it out verbatim, if illustration, center it, otherwise ignore it. 1287# 1288sub process_for { 1289 my ($fh, $whom, $text) = @_; 1290 if ( $whom =~ /^(pod2)?html$/i) { 1291 print $fh $text; 1292 } elsif ($whom =~ /^illustration$/i) { 1293 1 while chomp $text; 1294 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) { 1295 $text .= $ext, last if -r "$text$ext"; 1296 } 1297 print $fh qq{<p align="center"><img src="$text" alt="$text illustration" /></p>}; 1298 } 1299} 1300 1301# 1302# process_begin - process a =begin pod tag. this pushes 1303# whom we're beginning on the begin stack. if there's a 1304# begin stack, we only print if it us. 1305# 1306sub process_begin { 1307 my ($fh, $whom, $text) = @_; 1308 $whom = lc($whom); 1309 push (@Begin_Stack, $whom); 1310 if ( $whom =~ /^(pod2)?html$/) { 1311 print $fh $text if $text; 1312 } 1313} 1314 1315# 1316# process_end - process a =end pod tag. pop the 1317# begin stack. die if we're mismatched. 1318# 1319sub process_end { 1320 my($whom, $text) = @_; 1321 $whom = lc($whom); 1322 if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) { 1323 Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n") 1324 } 1325 pop( @Begin_Stack ); 1326} 1327 1328# 1329# process_pre - indented paragraph, made into <pre></pre> 1330# 1331sub process_pre { 1332 my( $text ) = @_; 1333 my( $rest ); 1334 return if $Ignore; 1335 1336 $rest = $$text; 1337 1338 # insert spaces in place of tabs 1339 $rest =~ s#(.+)# 1340 my $line = $1; 1341 1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e; 1342 $line; 1343 #eg; 1344 1345 # convert some special chars to HTML escapes 1346 $rest = html_escape($rest); 1347 1348 # try and create links for all occurrences of perl.* within 1349 # the preformatted text. 1350 $rest =~ s{ 1351 (\s*)(perl\w+) 1352 }{ 1353 if ( defined $Pages{$2} ){ # is a link 1354 qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>); 1355 } elsif (defined $Pages{dosify($2)}) { # is a link 1356 qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>); 1357 } else { 1358 "$1$2"; 1359 } 1360 }xeg; 1361 $rest =~ s{ 1362 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)? 1363 }{ 1364 my $url ; 1365 if ( $Htmlfileurl ne '' ){ 1366 # Here, we take advantage of the knowledge 1367 # that $Htmlfileurl ne '' implies $Htmlroot eq ''. 1368 # Since $Htmlroot eq '', we need to prepend $Htmldir 1369 # on the fron of the link to get the absolute path 1370 # of the link's target. We check for a leading '/' 1371 # to avoid corrupting links that are #, file:, etc. 1372 my $old_url = $3 ; 1373 $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/}; 1374 $url = relativize_url( "$old_url.html", $Htmlfileurl ); 1375 } else { 1376 $url = "$3.html" ; 1377 } 1378 "$1$url" ; 1379 }xeg; 1380 1381 # Look for embedded URLs and make them into links. We don't 1382 # relativize them since they are best left as the author intended. 1383 1384 my $urls = '(' . join ('|', qw{ 1385 http 1386 telnet 1387 mailto 1388 news 1389 gopher 1390 file 1391 wais 1392 ftp 1393 } ) 1394 . ')'; 1395 1396 my $ltrs = '\w'; 1397 my $gunk = '/#~:.?+=&%@!\-'; 1398 my $punc = '.:!?\-;'; 1399 my $any = "${ltrs}${gunk}${punc}"; 1400 1401 $rest =~ s{ 1402 \b # start at word boundary 1403 ( # begin $1 { 1404 $urls : # need resource and a colon 1405 (?!:) # Ignore File::, among others. 1406 [$any] +? # followed by one or more of any valid 1407 # character, but be conservative and 1408 # take only what you need to.... 1409 ) # end $1 } 1410 (?= 1411 " > # maybe pre-quoted '<a href="...">' 1412 | # or: 1413 [$punc]* # 0 or more punctuation 1414 (?: # followed 1415 [^$any] # by a non-url char 1416 | # or 1417 $ # end of the string 1418 ) # 1419 | # or else 1420 $ # then end of the string 1421 ) 1422 }{<a href="$1">$1</a>}igox; 1423 1424 # text should be as it is (verbatim) 1425 $$text = $rest; 1426} 1427 1428 1429# 1430# pure text processing 1431# 1432# pure_text/inIS_text: differ with respect to automatic C<> recognition. 1433# we don't want this to happen within IS 1434# 1435sub pure_text($){ 1436 my $text = shift(); 1437 process_puretext( $text, 1 ); 1438} 1439 1440sub inIS_text($){ 1441 my $text = shift(); 1442 process_puretext( $text, 0 ); 1443} 1444 1445# 1446# process_puretext - process pure text (without pod-escapes) converting 1447# double-quotes and handling implicit C<> links. 1448# 1449sub process_puretext { 1450 my($text, $notinIS) = @_; 1451 1452 ## Guessing at func() or [\$\@%&]*var references in plain text is destined 1453 ## to produce some strange looking ref's. uncomment to disable: 1454 ## $notinIS = 0; 1455 1456 my(@words, $lead, $trail); 1457 1458 # keep track of leading and trailing white-space 1459 $lead = ($text =~ s/\A(\s+)//s ? $1 : ""); 1460 $trail = ($text =~ s/(\s+)\Z//s ? $1 : ""); 1461 1462 # split at space/non-space boundaries 1463 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text ); 1464 1465 # process each word individually 1466 foreach my $word (@words) { 1467 # skip space runs 1468 next if $word =~ /^\s*$/; 1469 # see if we can infer a link or a function call 1470 # 1471 # NOTE: This is a word based search, it won't automatically 1472 # mark "substr($var, 1, 2)" because the 1st word would be "substr($var" 1473 # User has to enclose those with proper C<> 1474 1475 if( $notinIS && $word =~ 1476 m/ 1477 ^([a-z_]{2,}) # The function name 1478 \( 1479 ([0-9][a-z]* # Manual page(1) or page(1M) 1480 |[^)]*[\$\@\%][^)]+ # ($foo), (1, @foo), (%hash) 1481 | # () 1482 ) 1483 \) 1484 ([.,;]?)$ # a possible punctuation follows 1485 /xi 1486 ) { 1487 # has parenthesis so should have been a C<> ref 1488 ## try for a pagename (perlXXX(1))? 1489 my( $func, $args, $rest ) = ( $1, $2, $3 || '' ); 1490 if( $args =~ /^\d+$/ ){ 1491 my $url = page_sect( $word, '' ); 1492 if( defined $url ){ 1493 $word = qq(<a href="$url" class="man">the $word manpage</a>$rest); 1494 next; 1495 } 1496 } 1497 ## try function name for a link, append tt'ed argument list 1498 $word = emit_C( $func, '', "($args)") . $rest; 1499 1500#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing. 1501## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) { 1502## # perl variables, should be a C<> ref 1503## $word = emit_C( $word ); 1504 1505 } elsif ($word =~ m,^\w+://\w,) { 1506 # looks like a URL 1507 # Don't relativize it: leave it as the author intended 1508 $word = qq(<a href="$word">$word</a>); 1509 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) { 1510 # looks like an e-mail address 1511 my ($w1, $w2, $w3) = ("", $word, ""); 1512 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/; 1513 ($w1, $w2, $w3) = ("<", $1, ">$2") if $word =~ /^<(.*?)>(,?)/; 1514 $word = qq($w1<a href="mailto:$w2">$w2</a>$w3); 1515 } else { 1516 $word = html_escape($word) if $word =~ /["&<>]/; 1517 } 1518 } 1519 1520 # put everything back together 1521 return $lead . join( '', @words ) . $trail; 1522} 1523 1524 1525# 1526# process_text - handles plaintext that appears in the input pod file. 1527# there may be pod commands embedded within the text so those must be 1528# converted to html commands. 1529# 1530 1531sub process_text1($$;$$); 1532sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' } 1533sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 } 1534 1535sub process_text { 1536 return if $Ignore; 1537 my( $tref ) = @_; 1538 my $res = process_text1( 0, $tref ); 1539 $res =~ s/\s+$//s; 1540 $$tref = $res; 1541} 1542 1543sub process_text_rfc_links { 1544 my $text = shift; 1545 1546 # For every "RFCnnnn" or "RFC nnn", link it to the authoritative 1547 # ource. Do not use the /i modifier here. Require "RFC" to be written in 1548 # in capital letters. 1549 1550 $text =~ s{ 1551 (?<=[^<>[:alpha:]]) # Make sure this is not an URL already 1552 (RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits 1553 } 1554 {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx; 1555 1556 $text; 1557} 1558 1559sub process_text1($$;$$){ 1560 my( $lev, $rstr, $func, $closing ) = @_; 1561 my $res = ''; 1562 1563 unless (defined $func) { 1564 $func = ''; 1565 $lev++; 1566 } 1567 1568 if( $func eq 'B' ){ 1569 # B<text> - boldface 1570 $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>'; 1571 1572 } elsif( $func eq 'C' ){ 1573 # C<code> - can be a ref or <code></code> 1574 # need to extract text 1575 my $par = go_ahead( $rstr, 'C', $closing ); 1576 1577 ## clean-up of the link target 1578 my $text = depod( $par ); 1579 1580 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ; 1581 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n"; 1582 1583 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); 1584 1585 } elsif( $func eq 'E' ){ 1586 # E<x> - convert to character 1587 $$rstr =~ s/^([^>]*)>//; 1588 my $escape = $1; 1589 $escape =~ s/^0?x([\dA-F]+)$/#x$1/i 1590 or $escape =~ s/^0([0-7]+)$/'#'.oct($1)/ei 1591 or $escape =~ s/^(\d+)$/#$1/; 1592 $res = "&$escape;"; 1593 1594 } elsif( $func eq 'F' ){ 1595 # F<filename> - italicize 1596 $res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>'; 1597 1598 } elsif( $func eq 'I' ){ 1599 # I<text> - italicize 1600 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>'; 1601 1602 } elsif( $func eq 'L' ){ 1603 # L<link> - link 1604 ## L<text|cross-ref> => produce text, use cross-ref for linking 1605 ## L<cross-ref> => make text from cross-ref 1606 ## need to extract text 1607 my $par = go_ahead( $rstr, 'L', $closing ); 1608 1609 # some L<>'s that shouldn't be: 1610 # a) full-blown URL's are emitted as-is 1611 if( $par =~ m{^\w+://}s ){ 1612 return make_URL_href( $par ); 1613 } 1614 # b) C<...> is stripped and treated as C<> 1615 if( $par =~ /^C<(.*)>$/ ){ 1616 my $text = depod( $1 ); 1617 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) ); 1618 } 1619 1620 # analyze the contents 1621 $par =~ s/\n/ /g; # undo word-wrapped tags 1622 my $opar = $par; 1623 my $linktext; 1624 if( $par =~ s{^([^|]+)\|}{} ){ 1625 $linktext = $1; 1626 } 1627 1628 # make sure sections start with a / 1629 $par =~ s{^"}{/"}; 1630 1631 my( $page, $section, $ident ); 1632 1633 # check for link patterns 1634 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident 1635 # we've got a name/ident (no quotes) 1636 if (length $2) { 1637 ( $page, $ident ) = ( $1, $2 ); 1638 } else { 1639 ( $page, $section ) = ( $1, $2 ); 1640 } 1641 ### print STDERR "--> L<$par> to page $page, ident $ident\n"; 1642 1643 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section" 1644 # even though this should be a "section", we go for ident first 1645 ( $page, $ident ) = ( $1, $2 ); 1646 ### print STDERR "--> L<$par> to page $page, section $section\n"; 1647 1648 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes 1649 ( $page, $section ) = ( '', $par ); 1650 ### print STDERR "--> L<$par> to void page, section $section\n"; 1651 1652 } else { 1653 ( $page, $section ) = ( $par, '' ); 1654 ### print STDERR "--> L<$par> to page $par, void section\n"; 1655 } 1656 1657 # now, either $section or $ident is defined. the convoluted logic 1658 # below tries to resolve L<> according to what the user specified. 1659 # failing this, we try to find the next best thing... 1660 my( $url, $ltext, $fid ); 1661 1662 RESOLVE: { 1663 if( defined $ident ){ 1664 ## try to resolve $ident as an item 1665 ( $url, $fid ) = coderef( $page, $ident ); 1666 if( $url ){ 1667 if( ! defined( $linktext ) ){ 1668 $linktext = $ident; 1669 $linktext .= " in " if $ident && $page; 1670 $linktext .= "the $page manpage" if $page; 1671 } 1672 ### print STDERR "got coderef url=$url\n"; 1673 last RESOLVE; 1674 } 1675 ## no luck: go for a section (auto-quoting!) 1676 $section = $ident; 1677 } 1678 ## now go for a section 1679 my $htmlsection = htmlify( $section ); 1680 $url = page_sect( $page, $htmlsection ); 1681 if( $url ){ 1682 if( ! defined( $linktext ) ){ 1683 $linktext = $section; 1684 $linktext .= " in " if $section && $page; 1685 $linktext .= "the $page manpage" if $page; 1686 } 1687 ### print STDERR "got page/section url=$url\n"; 1688 last RESOLVE; 1689 } 1690 ## no luck: go for an ident 1691 if( $section ){ 1692 $ident = $section; 1693 } else { 1694 $ident = $page; 1695 $page = undef(); 1696 } 1697 ( $url, $fid ) = coderef( $page, $ident ); 1698 if( $url ){ 1699 if( ! defined( $linktext ) ){ 1700 $linktext = $ident; 1701 $linktext .= " in " if $ident && $page; 1702 $linktext .= "the $page manpage" if $page; 1703 } 1704 ### print STDERR "got section=>coderef url=$url\n"; 1705 last RESOLVE; 1706 } 1707 1708 # warning; show some text. 1709 $linktext = $opar unless defined $linktext; 1710 warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet; 1711 } 1712 1713 # now we have a URL or just plain code 1714 $$rstr = $linktext . '>' . $$rstr; 1715 if( defined( $url ) ){ 1716 $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>'; 1717 } else { 1718 $res = '<em>' . process_text1( $lev, $rstr ) . '</em>'; 1719 } 1720 1721 } elsif( $func eq 'S' ){ 1722 # S<text> - non-breaking spaces 1723 $res = process_text1( $lev, $rstr ); 1724 $res =~ s/ / /g; 1725 1726 } elsif( $func eq 'X' ){ 1727 # X<> - ignore 1728 warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n" 1729 unless $$rstr =~ s/^[^>]*>// or $Quiet; 1730 } elsif( $func eq 'Z' ){ 1731 # Z<> - empty 1732 warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n" 1733 unless $$rstr =~ s/^>// or $Quiet; 1734 1735 } else { 1736 my $term = pattern $closing; 1737 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){ 1738 # all others: either recurse into new function or 1739 # terminate at closing angle bracket(s) 1740 my $pt = $1; 1741 $pt .= $2 if !$3 && $lev == 1; 1742 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt ); 1743 return $res if !$3 && $lev > 1; 1744 if( $3 ){ 1745 $res .= process_text1( $lev, $rstr, $3, closing $4 ); 1746 } 1747 } 1748 if( $lev == 1 ){ 1749 $res .= pure_text( $$rstr ); 1750 } elsif( ! $Quiet ) { 1751 my $snippet = substr($$rstr,0,60); 1752 warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n" 1753 1754 } 1755 $res = process_text_rfc_links($res); 1756 } 1757 return $res; 1758} 1759 1760# 1761# go_ahead: extract text of an IS (can be nested) 1762# 1763sub go_ahead($$$){ 1764 my( $rstr, $func, $closing ) = @_; 1765 my $res = ''; 1766 my @closing = ($closing); 1767 while( $$rstr =~ 1768 s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){ 1769 $res .= $1; 1770 unless( $3 ){ 1771 shift @closing; 1772 return $res unless @closing; 1773 } else { 1774 unshift @closing, closing $4; 1775 } 1776 $res .= $2; 1777 } 1778 unless ($Quiet) { 1779 my $snippet = substr($$rstr,0,60); 1780 warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n" 1781 } 1782 return $res; 1783} 1784 1785# 1786# emit_C - output result of C<text> 1787# $text is the depod-ed text 1788# 1789sub emit_C($;$$){ 1790 my( $text, $nocode, $args ) = @_; 1791 $args = '' unless defined $args; 1792 my $res; 1793 my( $url, $fid ) = coderef( undef(), $text ); 1794 1795 # need HTML-safe text 1796 my $linktext = html_escape( "$text$args" ); 1797 1798 if( defined( $url ) && 1799 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){ 1800 $res = "<a href=\"$url\"><code>$linktext</code></a>"; 1801 } elsif( 0 && $nocode ){ 1802 $res = $linktext; 1803 } else { 1804 $res = "<code>$linktext</code>"; 1805 } 1806 return $res; 1807} 1808 1809# 1810# html_escape: make text safe for HTML 1811# 1812sub html_escape { 1813 my $rest = $_[0]; 1814 $rest =~ s/&/&/g; 1815 $rest =~ s/</</g; 1816 $rest =~ s/>/>/g; 1817 $rest =~ s/"/"/g; 1818 # ' is only in XHTML, not HTML4. Be conservative 1819 #$rest =~ s/'/'/g; 1820 return $rest; 1821} 1822 1823 1824# 1825# dosify - convert filenames to 8.3 1826# 1827sub dosify { 1828 my($str) = @_; 1829 return lc($str) if $^O eq 'VMS'; # VMS just needs casing 1830 if ($Is83) { 1831 $str = lc $str; 1832 $str =~ s/(\.\w+)/substr ($1,0,4)/ge; 1833 $str =~ s/(\w+)/substr ($1,0,8)/ge; 1834 } 1835 return $str; 1836} 1837 1838# 1839# page_sect - make a URL from the text of a L<> 1840# 1841sub page_sect($$) { 1842 my( $page, $section ) = @_; 1843 my( $linktext, $page83, $link); # work strings 1844 1845 # check if we know that this is a section in this page 1846 if (!defined $Pages{$page} && defined $Sections{$page}) { 1847 $section = $page; 1848 $page = ""; 1849 ### print STDERR "reset page='', section=$section\n"; 1850 } 1851 1852 $page83=dosify($page); 1853 $page=$page83 if (defined $Pages{$page83}); 1854 if ($page eq "") { 1855 $link = "#" . anchorify( $section ); 1856 } elsif ( $page =~ /::/ ) { 1857 $page =~ s,::,/,g; 1858 # Search page cache for an entry keyed under the html page name, 1859 # then look to see what directory that page might be in. NOTE: 1860 # this will only find one page. A better solution might be to produce 1861 # an intermediate page that is an index to all such pages. 1862 my $page_name = $page ; 1863 $page_name =~ s,^.*/,,s ; 1864 if ( defined( $Pages{ $page_name } ) && 1865 $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/ 1866 ) { 1867 $page = $1 ; 1868 } 1869 else { 1870 # NOTE: This branch assumes that all A::B pages are located in 1871 # $Htmlroot/A/B.html . This is often incorrect, since they are 1872 # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could 1873 # analyze the contents of %Pages and figure out where any 1874 # cousins of A::B are, then assume that. So, if A::B isn't found, 1875 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in 1876 # lib/A/B.pm. This is also limited, but it's an improvement. 1877 # Maybe a hints file so that the links point to the correct places 1878 # nonetheless? 1879 1880 } 1881 $link = "$Htmlroot/$page.html"; 1882 $link .= "#" . anchorify( $section ) if ($section); 1883 } elsif (!defined $Pages{$page}) { 1884 $link = ""; 1885 } else { 1886 $section = anchorify( $section ) if $section ne ""; 1887 ### print STDERR "...section=$section\n"; 1888 1889 # if there is a directory by the name of the page, then assume that an 1890 # appropriate section will exist in the subdirectory 1891# if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) { 1892 if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) { 1893 $link = "$Htmlroot/$1/$section.html"; 1894 ### print STDERR "...link=$link\n"; 1895 1896 # since there is no directory by the name of the page, the section will 1897 # have to exist within a .html of the same name. thus, make sure there 1898 # is a .pod or .pm that might become that .html 1899 } else { 1900 $section = "#$section" if $section; 1901 ### print STDERR "...section=$section\n"; 1902 1903 # check if there is a .pod with the page name. 1904 # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm) 1905 if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) { 1906 $link = "$Htmlroot/$1.html$section"; 1907 } else { 1908 $link = ""; 1909 } 1910 } 1911 } 1912 1913 if ($link) { 1914 # Here, we take advantage of the knowledge that $Htmlfileurl ne '' 1915 # implies $Htmlroot eq ''. This means that the link in question 1916 # needs a prefix of $Htmldir if it begins with '/'. The test for 1917 # the initial '/' is done to avoid '#'-only links, and to allow 1918 # for other kinds of links, like file:, ftp:, etc. 1919 my $url ; 1920 if ( $Htmlfileurl ne '' ) { 1921 $link = "$Htmldir$link" if $link =~ m{^/}s; 1922 $url = relativize_url( $link, $Htmlfileurl ); 1923# print( " b: [$link,$Htmlfileurl,$url]\n" ); 1924 } 1925 else { 1926 $url = $link ; 1927 } 1928 return $url; 1929 1930 } else { 1931 return undef(); 1932 } 1933} 1934 1935# 1936# relativize_url - convert an absolute URL to one relative to a base URL. 1937# Assumes both end in a filename. 1938# 1939sub relativize_url { 1940 my ($dest,$source) = @_ ; 1941 1942 my ($dest_volume,$dest_directory,$dest_file) = 1943 File::Spec::Unix->splitpath( $dest ) ; 1944 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ; 1945 1946 my ($source_volume,$source_directory,$source_file) = 1947 File::Spec::Unix->splitpath( $source ) ; 1948 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ; 1949 1950 my $rel_path = '' ; 1951 if ( $dest ne '' ) { 1952 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ; 1953 } 1954 1955 if ( $rel_path ne '' && 1956 substr( $rel_path, -1 ) ne '/' && 1957 substr( $dest_file, 0, 1 ) ne '#' 1958 ) { 1959 $rel_path .= "/$dest_file" ; 1960 } 1961 else { 1962 $rel_path .= "$dest_file" ; 1963 } 1964 1965 return $rel_path ; 1966} 1967 1968 1969# 1970# coderef - make URL from the text of a C<> 1971# 1972sub coderef($$){ 1973 my( $page, $item ) = @_; 1974 my( $url ); 1975 1976 my $fid = fragment_id( $item ); 1977 1978 if( defined( $page ) && $page ne "" ){ 1979 # we have been given a $page... 1980 $page =~ s{::}{/}g; 1981 1982 Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile") 1983 if !defined $fid; 1984 # Do we take it? Item could be a section! 1985 my $base = $Items{$fid} || ""; 1986 $base =~ s{[^/]*/}{}; 1987 if( $base ne "$page.html" ){ 1988 ### print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n"; 1989 $page = undef(); 1990 } 1991 1992 } else { 1993 # no page - local items precede cached items 1994 if( defined( $fid ) ){ 1995 if( exists $Local_Items{$fid} ){ 1996 $page = $Local_Items{$fid}; 1997 } else { 1998 $page = $Items{$fid}; 1999 } 2000 } 2001 } 2002 2003 # if there was a pod file that we found earlier with an appropriate 2004 # =item directive, then create a link to that page. 2005 if( defined $page ){ 2006 if( $page ){ 2007 if( exists $Pages{$page} and $Pages{$page} =~ /([^:]*)\.[^:.]*:/){ 2008 $page = $1 . '.html'; 2009 } 2010 my $link = "$Htmlroot/$page#" . anchorify($fid); 2011 2012 # Here, we take advantage of the knowledge that $Htmlfileurl 2013 # ne '' implies $Htmlroot eq ''. 2014 if ( $Htmlfileurl ne '' ) { 2015 $link = "$Htmldir$link" ; 2016 $url = relativize_url( $link, $Htmlfileurl ) ; 2017 } else { 2018 $url = $link ; 2019 } 2020 } else { 2021 $url = "#" . anchorify($fid); 2022 } 2023 2024 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/; 2025 } 2026 return( $url, $fid ); 2027} 2028 2029 2030 2031# 2032# Adapted from Nick Ing-Simmons' PodToHtml package. 2033sub relative_url { 2034 my $source_file = shift ; 2035 my $destination_file = shift; 2036 2037 my $source = URI::file->new_abs($source_file); 2038 my $uo = URI::file->new($destination_file,$source)->abs; 2039 return $uo->rel->as_string; 2040} 2041 2042 2043# 2044# finish_list - finish off any pending HTML lists. this should be called 2045# after the entire pod file has been read and converted. 2046# 2047sub finish_list { 2048 my $fh = shift; 2049 if( $Listlevel ){ 2050 warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph. ignoring.\n" unless $Quiet; 2051 while( $Listlevel ){ 2052 process_back( $fh ); 2053 } 2054 } 2055} 2056 2057# 2058# htmlify - converts a pod section specification to a suitable section 2059# specification for HTML. Note that we keep spaces and special characters 2060# except ", ? (Netscape problem) and the hyphen (writer's problem...). 2061# 2062sub htmlify { 2063 my( $heading) = @_; 2064 $heading =~ s/(\s+)/ /g; 2065 $heading =~ s/\s+\Z//; 2066 $heading =~ s/\A\s+//; 2067 # The hyphen is a disgrace to the English language. 2068 # $heading =~ s/[-"?]//g; 2069 $heading =~ s/["?]//g; 2070 $heading = lc( $heading ); 2071 return $heading; 2072} 2073 2074# 2075# similar to htmlify, but turns non-alphanumerics into underscores 2076# 2077sub anchorify { 2078 my ($anchor) = @_; 2079 $anchor = htmlify($anchor); 2080 $anchor =~ s/\W/_/g; 2081 return $anchor; 2082} 2083 2084# 2085# depod - convert text by eliminating all interior sequences 2086# Note: can be called with copy or modify semantics 2087# 2088my %E2c; 2089$E2c{lt} = '<'; 2090$E2c{gt} = '>'; 2091$E2c{sol} = '/'; 2092$E2c{verbar} = '|'; 2093$E2c{amp} = '&'; # in Tk's pods 2094 2095sub depod1($;$$); 2096 2097sub depod($){ 2098 my $string; 2099 if( ref( $_[0] ) ){ 2100 $string = ${$_[0]}; 2101 ${$_[0]} = depod1( \$string ); 2102 } else { 2103 $string = $_[0]; 2104 depod1( \$string ); 2105 } 2106} 2107 2108sub depod1($;$$){ 2109 my( $rstr, $func, $closing ) = @_; 2110 my $res = ''; 2111 return $res unless defined $$rstr; 2112 if( ! defined( $func ) ){ 2113 # skip to next begin of an interior sequence 2114 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){ 2115 # recurse into its text 2116 $res .= $1 . depod1( $rstr, $2, closing $3); 2117 } 2118 $res .= $$rstr; 2119 } elsif( $func eq 'E' ){ 2120 # E<x> - convert to character 2121 $$rstr =~ s/^([^>]*)>//; 2122 $res .= $E2c{$1} || ""; 2123 } elsif( $func eq 'X' ){ 2124 # X<> - ignore 2125 $$rstr =~ s/^[^>]*>//; 2126 } elsif( $func eq 'Z' ){ 2127 # Z<> - empty 2128 $$rstr =~ s/^>//; 2129 } else { 2130 # all others: either recurse into new function or 2131 # terminate at closing angle bracket 2132 my $term = pattern $closing; 2133 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){ 2134 $res .= $1; 2135 last unless $3; 2136 $res .= depod1( $rstr, $3, closing $4 ); 2137 } 2138 ## If we're here and $2 ne '>': undelimited interior sequence. 2139 ## Ignored, as this is called without proper indication of where we are. 2140 ## Rely on process_text to produce diagnostics. 2141 } 2142 return $res; 2143} 2144 2145{ 2146 my %seen; # static fragment record hash 2147 2148sub fragment_id_readable { 2149 my $text = shift; 2150 my $generate = shift; # optional flag 2151 2152 my $orig = $text; 2153 2154 # leave the words for the fragment identifier, 2155 # change everything else to underbars. 2156 $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency. 2157 $text =~ s/_{2,}/_/g; 2158 $text =~ s/\A_//; 2159 $text =~ s/_\Z//; 2160 2161 unless ($text) 2162 { 2163 # Nothing left after removing punctuation, so leave it as is 2164 # E.g. if option is named: "=item -#" 2165 2166 $text = $orig; 2167 } 2168 2169 if ($generate) { 2170 if ( exists $seen{$text} ) { 2171 # This already exists, make it unique 2172 $seen{$text}++; 2173 $text = $text . $seen{$text}; 2174 } else { 2175 $seen{$text} = 1; # first time seen this fragment 2176 } 2177 } 2178 2179 $text; 2180}} 2181 2182my @HC; 2183sub fragment_id_obfuscated { # This was the old "_2d_2d__" 2184 my $text = shift; 2185 my $generate = shift; # optional flag 2186 2187 # text? Normalize by obfuscating the fragment id to make it unique 2188 $text =~ s/\s+/_/sg; 2189 2190 $text =~ s{(\W)}{ 2191 defined( $HC[ord($1)] ) ? $HC[ord($1)] 2192 : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe; 2193 $text = substr( $text, 0, 50 ); 2194 2195 $text; 2196} 2197 2198# 2199# fragment_id - construct a fragment identifier from: 2200# a) =item text 2201# b) contents of C<...> 2202# 2203 2204sub fragment_id { 2205 my $text = shift; 2206 my $generate = shift; # optional flag 2207 2208 $text =~ s/\s+\Z//s; 2209 if( $text ){ 2210 # a method or function? 2211 return $1 if $text =~ /(\w+)\s*\(/; 2212 return $1 if $text =~ /->\s*(\w+)\s*\(?/; 2213 2214 # a variable name? 2215 return $1 if $text =~ /^([\$\@%*]\S+)/; 2216 2217 # some pattern matching operator? 2218 return $1 if $text =~ m|^(\w+/).*/\w*$|; 2219 2220 # fancy stuff... like "do { }" 2221 return $1 if $text =~ m|^(\w+)\s*{.*}$|; 2222 2223 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...] 2224 # and some funnies with ... Module ... 2225 return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$}; 2226 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$}; 2227 2228 return fragment_id_readable($text, $generate); 2229 } else { 2230 return; 2231 } 2232} 2233 2234# 2235# make_URL_href - generate HTML href from URL 2236# Special treatment for CGI queries. 2237# 2238sub make_URL_href($){ 2239 my( $url ) = @_; 2240 if( $url !~ 2241 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){ 2242 $url = "<a href=\"$url\">$url</a>"; 2243 } 2244 return $url; 2245} 2246 22471; 2248