1package Pod::Html;
2use strict;
3require Exporter;
4
5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
6$VERSION = 1.11;
7@ISA = qw(Exporter);
8@EXPORT = qw(pod2html htmlify);
9@EXPORT_OK = qw(anchorify);
10
11use Carp;
12use Config;
13use Cwd;
14use File::Spec;
15use File::Spec::Unix;
16use Getopt::Long;
17
18use locale;	# make \w work right in non-ASCII lands
19
20=head1 NAME
21
22Pod::Html - module to convert pod files to HTML
23
24=head1 SYNOPSIS
25
26    use Pod::Html;
27    pod2html([options]);
28
29=head1 DESCRIPTION
30
31Converts files from pod format (see L<perlpod>) to HTML format.  It
32can automatically generate indexes and cross-references, and it keeps
33a cache of things it knows how to cross-reference.
34
35=head1 FUNCTIONS
36
37=head2 pod2html
38
39    pod2html("pod2html",
40             "--podpath=lib:ext:pod:vms",
41             "--podroot=/usr/src/perl",
42             "--htmlroot=/perl/nmanual",
43             "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
44             "--recurse",
45             "--infile=foo.pod",
46             "--outfile=/perl/nmanual/foo.html");
47
48pod2html takes the following arguments:
49
50=over 4
51
52=item backlink
53
54    --backlink="Back to Top"
55
56Adds "Back to Top" links in front of every C<head1> heading (except for
57the first).  By default, no backlinks are generated.
58
59=item cachedir
60
61    --cachedir=name
62
63Creates the item and directory caches in the given directory.
64
65=item css
66
67    --css=stylesheet
68
69Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
70C<style> attributes that are output by default (to avoid conflicts).
71
72=item flush
73
74    --flush
75
76Flushes the item and directory caches.
77
78=item header
79
80    --header
81    --noheader
82
83Creates header and footer blocks containing the text of the C<NAME>
84section.  By default, no headers are generated.
85
86=item help
87
88    --help
89
90Displays the usage message.
91
92=item hiddendirs
93
94    --hiddendirs
95    --nohiddendirs
96
97Include hidden directories in the search for POD's in podpath if recurse
98is set.
99The default is not to traverse any directory whose name begins with C<.>.
100See L</"podpath"> and L</"recurse">.
101
102[This option is for backward compatibility only.
103It's hard to imagine that one would usefully create a module with a
104name component beginning with C<.>.]
105
106=item htmldir
107
108    --htmldir=name
109
110Sets the directory in which the resulting HTML file is placed.  This
111is used to generate relative links to other files. Not passing this
112causes all links to be absolute, since this is the value that tells
113Pod::Html the root of the documentation tree.
114
115=item htmlroot
116
117    --htmlroot=name
118
119Sets the base URL for the HTML files.  When cross-references are made,
120the HTML root is prepended to the URL.
121
122=item index
123
124    --index
125    --noindex
126
127Generate an index at the top of the HTML file.  This is the default
128behaviour.
129
130=item infile
131
132    --infile=name
133
134Specify the pod file to convert.  Input is taken from STDIN if no
135infile is specified.
136
137=item libpods
138
139    --libpods=name:...:name
140
141List of page names (eg, "perlfunc") which contain linkable C<=item>s.
142
143=item netscape
144
145    --netscape
146    --nonetscape
147
148B<Deprecated>, has no effect. For backwards compatibility only.
149
150=item outfile
151
152    --outfile=name
153
154Specify the HTML file to create.  Output goes to STDOUT if no outfile
155is specified.
156
157=item podpath
158
159    --podpath=name:...:name
160
161Specify which subdirectories of the podroot contain pod files whose
162HTML converted forms can be linked to in cross references.
163
164=item podroot
165
166    --podroot=name
167
168Specify the base directory for finding library pods.
169
170=item quiet
171
172    --quiet
173    --noquiet
174
175Don't display I<mostly harmless> warning messages.  These messages
176will be displayed by default.  But this is not the same as C<verbose>
177mode.
178
179=item recurse
180
181    --recurse
182    --norecurse
183
184Recurse into subdirectories specified in podpath (default behaviour).
185
186=item title
187
188    --title=title
189
190Specify the title of the resulting HTML file.
191
192=item verbose
193
194    --verbose
195    --noverbose
196
197Display progress messages.  By default, they won't be displayed.
198
199=back
200
201=head2 htmlify
202
203    htmlify($heading);
204
205Converts a pod section specification to a suitable section specification
206for HTML. Note that we keep spaces and special characters except
207C<", ?> (Netscape problem) and the hyphen (writer's problem...).
208
209=head2 anchorify
210
211    anchorify(@heading);
212
213Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
214that C<anchorify()> is not exported by default.
215
216=head1 ENVIRONMENT
217
218Uses C<$Config{pod2html}> to setup default options.
219
220=head1 AUTHOR
221
222Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
223
224=head1 SEE ALSO
225
226L<perlpod>
227
228=head1 COPYRIGHT
229
230This program is distributed under the Artistic License.
231
232=cut
233
234my($Cachedir);
235my($Dircache, $Itemcache);
236my @Begin_Stack;
237my @Libpods;
238my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
239my($Podfile, @Podpath, $Podroot);
240my $Css;
241
242my $Recurse;
243my $Quiet;
244my $HiddenDirs;
245my $Verbose;
246my $Doindex;
247
248my $Backlink;
249my($Listlevel, @Listtype);
250my $ListNewTerm;
251use vars qw($Ignore);  # need to localize it later.
252
253my(%Items_Named, @Items_Seen);
254my($Title, $Header);
255
256my $Top;
257my $Paragraph;
258
259my %Sections;
260
261# Caches
262my %Pages = ();			# associative array used to find the location
263				#   of pages referenced by L<> links.
264my %Items = ();			# associative array used to find the location
265				#   of =item directives referenced by C<> links
266
267my %Local_Items;
268my $Is83;
269
270my $Curdir = File::Spec->curdir;
271
272init_globals();
273
274sub init_globals {
275    $Cachedir = ".";		# The directory to which item and directory
276				# caches will be written.
277
278    $Dircache = "pod2htmd.tmp";
279    $Itemcache = "pod2htmi.tmp";
280
281    @Begin_Stack = ();		# begin/end stack
282
283    @Libpods = ();	    	# files to search for links from C<> directives
284    $Htmlroot = "/";	    	# http-server base directory from which all
285				#   relative paths in $podpath stem.
286    $Htmldir = "";	    	# The directory to which the html pages
287				# will (eventually) be written.
288    $Htmlfile = "";		# write to stdout by default
289    $Htmlfileurl = "";		# The url that other files would use to
290				# refer to this file.  This is only used
291				# to make relative urls that point to
292				# other files.
293
294    $Podfile = "";		# read from stdin by default
295    @Podpath = ();		# list of directories containing library pods.
296    $Podroot = $Curdir;	        # filesystem base directory from which all
297				#   relative paths in $podpath stem.
298    $Css = '';                  # Cascading style sheet
299    $Recurse = 1;		# recurse on subdirectories in $podpath.
300    $Quiet = 0;		        # not quiet by default
301    $Verbose = 0;		# not verbose by default
302    $Doindex = 1;   	    	# non-zero if we should generate an index
303    $Backlink = '';		# text for "back to top" links
304    $Listlevel = 0;		# current list depth
305    @Listtype = ();		# list types for open lists
306    $ListNewTerm = 0;		# indicates new term in definition list; used
307    				# to correctly open/close <dd> tags
308    $Ignore = 1;		# whether or not to format text.  we don't
309				#   format text until we hit our first pod
310				#   directive.
311
312    @Items_Seen = ();	        # for multiples of the same item in perlfunc
313    %Items_Named = ();
314    $Header = 0;		# produce block header/footer
315    $Title = '';		# title to give the pod(s)
316    $Top = 1;			# true if we are at the top of the doc.  used
317				#   to prevent the first <hr /> directive.
318    $Paragraph = '';		# which paragraph we're processing (used
319				#   for error messages)
320    %Sections = ();		# sections within this page
321
322    %Local_Items = ();
323    $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
324}
325
326#
327# clean_data: global clean-up of pod data
328#
329sub clean_data($){
330    my( $dataref ) = @_;
331    for my $i ( 0..$#{$dataref} ) {
332	${$dataref}[$i] =~ s/\s+\Z//;
333
334        # have a look for all-space lines
335      if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
336	    my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
337	    splice( @$dataref, $i, 1, @chunks );
338	}
339    }
340}
341
342
343sub pod2html {
344    local(@ARGV) = @_;
345    local($/);
346    local $_;
347
348    init_globals();
349
350    $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
351
352    # cache of %Pages and %Items from last time we ran pod2html
353
354    #undef $opt_help if defined $opt_help;
355
356    # parse the command-line parameters
357    parse_command_line();
358
359    # escape the backlink argument (same goes for title but is done later...)
360    $Backlink = html_escape($Backlink) if defined $Backlink;
361
362    # set some variables to their default values if necessary
363    my $pod;
364    unless (@ARGV && $ARGV[0]) {
365	if ($Podfile and $Podfile ne '-') {
366	    open $pod, '<', $Podfile
367		or die "$0: cannot open $Podfile file for input: $!\n";
368	} else {
369	    open $pod, '-';
370	}
371    } else {
372	$Podfile = $ARGV[0];  # XXX: might be more filenames
373	$pod = *ARGV;
374    }
375    $Htmlfile = "-" unless $Htmlfile;	# stdout
376    $Htmlroot = "" if $Htmlroot eq "/";	# so we don't get a //
377    $Htmldir =~ s#/\z## ;               # so we don't get a //
378    if (  $Htmlroot eq ''
379       && defined( $Htmldir )
380       && $Htmldir ne ''
381       && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
382       )
383    {
384	# Set the 'base' url for this file, so that we can use it
385	# as the location from which to calculate relative links
386	# to other files. If this is '', then absolute links will
387	# be used throughout.
388        $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
389    }
390
391    # read the pod a paragraph at a time
392    warn "Scanning for sections in input file(s)\n" if $Verbose;
393    $/ = "";
394    my @poddata  = <$pod>;
395    close $pod;
396
397    # be eol agnostic
398    for (@poddata) {
399	if (/\r/) {
400	    if (/\r\n/) {
401		@poddata = map { s/\r\n/\n/g;
402				 /\n\n/ ?
403				     map { "$_\n\n" } split /\n\n/ :
404				     $_ } @poddata;
405	    } else {
406		@poddata = map { s/\r/\n/g;
407				 /\n\n/ ?
408				     map { "$_\n\n" } split /\n\n/ :
409				     $_ } @poddata;
410	    }
411	    last;
412	}
413    }
414
415    clean_data( \@poddata );
416
417    # scan the pod for =head[1-6] directives and build an index
418    my $index = scan_headings(\%Sections, @poddata);
419
420    unless($index) {
421	warn "No headings in $Podfile\n" if $Verbose;
422    }
423
424    # open the output file
425    my $html;
426    if($Htmlfile and $Htmlfile ne '-') {
427        open $html, ">", $Htmlfile
428            or die "$0: cannot open $Htmlfile file for output: $!\n";
429    } else {
430        open $html, ">-";
431    }
432
433    # put a title in the HTML file if one wasn't specified
434    if ($Title eq '') {
435	TITLE_SEARCH: {
436 	    for (my $i = 0; $i < @poddata; $i++) {
437		if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
438 		    for my $para ( @poddata[$i, $i+1] ) {
439			last TITLE_SEARCH
440			    if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
441		    }
442		}
443
444	    }
445	}
446    }
447    if (!$Title and $Podfile =~ /\.pod\z/) {
448	# probably a split pod so take first =head[12] as title
449 	for (my $i = 0; $i < @poddata; $i++) {
450	    last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
451	}
452	warn "adopted '$Title' as title for $Podfile\n"
453	    if $Verbose and $Title;
454    }
455    if ($Title) {
456	$Title =~ s/\s*\(.*\)//;
457    } else {
458	warn "$0: no title for $Podfile.\n" unless $Quiet;
459	$Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
460	$Title = ($Podfile eq "-" ? 'No Title' : $1);
461	warn "using $Title" if $Verbose;
462    }
463    $Title = html_escape($Title);
464
465    my $csslink = '';
466    my $bodystyle = ' style="background-color: white"';
467    my $tdstyle = ' style="background-color: #cccccc"';
468
469    if ($Css) {
470      $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
471      $csslink =~ s,\\,/,g;
472      $csslink =~ s,(/.):,$1|,;
473      $bodystyle = '';
474      $tdstyle = '';
475    }
476
477      my $block = $Header ? <<END_OF_BLOCK : '';
478<table border="0" width="100%" cellspacing="0" cellpadding="3">
479<tr><td class="block"$tdstyle valign="middle">
480<big><strong><span class="block">&nbsp;$Title</span></strong></big>
481</td></tr>
482</table>
483END_OF_BLOCK
484
485    print $html <<END_OF_HEAD;
486<?xml version="1.0" ?>
487<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
488<html xmlns="http://www.w3.org/1999/xhtml">
489<head>
490<title>$Title</title>$csslink
491<meta http-equiv="content-type" content="text/html; charset=utf-8" />
492<link rev="made" href="mailto:$Config{perladmin}" />
493</head>
494
495<body$bodystyle>
496$block
497END_OF_HEAD
498
499    # load/reload/validate/cache %Pages and %Items
500    get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
501
502    # scan the pod for =item directives
503    scan_items( \%Local_Items, "", @poddata);
504
505    # put an index at the top of the file.  note, if $Doindex is 0 we
506    # still generate an index, but surround it with an html comment.
507    # that way some other program can extract it if desired.
508    $index =~ s/--+/-/g;
509
510    my $hr = ($Doindex and $index) ? qq(<hr name="index" />) : "";
511
512    unless ($Doindex)
513    {
514        $index = qq(<!--\n$index\n-->\n);
515    }
516
517    print $html <<"END_OF_INDEX";
518
519<!-- INDEX BEGIN -->
520<div name="index">
521<p><a name=\"__index__\"></a></p>
522$index
523$hr
524</div>
525<!-- INDEX END -->
526
527END_OF_INDEX
528
529    # now convert this file
530    my $after_item;             # set to true after an =item
531    warn "Converting input file $Podfile\n" if $Verbose;
532    foreach my $i (0..$#poddata){
533	$_ = $poddata[$i];
534	$Paragraph = $i+1;
535	if (/^(=.*)/s) {	# is it a pod directive?
536	    $Ignore = 0;
537	    $after_item = 0;
538	    $_ = $1;
539	    if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
540		process_begin($html, $1, $2);
541	    } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
542		process_end($1, $2);
543	    } elsif (/^=cut/) {			# =cut
544		process_cut();
545	    } elsif (/^=pod/) {			# =pod
546		process_pod();
547	    } else {
548		next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
549
550		if (/^=(head[1-6])\s+(.*\S)/s) {	# =head[1-6] heading
551		    process_head( $html, $1, $2, $Doindex && $index );
552		} elsif (/^=item\s*(.*\S)?/sm) {	# =item text
553		    process_item( $html, $1 );
554		    $after_item = 1;
555		} elsif (/^=over\s*(.*)/) {		# =over N
556		    process_over();
557		} elsif (/^=back/) {		# =back
558		    process_back( $html );
559		} elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
560		    process_for( $html, $1, $2 );
561		} else {
562		    /^=(\S*)\s*/;
563		    warn "$0: $Podfile: unknown pod directive '$1' in "
564		       . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
565		}
566	    }
567	    $Top = 0;
568	}
569	else {
570	    next if $Ignore;
571	    if (@Begin_Stack) {
572		print $html $_ if $Begin_Stack[-1] eq 'html';
573		next;
574	    }
575	    my $text = $_;
576
577	    # Open tag for definition list as we have something to put in it
578	    if( $ListNewTerm ){
579		print $html "<dd>\n";
580		$ListNewTerm = 0;
581	    }
582
583	    if( $text =~ /\A\s+/ ){
584		process_pre( \$text );
585	        print $html "<pre>\n$text</pre>\n";
586
587	    } else {
588		process_text( \$text );
589
590		# experimental: check for a paragraph where all lines
591		# have some ...\t...\t...\n pattern
592		if( $text =~ /\t/ ){
593		    my @lines = split( "\n", $text );
594		    if( @lines > 1 ){
595			my $all = 2;
596			foreach my $line ( @lines ){
597			    if( $line =~ /\S/ && $line !~ /\t/ ){
598				$all--;
599				last if $all == 0;
600			    }
601			}
602			if( $all > 0 ){
603			    $text =~ s/\t+/<td>/g;
604			    $text =~ s/^/<tr><td>/gm;
605			    $text = '<table cellspacing="0" cellpadding="0">' .
606                                    $text . '</table>';
607			}
608		    }
609		}
610		## end of experimental
611
612		print $html "<p>$text</p>\n";
613	    }
614	    $after_item = 0;
615	}
616    }
617
618    # finish off any pending directives
619    finish_list( $html );
620
621    # link to page index
622    print $html "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
623	if $Doindex and $index and $Backlink;
624
625    print $html <<END_OF_TAIL;
626$block
627</body>
628
629</html>
630END_OF_TAIL
631
632    # close the html file
633    close $html or die "Failed to close $Htmlfile: $!";
634
635    warn "Finished\n" if $Verbose;
636}
637
638##############################################################################
639
640sub usage {
641    my $podfile = shift;
642    warn "$0: $podfile: @_\n" if @_;
643    die <<END_OF_USAGE;
644Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
645           --podpath=<name>:...:<name> --podroot=<name>
646           --libpods=<name>:...:<name> --recurse --verbose --index
647           --netscape --norecurse --noindex --cachedir=<name>
648
649  --backlink     - set text for "back to top" links (default: none).
650  --cachedir     - directory for the item and directory cache files.
651  --css          - stylesheet URL
652  --flush        - flushes the item and directory caches.
653  --[no]header   - produce block header/footer (default is no headers).
654  --help         - prints this message.
655  --hiddendirs   - search hidden directories in podpath
656  --htmldir      - directory for resulting HTML files.
657  --htmlroot     - http-server base directory from which all relative paths
658                   in podpath stem (default is /).
659  --[no]index    - generate an index at the top of the resulting html
660                   (default behaviour).
661  --infile       - filename for the pod to convert (input taken from stdin
662                   by default).
663  --libpods      - colon-separated list of pages to search for =item pod
664                   directives in as targets of C<> and implicit links (empty
665                   by default).  note, these are not filenames, but rather
666                   page names like those that appear in L<> links.
667  --outfile      - filename for the resulting html file (output sent to
668                   stdout by default).
669  --podpath      - colon-separated list of directories containing library
670                   pods (empty by default).
671  --podroot      - filesystem base directory from which all relative paths
672                   in podpath stem (default is .).
673  --[no]quiet    - suppress some benign warning messages (default is off).
674  --[no]recurse  - recurse on those subdirectories listed in podpath
675                   (default behaviour).
676  --title        - title that will appear in resulting html file.
677  --[no]verbose  - self-explanatory (off by default).
678  --[no]netscape - deprecated, has no effect. for backwards compatibility only.
679
680END_OF_USAGE
681
682}
683
684sub parse_command_line {
685    my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
686	$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
687	$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
688	$opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
689
690    unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
691    my $result = GetOptions(
692			    'backlink=s' => \$opt_backlink,
693			    'cachedir=s' => \$opt_cachedir,
694			    'css=s'      => \$opt_css,
695			    'flush'      => \$opt_flush,
696			    'header!'    => \$opt_header,
697			    'help'       => \$opt_help,
698			    'hiddendirs!'=> \$opt_hiddendirs,
699			    'htmldir=s'  => \$opt_htmldir,
700			    'htmlroot=s' => \$opt_htmlroot,
701			    'index!'     => \$opt_index,
702			    'infile=s'   => \$opt_infile,
703			    'libpods=s'  => \$opt_libpods,
704			    'netscape!'  => \$opt_netscape,
705			    'outfile=s'  => \$opt_outfile,
706			    'podpath=s'  => \$opt_podpath,
707			    'podroot=s'  => \$opt_podroot,
708			    'quiet!'     => \$opt_quiet,
709			    'recurse!'   => \$opt_recurse,
710			    'title=s'    => \$opt_title,
711			    'verbose!'   => \$opt_verbose,
712			   );
713    usage("-", "invalid parameters") if not $result;
714
715    usage("-") if defined $opt_help;	# see if the user asked for help
716    $opt_help = "";			# just to make -w shut-up.
717
718    @Podpath  = split(":", $opt_podpath) if defined $opt_podpath;
719    @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
720
721    $Backlink = $opt_backlink if defined $opt_backlink;
722    $Cachedir = $opt_cachedir if defined $opt_cachedir;
723    $Css      = $opt_css      if defined $opt_css;
724    $Header   = $opt_header   if defined $opt_header;
725    $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
726    $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
727    $Doindex  = $opt_index    if defined $opt_index;
728    $Podfile  = $opt_infile   if defined $opt_infile;
729    $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
730    $Htmlfile = $opt_outfile  if defined $opt_outfile;
731    $Podroot  = $opt_podroot  if defined $opt_podroot;
732    $Quiet    = $opt_quiet    if defined $opt_quiet;
733    $Recurse  = $opt_recurse  if defined $opt_recurse;
734    $Title    = $opt_title    if defined $opt_title;
735    $Verbose  = $opt_verbose  if defined $opt_verbose;
736
737    warn "Flushing item and directory caches\n"
738	if $opt_verbose && defined $opt_flush;
739    $Dircache = "$Cachedir/pod2htmd.tmp";
740    $Itemcache = "$Cachedir/pod2htmi.tmp";
741    if (defined $opt_flush) {
742	1 while unlink($Dircache, $Itemcache);
743    }
744}
745
746
747my $Saved_Cache_Key;
748
749sub get_cache {
750    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
751    my @cache_key_args = @_;
752
753    # A first-level cache:
754    # Don't bother reading the cache files if they still apply
755    # and haven't changed since we last read them.
756
757    my $this_cache_key = cache_key(@cache_key_args);
758
759    return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
760
761    # load the cache of %Pages and %Items if possible.  $tests will be
762    # non-zero if successful.
763    my $tests = 0;
764    if (-f $dircache && -f $itemcache) {
765	warn "scanning for item cache\n" if $Verbose;
766	$tests = load_cache($dircache, $itemcache, $podpath, $podroot);
767    }
768
769    # if we didn't succeed in loading the cache then we must (re)build
770    #  %Pages and %Items.
771    if (!$tests) {
772	warn "scanning directories in pod-path\n" if $Verbose;
773	scan_podpath($podroot, $recurse, 0);
774    }
775    $Saved_Cache_Key = cache_key(@cache_key_args);
776}
777
778sub cache_key {
779    my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
780    return join('!', $dircache, $itemcache, $recurse,
781	@$podpath, $podroot, stat($dircache), stat($itemcache));
782}
783
784#
785# load_cache - tries to find if the caches stored in $dircache and $itemcache
786#  are valid caches of %Pages and %Items.  if they are valid then it loads
787#  them and returns a non-zero value.
788#
789sub load_cache {
790    my($dircache, $itemcache, $podpath, $podroot) = @_;
791    my($tests);
792    local $_;
793
794    $tests = 0;
795
796    open(CACHE, "<$itemcache") ||
797	die "$0: error opening $itemcache for reading: $!\n";
798    $/ = "\n";
799
800    # is it the same podpath?
801    $_ = <CACHE>;
802    chomp($_);
803    $tests++ if (join(":", @$podpath) eq $_);
804
805    # is it the same podroot?
806    $_ = <CACHE>;
807    chomp($_);
808    $tests++ if ($podroot eq $_);
809
810    # load the cache if its good
811    if ($tests != 2) {
812	close(CACHE);
813	return 0;
814    }
815
816    warn "loading item cache\n" if $Verbose;
817    while (<CACHE>) {
818	/(.*?) (.*)$/;
819	$Items{$1} = $2;
820    }
821    close(CACHE);
822
823    warn "scanning for directory cache\n" if $Verbose;
824    open(CACHE, "<$dircache") ||
825	die "$0: error opening $dircache for reading: $!\n";
826    $/ = "\n";
827    $tests = 0;
828
829    # is it the same podpath?
830    $_ = <CACHE>;
831    chomp($_);
832    $tests++ if (join(":", @$podpath) eq $_);
833
834    # is it the same podroot?
835    $_ = <CACHE>;
836    chomp($_);
837    $tests++ if ($podroot eq $_);
838
839    # load the cache if its good
840    if ($tests != 2) {
841	close(CACHE);
842	return 0;
843    }
844
845    warn "loading directory cache\n" if $Verbose;
846    while (<CACHE>) {
847	/(.*?) (.*)$/;
848	$Pages{$1} = $2;
849    }
850
851    close(CACHE);
852
853    return 1;
854}
855
856#
857# scan_podpath - scans the directories specified in @podpath for directories,
858#  .pod files, and .pm files.  it also scans the pod files specified in
859#  @Libpods for =item directives.
860#
861sub scan_podpath {
862    my($podroot, $recurse, $append) = @_;
863    my($pwd, $dir);
864    my($libpod, $dirname, $pod, @files, @poddata);
865
866    unless($append) {
867	%Items = ();
868	%Pages = ();
869    }
870
871    # scan each directory listed in @Podpath
872    $pwd = getcwd();
873    chdir($podroot)
874	|| die "$0: error changing to directory $podroot: $!\n";
875    foreach $dir (@Podpath) {
876	scan_dir($dir, $recurse);
877    }
878
879    # scan the pods listed in @Libpods for =item directives
880    foreach $libpod (@Libpods) {
881	# if the page isn't defined then we won't know where to find it
882	# on the system.
883	next unless defined $Pages{$libpod} && $Pages{$libpod};
884
885	# if there is a directory then use the .pod and .pm files within it.
886	# NOTE: Only finds the first so-named directory in the tree.
887#	if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
888	if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
889	    #  find all the .pod and .pm files within the directory
890	    $dirname = $1;
891	    opendir(DIR, $dirname) ||
892		die "$0: error opening directory $dirname: $!\n";
893	    @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
894	    closedir(DIR);
895
896	    # scan each .pod and .pm file for =item directives
897	    foreach $pod (@files) {
898		open my $fh, '<', "$dirname/$pod"
899		    or die "$0: error opening $dirname/$pod for input: $!\n";
900		@poddata = <$fh>;
901		close $fh;
902		clean_data( \@poddata );
903
904		scan_items( \%Items, "$dirname/$pod", @poddata);
905	    }
906
907	    # use the names of files as =item directives too.
908### Don't think this should be done this way - confuses issues.(WL)
909###	    foreach $pod (@files) {
910###		$pod =~ /^(.*)(\.pod|\.pm)$/;
911###		$Items{$1} = "$dirname/$1.html" if $1;
912###	    }
913	} elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
914		 $Pages{$libpod} =~ /([^:]*\.pm):/) {
915	    # scan the .pod or .pm file for =item directives
916	    $pod = $1;
917	    open my $fh, '<', $pod
918		or die "$0: error opening $pod for input: $!\n";
919	    @poddata = <$fh>;
920	    close $fh;
921	    clean_data( \@poddata );
922
923	    scan_items( \%Items, "$pod", @poddata);
924	} else {
925	    warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
926	}
927    }
928    @poddata = ();	# clean-up a bit
929
930    chdir($pwd)
931	|| die "$0: error changing to directory $pwd: $!\n";
932
933    # cache the item list for later use
934    warn "caching items for later use\n" if $Verbose;
935    open my $cache, '>', $Itemcache
936	or die "$0: error open $Itemcache for writing: $!\n";
937
938    print $cache join(":", @Podpath) . "\n$podroot\n";
939    foreach my $key (keys %Items) {
940	print $cache "$key $Items{$key}\n";
941    }
942
943    close $cache or die "error closing $Itemcache: $!";
944
945    # cache the directory list for later use
946    warn "caching directories for later use\n" if $Verbose;
947    open $cache, '>', $Dircache
948	or die "$0: error open $Dircache for writing: $!\n";
949
950    print $cache join(":", @Podpath) . "\n$podroot\n";
951    foreach my $key (keys %Pages) {
952	print $cache "$key $Pages{$key}\n";
953    }
954
955    close $cache or die "error closing $Dircache: $!";
956}
957
958#
959# scan_dir - scans the directory specified in $dir for subdirectories, .pod
960#  files, and .pm files.  notes those that it finds.  this information will
961#  be used later in order to figure out where the pages specified in L<>
962#  links are on the filesystem.
963#
964sub scan_dir {
965    my($dir, $recurse) = @_;
966    my($t, @subdirs, @pods, $pod, $dirname, @dirs);
967    local $_;
968
969    @subdirs = ();
970    @pods = ();
971
972    opendir(DIR, $dir) ||
973	die "$0: error opening directory $dir: $!\n";
974    while (defined($_ = readdir(DIR))) {
975	if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
976	    && ($HiddenDirs || !/^\./)
977	) {         # directory
978	    $Pages{$_}  = "" unless defined $Pages{$_};
979	    $Pages{$_} .= "$dir/$_:";
980	    push(@subdirs, $_);
981	} elsif (/\.pod\z/) {	    	    	    	    # .pod
982	    s/\.pod\z//;
983	    $Pages{$_}  = "" unless defined $Pages{$_};
984	    $Pages{$_} .= "$dir/$_.pod:";
985	    push(@pods, "$dir/$_.pod");
986	} elsif (/\.html\z/) { 	    	    	    	    # .html
987	    s/\.html\z//;
988	    $Pages{$_}  = "" unless defined $Pages{$_};
989	    $Pages{$_} .= "$dir/$_.pod:";
990	} elsif (/\.pm\z/) { 	    	    	    	    # .pm
991	    s/\.pm\z//;
992	    $Pages{$_}  = "" unless defined $Pages{$_};
993	    $Pages{$_} .= "$dir/$_.pm:";
994	    push(@pods, "$dir/$_.pm");
995	} elsif (-T "$dir/$_") {			    # script(?)
996	    local *F;
997	    if (open(F, "$dir/$_")) {
998		my $line;
999		while (defined($line = <F>)) {
1000		    if ($line =~ /^=(?:pod|head1)/) {
1001			$Pages{$_}  = "" unless defined $Pages{$_};
1002			$Pages{$_} .= "$dir/$_.pod:";
1003			last;
1004		    }
1005		}
1006		close(F);
1007	    }
1008	}
1009    }
1010    closedir(DIR);
1011
1012    # recurse on the subdirectories if necessary
1013    if ($recurse) {
1014	foreach my $subdir (@subdirs) {
1015	    scan_dir("$dir/$subdir", $recurse);
1016	}
1017    }
1018}
1019
1020#
1021# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1022#  build an index.
1023#
1024sub scan_headings {
1025    my($sections, @data) = @_;
1026    my($tag, $which_head, $otitle, $listdepth, $index);
1027
1028    local $Ignore = 0;
1029
1030    $listdepth = 0;
1031    $index = "";
1032
1033    # scan for =head directives, note their name, and build an index
1034    #  pointing to each of them.
1035    foreach my $line (@data) {
1036      if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1037        ($tag, $which_head, $otitle) = ($1,$2,$3);
1038
1039        my $title = depod( $otitle );
1040        my $name = anchorify( $title );
1041        $$sections{$name} = 1;
1042        $title = process_text( \$otitle );
1043
1044	    while ($which_head != $listdepth) {
1045		if ($which_head > $listdepth) {
1046		    $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1047		    $listdepth++;
1048		} elsif ($which_head < $listdepth) {
1049		    $listdepth--;
1050		    $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1051		}
1052	    }
1053
1054	    $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1055	              "<a href=\"#" . $name . "\">" .
1056		      $title . "</a></li>";
1057	}
1058    }
1059
1060    # finish off the lists
1061    while ($listdepth--) {
1062	$index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1063    }
1064
1065    # get rid of bogus lists
1066    $index =~ s,\t*<ul>\s*</ul>\n,,g;
1067
1068    return $index;
1069}
1070
1071#
1072# scan_items - scans the pod specified by $pod for =item directives.  we
1073#  will use this information later on in resolving C<> links.
1074#
1075sub scan_items {
1076    my( $itemref, $pod, @poddata ) = @_;
1077    my($i, $item);
1078    local $_;
1079
1080    $pod =~ s/\.pod\z//;
1081    $pod .= ".html" if $pod;
1082
1083    foreach $i (0..$#poddata) {
1084	my $txt = depod( $poddata[$i] );
1085
1086	# figure out what kind of item it is.
1087	# Build string for referencing this item.
1088	if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bulleted list
1089	    next unless $1;
1090	    $item = $1;
1091        } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1092	    $item = $1;
1093	} elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # definition list
1094	    $item = $1;
1095	} else {
1096	    next;
1097	}
1098	my $fid = fragment_id( $item );
1099	$$itemref{$fid} = "$pod" if $fid;
1100    }
1101}
1102
1103#
1104# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1105#
1106sub process_head {
1107    my($fh, $tag, $heading, $hasindex) = @_;
1108
1109    # figure out the level of the =head
1110    $tag =~ /head([1-6])/;
1111    my $level = $1;
1112
1113    finish_list( $fh );
1114
1115    print $fh "<p>\n";
1116    if( $level == 1 && ! $Top ){
1117      print $fh "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1118        if $hasindex and $Backlink;
1119      print $fh "</p>\n<hr />\n"
1120    } else {
1121      print $fh "</p>\n";
1122    }
1123
1124    my $name = anchorify( depod( $heading ) );
1125    my $convert = process_text( \$heading );
1126    print $fh "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1127}
1128
1129
1130#
1131# emit_item_tag - print an =item's text
1132# Note: The global $EmittedItem is used for inhibiting self-references.
1133#
1134my $EmittedItem;
1135
1136sub emit_item_tag {
1137    my( $fh, $otext, $text, $compact ) = @_;
1138    my $item = fragment_id( depod($text) , -generate);
1139    Carp::confess("Undefined fragment '$text' (".depod($text).") from fragment_id() in emit_item_tag() in $Podfile")
1140        if !defined $item;
1141    $EmittedItem = $item;
1142    ### print STDERR "emit_item_tag=$item ($text)\n";
1143
1144    print $fh '<strong>';
1145    if ($Items_Named{$item}++) {
1146	print $fh process_text( \$otext );
1147    } else {
1148        my $name = $item;
1149        $name = anchorify($name);
1150	print $fh qq{<a name="$name" class="item">}, process_text( \$otext ), '</a>';
1151    }
1152    print $fh "</strong>";
1153    undef( $EmittedItem );
1154}
1155
1156sub new_listitem {
1157    my ($fh, $tag) = @_;
1158    # Open tag for definition list as we have something to put in it
1159    if( ($tag ne 'dl') && ($ListNewTerm) ){
1160	print $fh "<dd>\n";
1161	$ListNewTerm = 0;
1162    }
1163
1164    if( $Items_Seen[$Listlevel]++ == 0 ){
1165	# start of new list
1166	push( @Listtype, "$tag" );
1167	print $fh "<$tag>\n";
1168    } else {
1169	# if this is not the first item, close the previous one
1170	if ( $tag eq 'dl' ){
1171	    print $fh "</dd>\n" unless $ListNewTerm;
1172	} else {
1173	    print $fh "</li>\n";
1174	}
1175    }
1176    my $opentag = $tag eq 'dl' ? 'dt' : 'li';
1177    print $fh "<$opentag>";
1178}
1179
1180#
1181# process_item - convert a pod item tag and convert it to HTML format.
1182#
1183sub process_item {
1184    my ($fh, $otext) = @_;
1185
1186    # lots of documents start a list without doing an =over.  this is
1187    # bad!  but, the proper thing to do seems to be to just assume
1188    # they did do an =over.  so warn them once and then continue.
1189    if( $Listlevel == 0 ){
1190	warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1191	process_over();
1192    }
1193
1194    # remove formatting instructions from the text
1195    my $text = depod( $otext );
1196
1197    # all the list variants:
1198    if( $text =~ /\A\*/ ){ # bullet
1199        new_listitem( $fh, 'ul' );
1200        if ($text =~ /\A\*\s+(\S.*)\Z/s ) { # with additional text
1201            my $tag = $1;
1202            $otext =~ s/\A\*\s+//;
1203            emit_item_tag( $fh, $otext, $tag, 1 );
1204            print $fh "\n";
1205        }
1206
1207    } elsif( $text =~ /\A\d+/ ){ # numbered list
1208        new_listitem( $fh, 'ol' );
1209        if ($text =~ /\A(?>\d+\.?)\s*(\S.*)\Z/s ) { # with additional text
1210            my $tag = $1;
1211            $otext =~ s/\A\d+\.?\s*//;
1212            emit_item_tag( $fh, $otext, $tag, 1 );
1213            print $fh "\n";
1214        }
1215
1216    } else {			# definition list
1217        # new_listitem takes care of opening the <dt> tag
1218        new_listitem( $fh, 'dl' );
1219        if ($text =~ /\A(.+)\Z/s ){ # should have text
1220            emit_item_tag( $fh, $otext, $text, 1 );
1221	    # write the definition term and close <dt> tag
1222	    print $fh "</dt>\n";
1223        }
1224        # trigger opening a <dd> tag for the actual definition; will not
1225        # happen if next paragraph is also a definition term (=item)
1226        $ListNewTerm = 1;
1227    }
1228    print $fh "\n";
1229}
1230
1231#
1232# process_over - process a pod over tag and start a corresponding HTML list.
1233#
1234sub process_over {
1235    # start a new list
1236    $Listlevel++;
1237    push( @Items_Seen, 0 );
1238}
1239
1240#
1241# process_back - process a pod back tag and convert it to HTML format.
1242#
1243sub process_back {
1244    my $fh = shift;
1245    if( $Listlevel == 0 ){
1246	warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1247	return;
1248    }
1249
1250    # close off the list.  note, I check to see if $Listtype[$Listlevel] is
1251    # defined because an =item directive may have never appeared and thus
1252    # $Listtype[$Listlevel] may have never been initialized.
1253    $Listlevel--;
1254    if( defined $Listtype[$Listlevel] ){
1255        if ( $Listtype[$Listlevel] eq 'dl' ){
1256            print $fh "</dd>\n" unless $ListNewTerm;
1257        } else {
1258            print $fh "</li>\n";
1259        }
1260        print $fh "</$Listtype[$Listlevel]>\n";
1261        pop( @Listtype );
1262        $ListNewTerm = 0;
1263    }
1264
1265    # clean up item count
1266    pop( @Items_Seen );
1267}
1268
1269#
1270# process_cut - process a pod cut tag, thus start ignoring pod directives.
1271#
1272sub process_cut {
1273    $Ignore = 1;
1274}
1275
1276#
1277# process_pod - process a pod tag, thus stop ignoring pod directives
1278# until we see a corresponding cut.
1279#
1280sub process_pod {
1281    # no need to set $Ignore to 0 cause the main loop did it
1282}
1283
1284#
1285# process_for - process a =for pod tag.  if it's for html, spit
1286# it out verbatim, if illustration, center it, otherwise ignore it.
1287#
1288sub process_for {
1289    my ($fh, $whom, $text) = @_;
1290    if ( $whom =~ /^(pod2)?html$/i) {
1291	print $fh $text;
1292    } elsif ($whom =~ /^illustration$/i) {
1293        1 while chomp $text;
1294	for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1295	  $text .= $ext, last if -r "$text$ext";
1296	}
1297        print $fh qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1298    }
1299}
1300
1301#
1302# process_begin - process a =begin pod tag.  this pushes
1303# whom we're beginning on the begin stack.  if there's a
1304# begin stack, we only print if it us.
1305#
1306sub process_begin {
1307    my ($fh, $whom, $text) = @_;
1308    $whom = lc($whom);
1309    push (@Begin_Stack, $whom);
1310    if ( $whom =~ /^(pod2)?html$/) {
1311	print $fh $text if $text;
1312    }
1313}
1314
1315#
1316# process_end - process a =end pod tag.  pop the
1317# begin stack.  die if we're mismatched.
1318#
1319sub process_end {
1320    my($whom, $text) = @_;
1321    $whom = lc($whom);
1322    if (!defined $Begin_Stack[-1] or $Begin_Stack[-1] ne $whom ) {
1323	Carp::confess("Unmatched begin/end at chunk $Paragraph in pod $Podfile\n")
1324    }
1325    pop( @Begin_Stack );
1326}
1327
1328#
1329# process_pre - indented paragraph, made into <pre></pre>
1330#
1331sub process_pre {
1332    my( $text ) = @_;
1333    my( $rest );
1334    return if $Ignore;
1335
1336    $rest = $$text;
1337
1338    # insert spaces in place of tabs
1339    $rest =~ s#(.+)#
1340	    my $line = $1;
1341            1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1342	    $line;
1343	#eg;
1344
1345    # convert some special chars to HTML escapes
1346    $rest = html_escape($rest);
1347
1348    # try and create links for all occurrences of perl.* within
1349    # the preformatted text.
1350    $rest =~ s{
1351	         (\s*)(perl\w+)
1352	      }{
1353		 if ( defined $Pages{$2} ){	# is a link
1354		     qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1355		 } elsif (defined $Pages{dosify($2)}) {	# is a link
1356		     qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1357		 } else {
1358		     "$1$2";
1359		 }
1360	      }xeg;
1361     $rest =~ s{
1362		 (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1363               }{
1364                  my $url ;
1365                  if ( $Htmlfileurl ne '' ){
1366		     # Here, we take advantage of the knowledge
1367		     # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1368		     # Since $Htmlroot eq '', we need to prepend $Htmldir
1369		     # on the fron of the link to get the absolute path
1370		     # of the link's target. We check for a leading '/'
1371		     # to avoid corrupting links that are #, file:, etc.
1372		     my $old_url = $3 ;
1373		     $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1374 		     $url = relativize_url( "$old_url.html", $Htmlfileurl );
1375	          } else {
1376		     $url = "$3.html" ;
1377		  }
1378		  "$1$url" ;
1379	       }xeg;
1380
1381    # Look for embedded URLs and make them into links.  We don't
1382    # relativize them since they are best left as the author intended.
1383
1384    my $urls = '(' . join ('|', qw{
1385                http
1386                telnet
1387		mailto
1388		news
1389                gopher
1390                file
1391                wais
1392                ftp
1393            } )
1394        . ')';
1395
1396    my $ltrs = '\w';
1397    my $gunk = '/#~:.?+=&%@!\-';
1398    my $punc = '.:!?\-;';
1399    my $any  = "${ltrs}${gunk}${punc}";
1400
1401    $rest =~ s{
1402	\b			# start at word boundary
1403	(			# begin $1  {
1404	    $urls :		# need resource and a colon
1405	    (?!:)		# Ignore File::, among others.
1406	    [$any] +?		# followed by one or more of any valid
1407				#   character, but be conservative and
1408				#   take only what you need to....
1409	)			# end   $1  }
1410	(?=
1411	    &quot; &gt;		# maybe pre-quoted '<a href="...">'
1412	|			# or:
1413	    [$punc]*		# 0 or more punctuation
1414	    (?:			#   followed
1415		[^$any]		#   by a non-url char
1416	    |			#   or
1417		$		#   end of the string
1418	    )			#
1419	|			# or else
1420	    $			#   then end of the string
1421        )
1422      }{<a href="$1">$1</a>}igox;
1423
1424    # text should be as it is (verbatim)
1425    $$text = $rest;
1426}
1427
1428
1429#
1430# pure text processing
1431#
1432# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1433# we don't want this to happen within IS
1434#
1435sub pure_text($){
1436    my $text = shift();
1437    process_puretext( $text, 1 );
1438}
1439
1440sub inIS_text($){
1441    my $text = shift();
1442    process_puretext( $text, 0 );
1443}
1444
1445#
1446# process_puretext - process pure text (without pod-escapes) converting
1447#  double-quotes and handling implicit C<> links.
1448#
1449sub process_puretext {
1450    my($text, $notinIS) = @_;
1451
1452    ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1453    ## to produce some strange looking ref's. uncomment to disable:
1454    ## $notinIS = 0;
1455
1456    my(@words, $lead, $trail);
1457
1458    # keep track of leading and trailing white-space
1459    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1460    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1461
1462    # split at space/non-space boundaries
1463    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1464
1465    # process each word individually
1466    foreach my $word (@words) {
1467	# skip space runs
1468 	next if $word =~ /^\s*$/;
1469	# see if we can infer a link or a function call
1470	#
1471	# NOTE: This is a word based search, it won't automatically
1472	# mark "substr($var, 1, 2)" because the 1st word would be "substr($var"
1473	# User has to enclose those with proper C<>
1474
1475	if( $notinIS && $word =~
1476	    m/
1477		^([a-z_]{2,})                 # The function name
1478		\(
1479		    ([0-9][a-z]*              # Manual page(1) or page(1M)
1480		    |[^)]*[\$\@\%][^)]+       # ($foo), (1, @foo), (%hash)
1481		    |                         # ()
1482		    )
1483		\)
1484		([.,;]?)$                     # a possible punctuation follows
1485	    /xi
1486	) {
1487	    # has parenthesis so should have been a C<> ref
1488            ## try for a pagename (perlXXX(1))?
1489            my( $func, $args, $rest ) = ( $1, $2, $3 || '' );
1490            if( $args =~ /^\d+$/ ){
1491                my $url = page_sect( $word, '' );
1492                if( defined $url ){
1493                    $word = qq(<a href="$url" class="man">the $word manpage</a>$rest);
1494                    next;
1495                }
1496            }
1497            ## try function name for a link, append tt'ed argument list
1498            $word = emit_C( $func, '', "($args)") . $rest;
1499
1500#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1501##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1502##	    # perl variables, should be a C<> ref
1503##	    $word = emit_C( $word );
1504
1505	} elsif ($word =~ m,^\w+://\w,) {
1506	    # looks like a URL
1507            # Don't relativize it: leave it as the author intended
1508	    $word = qq(<a href="$word">$word</a>);
1509	} elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1510	    # looks like an e-mail address
1511	    my ($w1, $w2, $w3) = ("", $word, "");
1512	    ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1513	    ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1514	    $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1515	} else {
1516	    $word = html_escape($word) if $word =~ /["&<>]/;
1517	}
1518    }
1519
1520    # put everything back together
1521    return $lead . join( '', @words ) . $trail;
1522}
1523
1524
1525#
1526# process_text - handles plaintext that appears in the input pod file.
1527# there may be pod commands embedded within the text so those must be
1528# converted to html commands.
1529#
1530
1531sub process_text1($$;$$);
1532sub pattern ($) { $_[0] ? '\s+'.('>' x ($_[0] + 1)) : '>' }
1533sub closing ($) { local($_) = shift; (defined && s/\s+\z//) ? length : 0 }
1534
1535sub process_text {
1536    return if $Ignore;
1537    my( $tref ) = @_;
1538    my $res = process_text1( 0, $tref );
1539    $res =~ s/\s+$//s;
1540    $$tref = $res;
1541}
1542
1543sub process_text_rfc_links {
1544    my $text = shift;
1545
1546    # For every "RFCnnnn" or "RFC nnn", link it to the authoritative
1547    # ource. Do not use the /i modifier here. Require "RFC" to be written in
1548    #  in capital letters.
1549
1550    $text =~ s{
1551	(?<=[^<>[:alpha:]])           # Make sure this is not an URL already
1552	(RFC\s*([0-9]{1,5}))(?![0-9]) # max 5 digits
1553    }
1554    {<a href="http://www.ietf.org/rfc/rfc$2.txt" class="rfc">$1</a>}gx;
1555
1556    $text;
1557}
1558
1559sub process_text1($$;$$){
1560    my( $lev, $rstr, $func, $closing ) = @_;
1561    my $res = '';
1562
1563    unless (defined $func) {
1564	$func = '';
1565	$lev++;
1566    }
1567
1568    if( $func eq 'B' ){
1569	# B<text> - boldface
1570	$res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1571
1572    } elsif( $func eq 'C' ){
1573	# C<code> - can be a ref or <code></code>
1574	# need to extract text
1575	my $par = go_ahead( $rstr, 'C', $closing );
1576
1577	## clean-up of the link target
1578        my $text = depod( $par );
1579
1580	### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1581        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1582
1583	$res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1584
1585    } elsif( $func eq 'E' ){
1586	# E<x> - convert to character
1587	$$rstr =~ s/^([^>]*)>//;
1588	my $escape = $1;
1589	$escape =~ s/^0?x([\dA-F]+)$/#x$1/i
1590	or $escape =~ s/^0([0-7]+)$/'#'.oct($1)/ei
1591	or $escape =~ s/^(\d+)$/#$1/;
1592	$res = "&$escape;";
1593
1594    } elsif( $func eq 'F' ){
1595	# F<filename> - italicize
1596	$res = '<em class="file">' . process_text1( $lev, $rstr ) . '</em>';
1597
1598    } elsif( $func eq 'I' ){
1599	# I<text> - italicize
1600	$res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1601
1602    } elsif( $func eq 'L' ){
1603	# L<link> - link
1604	## L<text|cross-ref> => produce text, use cross-ref for linking
1605	## L<cross-ref> => make text from cross-ref
1606	## need to extract text
1607	my $par = go_ahead( $rstr, 'L', $closing );
1608
1609        # some L<>'s that shouldn't be:
1610	# a) full-blown URL's are emitted as-is
1611        if( $par =~ m{^\w+://}s ){
1612	    return make_URL_href( $par );
1613	}
1614        # b) C<...> is stripped and treated as C<>
1615        if( $par =~ /^C<(.*)>$/ ){
1616	    my $text = depod( $1 );
1617 	    return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1618	}
1619
1620	# analyze the contents
1621	$par =~ s/\n/ /g;   # undo word-wrapped tags
1622        my $opar = $par;
1623	my $linktext;
1624	if( $par =~ s{^([^|]+)\|}{} ){
1625	    $linktext = $1;
1626	}
1627
1628	# make sure sections start with a /
1629	$par =~ s{^"}{/"};
1630
1631	my( $page, $section, $ident );
1632
1633	# check for link patterns
1634	if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1635            # we've got a name/ident (no quotes)
1636            if (length $2) {
1637                ( $page, $ident ) = ( $1, $2 );
1638            } else {
1639                ( $page, $section ) = ( $1, $2 );
1640            }
1641            ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1642
1643	} elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1644            # even though this should be a "section", we go for ident first
1645	    ( $page, $ident ) = ( $1, $2 );
1646            ### print STDERR "--> L<$par> to page $page, section $section\n";
1647
1648	} elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1649	    ( $page, $section ) = ( '', $par );
1650            ### print STDERR "--> L<$par> to void page, section $section\n";
1651
1652        } else {
1653	    ( $page, $section ) = ( $par, '' );
1654            ### print STDERR "--> L<$par> to page $par, void section\n";
1655	}
1656
1657        # now, either $section or $ident is defined. the convoluted logic
1658        # below tries to resolve L<> according to what the user specified.
1659        # failing this, we try to find the next best thing...
1660        my( $url, $ltext, $fid );
1661
1662        RESOLVE: {
1663            if( defined $ident ){
1664                ## try to resolve $ident as an item
1665	        ( $url, $fid ) = coderef( $page, $ident );
1666                if( $url ){
1667                    if( ! defined( $linktext ) ){
1668                        $linktext = $ident;
1669                        $linktext .= " in " if $ident && $page;
1670                        $linktext .= "the $page manpage" if $page;
1671                    }
1672                    ###  print STDERR "got coderef url=$url\n";
1673                    last RESOLVE;
1674                }
1675                ## no luck: go for a section (auto-quoting!)
1676                $section = $ident;
1677            }
1678            ## now go for a section
1679            my $htmlsection = htmlify( $section );
1680 	    $url = page_sect( $page, $htmlsection );
1681            if( $url ){
1682                if( ! defined( $linktext ) ){
1683                    $linktext = $section;
1684                    $linktext .= " in " if $section && $page;
1685                    $linktext .= "the $page manpage" if $page;
1686                }
1687                ### print STDERR "got page/section url=$url\n";
1688                last RESOLVE;
1689            }
1690            ## no luck: go for an ident
1691            if( $section ){
1692                $ident = $section;
1693            } else {
1694                $ident = $page;
1695                $page  = undef();
1696            }
1697            ( $url, $fid ) = coderef( $page, $ident );
1698            if( $url ){
1699                if( ! defined( $linktext ) ){
1700                    $linktext = $ident;
1701                    $linktext .= " in " if $ident && $page;
1702                    $linktext .= "the $page manpage" if $page;
1703                }
1704                ### print STDERR "got section=>coderef url=$url\n";
1705                last RESOLVE;
1706            }
1707
1708            # warning; show some text.
1709            $linktext = $opar unless defined $linktext;
1710            warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1711        }
1712
1713        # now we have a URL or just plain code
1714        $$rstr = $linktext . '>' . $$rstr;
1715        if( defined( $url ) ){
1716            $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1717        } else {
1718	    $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1719        }
1720
1721    } elsif( $func eq 'S' ){
1722	# S<text> - non-breaking spaces
1723	$res = process_text1( $lev, $rstr );
1724	$res =~ s/ /&nbsp;/g;
1725
1726    } elsif( $func eq 'X' ){
1727	# X<> - ignore
1728	warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1729	    unless $$rstr =~ s/^[^>]*>// or $Quiet;
1730    } elsif( $func eq 'Z' ){
1731	# Z<> - empty
1732	warn "$0: $Podfile: invalid Z<> in paragraph $Paragraph.\n"
1733	    unless $$rstr =~ s/^>// or $Quiet;
1734
1735    } else {
1736        my $term = pattern $closing;
1737	while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1738	    # all others: either recurse into new function or
1739	    # terminate at closing angle bracket(s)
1740	    my $pt = $1;
1741            $pt .= $2 if !$3 &&  $lev == 1;
1742	    $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1743	    return $res if !$3 && $lev > 1;
1744            if( $3 ){
1745		$res .= process_text1( $lev, $rstr, $3, closing $4 );
1746 	    }
1747	}
1748	if( $lev == 1 ){
1749	    $res .= pure_text( $$rstr );
1750	} elsif( ! $Quiet ) {
1751            my $snippet = substr($$rstr,0,60);
1752            warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph: '$snippet'.\n"
1753
1754	}
1755	$res = process_text_rfc_links($res);
1756    }
1757    return $res;
1758}
1759
1760#
1761# go_ahead: extract text of an IS (can be nested)
1762#
1763sub go_ahead($$$){
1764    my( $rstr, $func, $closing ) = @_;
1765    my $res = '';
1766    my @closing = ($closing);
1767    while( $$rstr =~
1768      s/\A(.*?)(([BCEFILSXZ])<(<+\s+)?|@{[pattern $closing[0]]})//s ){
1769	$res .= $1;
1770	unless( $3 ){
1771	    shift @closing;
1772	    return $res unless @closing;
1773	} else {
1774	    unshift @closing, closing $4;
1775	}
1776	$res .= $2;
1777    }
1778    unless ($Quiet) {
1779        my $snippet = substr($$rstr,0,60);
1780        warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph (go_ahead): '$snippet'.\n"
1781    }
1782    return $res;
1783}
1784
1785#
1786# emit_C - output result of C<text>
1787#    $text is the depod-ed text
1788#
1789sub emit_C($;$$){
1790    my( $text, $nocode, $args ) = @_;
1791    $args = '' unless defined $args;
1792    my $res;
1793    my( $url, $fid ) = coderef( undef(), $text );
1794
1795    # need HTML-safe text
1796    my $linktext = html_escape( "$text$args" );
1797
1798    if( defined( $url ) &&
1799        (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1800	$res = "<a href=\"$url\"><code>$linktext</code></a>";
1801    } elsif( 0 && $nocode ){
1802	$res = $linktext;
1803    } else {
1804	$res = "<code>$linktext</code>";
1805    }
1806    return $res;
1807}
1808
1809#
1810# html_escape: make text safe for HTML
1811#
1812sub html_escape {
1813    my $rest = $_[0];
1814    $rest   =~ s/&/&amp;/g;
1815    $rest   =~ s/</&lt;/g;
1816    $rest   =~ s/>/&gt;/g;
1817    $rest   =~ s/"/&quot;/g;
1818    # &apos; is only in XHTML, not HTML4.  Be conservative
1819    #$rest   =~ s/'/&apos;/g;
1820    return $rest;
1821}
1822
1823
1824#
1825# dosify - convert filenames to 8.3
1826#
1827sub dosify {
1828    my($str) = @_;
1829    return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1830    if ($Is83) {
1831        $str = lc $str;
1832        $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1833        $str =~ s/(\w+)/substr ($1,0,8)/ge;
1834    }
1835    return $str;
1836}
1837
1838#
1839# page_sect - make a URL from the text of a L<>
1840#
1841sub page_sect($$) {
1842    my( $page, $section ) = @_;
1843    my( $linktext, $page83, $link);	# work strings
1844
1845    # check if we know that this is a section in this page
1846    if (!defined $Pages{$page} && defined $Sections{$page}) {
1847	$section = $page;
1848	$page = "";
1849        ### print STDERR "reset page='', section=$section\n";
1850    }
1851
1852    $page83=dosify($page);
1853    $page=$page83 if (defined $Pages{$page83});
1854    if ($page eq "") {
1855        $link = "#" . anchorify( $section );
1856    } elsif ( $page =~ /::/ ) {
1857	$page =~ s,::,/,g;
1858	# Search page cache for an entry keyed under the html page name,
1859	# then look to see what directory that page might be in.  NOTE:
1860	# this will only find one page. A better solution might be to produce
1861	# an intermediate page that is an index to all such pages.
1862	my $page_name = $page ;
1863	$page_name =~ s,^.*/,,s ;
1864	if ( defined( $Pages{ $page_name } ) &&
1865	     $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1866	   ) {
1867	    $page = $1 ;
1868	}
1869	else {
1870	    # NOTE: This branch assumes that all A::B pages are located in
1871	    # $Htmlroot/A/B.html . This is often incorrect, since they are
1872	    # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
1873	    # analyze the contents of %Pages and figure out where any
1874	    # cousins of A::B are, then assume that.  So, if A::B isn't found,
1875	    # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1876	    # lib/A/B.pm. This is also limited, but it's an improvement.
1877	    # Maybe a hints file so that the links point to the correct places
1878	    # nonetheless?
1879
1880	}
1881	$link = "$Htmlroot/$page.html";
1882	$link .= "#" . anchorify( $section ) if ($section);
1883    } elsif (!defined $Pages{$page}) {
1884	$link = "";
1885    } else {
1886	$section = anchorify( $section ) if $section ne "";
1887        ### print STDERR "...section=$section\n";
1888
1889	# if there is a directory by the name of the page, then assume that an
1890	# appropriate section will exist in the subdirectory
1891#	if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1892	if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1893	    $link = "$Htmlroot/$1/$section.html";
1894            ### print STDERR "...link=$link\n";
1895
1896	# since there is no directory by the name of the page, the section will
1897	# have to exist within a .html of the same name.  thus, make sure there
1898	# is a .pod or .pm that might become that .html
1899	} else {
1900	    $section = "#$section" if $section;
1901            ### print STDERR "...section=$section\n";
1902
1903	    # check if there is a .pod with the page name.
1904	    # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
1905	    if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
1906		$link = "$Htmlroot/$1.html$section";
1907	    } else {
1908		$link = "";
1909	    }
1910	}
1911    }
1912
1913    if ($link) {
1914	# Here, we take advantage of the knowledge that $Htmlfileurl ne ''
1915	# implies $Htmlroot eq ''. This means that the link in question
1916	# needs a prefix of $Htmldir if it begins with '/'. The test for
1917	# the initial '/' is done to avoid '#'-only links, and to allow
1918	# for other kinds of links, like file:, ftp:, etc.
1919        my $url ;
1920        if (  $Htmlfileurl ne '' ) {
1921            $link = "$Htmldir$link" if $link =~ m{^/}s;
1922            $url = relativize_url( $link, $Htmlfileurl );
1923# print( "  b: [$link,$Htmlfileurl,$url]\n" );
1924	}
1925	else {
1926            $url = $link ;
1927	}
1928	return $url;
1929
1930    } else {
1931	return undef();
1932    }
1933}
1934
1935#
1936# relativize_url - convert an absolute URL to one relative to a base URL.
1937# Assumes both end in a filename.
1938#
1939sub relativize_url {
1940    my ($dest,$source) = @_ ;
1941
1942    my ($dest_volume,$dest_directory,$dest_file) =
1943        File::Spec::Unix->splitpath( $dest ) ;
1944    $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1945
1946    my ($source_volume,$source_directory,$source_file) =
1947        File::Spec::Unix->splitpath( $source ) ;
1948    $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1949
1950    my $rel_path = '' ;
1951    if ( $dest ne '' ) {
1952       $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1953    }
1954
1955    if ( $rel_path ne ''                &&
1956         substr( $rel_path, -1 ) ne '/' &&
1957         substr( $dest_file, 0, 1 ) ne '#'
1958        ) {
1959        $rel_path .= "/$dest_file" ;
1960    }
1961    else {
1962        $rel_path .= "$dest_file" ;
1963    }
1964
1965    return $rel_path ;
1966}
1967
1968
1969#
1970# coderef - make URL from the text of a C<>
1971#
1972sub coderef($$){
1973    my( $page, $item ) = @_;
1974    my( $url );
1975
1976    my $fid = fragment_id( $item );
1977
1978    if( defined( $page ) && $page ne "" ){
1979	# we have been given a $page...
1980	$page =~ s{::}{/}g;
1981
1982        Carp::confess("Undefined fragment '$item' from fragment_id() in coderef() in $Podfile")
1983            if !defined $fid;
1984	# Do we take it? Item could be a section!
1985	my $base = $Items{$fid} || "";
1986	$base =~ s{[^/]*/}{};
1987	if( $base ne "$page.html" ){
1988            ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
1989	    $page = undef();
1990	}
1991
1992    } else {
1993        # no page - local items precede cached items
1994	if( defined( $fid ) ){
1995	    if(  exists $Local_Items{$fid} ){
1996		$page = $Local_Items{$fid};
1997	    } else {
1998		$page = $Items{$fid};
1999	    }
2000	}
2001    }
2002
2003    # if there was a pod file that we found earlier with an appropriate
2004    # =item directive, then create a link to that page.
2005    if( defined $page ){
2006	if( $page ){
2007            if( exists $Pages{$page} and $Pages{$page} =~ /([^:]*)\.[^:.]*:/){
2008		$page = $1 . '.html';
2009	    }
2010	    my $link = "$Htmlroot/$page#" . anchorify($fid);
2011
2012	    # Here, we take advantage of the knowledge that $Htmlfileurl
2013	    # ne '' implies $Htmlroot eq ''.
2014	    if (  $Htmlfileurl ne '' ) {
2015		$link = "$Htmldir$link" ;
2016		$url = relativize_url( $link, $Htmlfileurl ) ;
2017	    } else {
2018		$url = $link ;
2019	    }
2020	} else {
2021	    $url = "#" . anchorify($fid);
2022	}
2023
2024	confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
2025    }
2026    return( $url, $fid );
2027}
2028
2029
2030
2031#
2032# Adapted from Nick Ing-Simmons' PodToHtml package.
2033sub relative_url {
2034    my $source_file = shift ;
2035    my $destination_file = shift;
2036
2037    my $source = URI::file->new_abs($source_file);
2038    my $uo = URI::file->new($destination_file,$source)->abs;
2039    return $uo->rel->as_string;
2040}
2041
2042
2043#
2044# finish_list - finish off any pending HTML lists.  this should be called
2045# after the entire pod file has been read and converted.
2046#
2047sub finish_list {
2048    my $fh = shift;
2049    if( $Listlevel ){
2050	warn "$0: $Podfile: unterminated list(s) at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
2051	while( $Listlevel ){
2052            process_back( $fh );
2053        }
2054    }
2055}
2056
2057#
2058# htmlify - converts a pod section specification to a suitable section
2059# specification for HTML. Note that we keep spaces and special characters
2060# except ", ? (Netscape problem) and the hyphen (writer's problem...).
2061#
2062sub htmlify {
2063    my( $heading) = @_;
2064    $heading =~ s/(\s+)/ /g;
2065    $heading =~ s/\s+\Z//;
2066    $heading =~ s/\A\s+//;
2067    # The hyphen is a disgrace to the English language.
2068    # $heading =~ s/[-"?]//g;
2069    $heading =~ s/["?]//g;
2070    $heading = lc( $heading );
2071    return $heading;
2072}
2073
2074#
2075# similar to htmlify, but turns non-alphanumerics into underscores
2076#
2077sub anchorify {
2078    my ($anchor) = @_;
2079    $anchor = htmlify($anchor);
2080    $anchor =~ s/\W/_/g;
2081    return $anchor;
2082}
2083
2084#
2085# depod - convert text by eliminating all interior sequences
2086# Note: can be called with copy or modify semantics
2087#
2088my %E2c;
2089$E2c{lt}     = '<';
2090$E2c{gt}     = '>';
2091$E2c{sol}    = '/';
2092$E2c{verbar} = '|';
2093$E2c{amp}    = '&'; # in Tk's pods
2094
2095sub depod1($;$$);
2096
2097sub depod($){
2098    my $string;
2099    if( ref( $_[0] ) ){
2100	$string =  ${$_[0]};
2101        ${$_[0]} = depod1( \$string );
2102    } else {
2103	$string =  $_[0];
2104        depod1( \$string );
2105    }
2106}
2107
2108sub depod1($;$$){
2109  my( $rstr, $func, $closing ) = @_;
2110  my $res = '';
2111  return $res unless defined $$rstr;
2112  if( ! defined( $func ) ){
2113      # skip to next begin of an interior sequence
2114      while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2115         # recurse into its text
2116	  $res .= $1 . depod1( $rstr, $2, closing $3);
2117      }
2118      $res .= $$rstr;
2119  } elsif( $func eq 'E' ){
2120      # E<x> - convert to character
2121      $$rstr =~ s/^([^>]*)>//;
2122      $res .= $E2c{$1} || "";
2123  } elsif( $func eq 'X' ){
2124      # X<> - ignore
2125      $$rstr =~ s/^[^>]*>//;
2126  } elsif( $func eq 'Z' ){
2127      # Z<> - empty
2128      $$rstr =~ s/^>//;
2129  } else {
2130      # all others: either recurse into new function or
2131      # terminate at closing angle bracket
2132      my $term = pattern $closing;
2133      while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2134	  $res .= $1;
2135	  last unless $3;
2136          $res .= depod1( $rstr, $3, closing $4 );
2137      }
2138      ## If we're here and $2 ne '>': undelimited interior sequence.
2139      ## Ignored, as this is called without proper indication of where we are.
2140      ## Rely on process_text to produce diagnostics.
2141  }
2142  return $res;
2143}
2144
2145{
2146    my %seen;   # static fragment record hash
2147
2148sub fragment_id_readable {
2149    my $text     = shift;
2150    my $generate = shift;   # optional flag
2151
2152    my $orig = $text;
2153
2154    # leave the words for the fragment identifier,
2155    # change everything else to underbars.
2156    $text =~ s/[^A-Za-z0-9_]+/_/g; # do not use \W to avoid locale dependency.
2157    $text =~ s/_{2,}/_/g;
2158    $text =~ s/\A_//;
2159    $text =~ s/_\Z//;
2160
2161    unless ($text)
2162    {
2163        # Nothing left after removing punctuation, so leave it as is
2164        # E.g. if option is named: "=item -#"
2165
2166        $text = $orig;
2167    }
2168
2169    if ($generate) {
2170        if ( exists $seen{$text} ) {
2171            # This already exists, make it unique
2172            $seen{$text}++;
2173            $text = $text . $seen{$text};
2174        } else {
2175            $seen{$text} = 1;  # first time seen this fragment
2176        }
2177    }
2178
2179    $text;
2180}}
2181
2182my @HC;
2183sub fragment_id_obfuscated {  # This was the old "_2d_2d__"
2184    my $text     = shift;
2185    my $generate = shift;   # optional flag
2186
2187    # text? Normalize by obfuscating the fragment id to make it unique
2188    $text =~ s/\s+/_/sg;
2189
2190    $text =~ s{(\W)}{
2191        defined( $HC[ord($1)] ) ? $HC[ord($1)]
2192        : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2193    $text = substr( $text, 0, 50 );
2194
2195    $text;
2196}
2197
2198#
2199# fragment_id - construct a fragment identifier from:
2200#   a) =item text
2201#   b) contents of C<...>
2202#
2203
2204sub fragment_id {
2205    my $text     = shift;
2206    my $generate = shift;   # optional flag
2207
2208    $text =~ s/\s+\Z//s;
2209    if( $text ){
2210	# a method or function?
2211	return $1 if $text =~ /(\w+)\s*\(/;
2212	return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2213
2214	# a variable name?
2215	return $1 if $text =~ /^([\$\@%*]\S+)/;
2216
2217	# some pattern matching operator?
2218	return $1 if $text =~ m|^(\w+/).*/\w*$|;
2219
2220	# fancy stuff... like "do { }"
2221	return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2222
2223	# honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2224	# and some funnies with ... Module ...
2225	return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2226	return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2227
2228	return fragment_id_readable($text, $generate);
2229    } else {
2230	return;
2231    }
2232}
2233
2234#
2235# make_URL_href - generate HTML href from URL
2236# Special treatment for CGI queries.
2237#
2238sub make_URL_href($){
2239    my( $url ) = @_;
2240    if( $url !~
2241        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2242        $url = "<a href=\"$url\">$url</a>";
2243    }
2244    return $url;
2245}
2246
22471;
2248