1package Pod::Simple::HTML;
2use strict;
3use warnings;
4use Pod::Simple::PullParser ();
5our @ISA = ('Pod::Simple::PullParser');
6our $VERSION = '3.45';
7BEGIN {
8  if(defined &DEBUG) { } # no-op
9  elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10  else { *DEBUG = sub () {0}; }
11}
12
13our $Doctype_decl ||= '';  # No.  Just No.  Don't even ask me for it.
14 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
15 #    "http://www.w3.org/TR/html4/loose.dtd">\n};
16
17our $Content_decl ||=
18 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
19
20our $HTML_EXTENSION;
21$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22our $Computerese;
23$Computerese =  "" unless defined $Computerese;
24our $LamePad;
25$LamePad = '' unless defined $LamePad;
26
27our $Linearization_Limit;
28$Linearization_Limit = 120 unless defined $Linearization_Limit;
29 # headings/items longer than that won't get an <a name="...">
30our $Perldoc_URL_Prefix;
31$Perldoc_URL_Prefix  = 'https://metacpan.org/pod/'
32 unless defined $Perldoc_URL_Prefix;
33our $Perldoc_URL_Postfix;
34$Perldoc_URL_Postfix = ''
35 unless defined $Perldoc_URL_Postfix;
36
37
38our $Man_URL_Prefix  = 'http://man.he.net/man';
39our $Man_URL_Postfix = '';
40
41our $Title_Prefix;
42$Title_Prefix  = '' unless defined $Title_Prefix;
43our $Title_Postfix;
44$Title_Postfix = '' unless defined $Title_Postfix;
45our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46  # 'item-text' stuff in the index doesn't quite work, and may
47  # not be a good idea anyhow.
48
49
50__PACKAGE__->_accessorize(
51 'perldoc_url_prefix',
52   # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
53   #  to put before the "Foo%3a%3aBar".
54   # (for singleton mode only?)
55 'perldoc_url_postfix',
56   # what to put after "Foo%3a%3aBar" in the URL.  Normally "".
57
58 'man_url_prefix',
59   # In turning L<crontab(5)> into http://whatever/man/1/crontab, what
60   #  to put before the "1/crontab".
61 'man_url_postfix',
62   #  what to put after the "1/crontab" in the URL. Normally "".
63
64 'batch_mode', # whether we're in batch mode
65 'batch_mode_current_level',
66    # When in batch mode, how deep the current module is: 1 for "LWP",
67    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68
69 'title_prefix',  'title_postfix',
70  # What to put before and after the title in the head.
71  # Should already be &-escaped
72
73 'html_h_level',
74
75 'html_header_before_title',
76 'html_header_after_title',
77 'html_footer',
78 'top_anchor',
79
80 'index', # whether to add an index at the top of each page
81    # (actually it's a table-of-contents, but we'll call it an index,
82    #  out of apparently longstanding habit)
83
84 'html_css', # URL of CSS file to point to
85 'html_javascript', # URL of Javascript file to point to
86
87 'force_title',   # should already be &-escaped
88 'default_title', # should already be &-escaped
89);
90
91#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92my @_to_accept;
93
94our %Tagmap = (
95  'Verbatim'  => "\n<pre$Computerese>",
96  '/Verbatim' => "</pre>\n",
97  'VerbatimFormatted'  => "\n<pre$Computerese>",
98  '/VerbatimFormatted' => "</pre>\n",
99  'VerbatimB'  => "<b>",
100  '/VerbatimB' => "</b>",
101  'VerbatimI'  => "<i>",
102  '/VerbatimI' => "</i>",
103  'VerbatimBI'  => "<b><i>",
104  '/VerbatimBI' => "</i></b>",
105
106
107  'Data'  => "\n",
108  '/Data' => "\n",
109
110  'head1' => "\n<h1>",  # And also stick in an <a name="...">
111  'head2' => "\n<h2>",  #  ''
112  'head3' => "\n<h3>",  #  ''
113  'head4' => "\n<h4>",  #  ''
114  'head5' => "\n<h5>",  #  ''
115  'head6' => "\n<h6>",  #  ''
116  '/head1' => "</a></h1>\n",
117  '/head2' => "</a></h2>\n",
118  '/head3' => "</a></h3>\n",
119  '/head4' => "</a></h4>\n",
120  '/head5' => "</a></h5>\n",
121  '/head6' => "</a></h6>\n",
122
123  'X'  => "<!--\n\tINDEX: ",
124  '/X' => "\n-->",
125
126  changes(qw(
127    Para=p
128    B=b I=i
129    over-bullet=ul
130    over-number=ol
131    over-text=dl
132    over-block=blockquote
133    item-bullet=li
134    item-number=li
135    item-text=dt
136  )),
137  changes2(
138    map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139    qw[
140      sample=samp
141      definition=dfn
142      keyboard=kbd
143      variable=var
144      citation=cite
145      abbreviation=abbr
146      acronym=acronym
147      subscript=sub
148      superscript=sup
149      big=big
150      small=small
151      underline=u
152      strikethrough=s
153      preformat=pre
154      teletype=tt
155    ]  # no point in providing a way to get <q>...</q>, I think
156  ),
157
158  '/item-bullet' => "</li>$LamePad\n",
159  '/item-number' => "</li>$LamePad\n",
160  '/item-text'   => "</a></dt>$LamePad\n",
161  'item-body'    => "\n<dd>",
162  '/item-body'   => "</dd>\n",
163
164
165  'B'      =>  "<b>",                  '/B'     =>  "</b>",
166  'I'      =>  "<i>",                  '/I'     =>  "</i>",
167  'F'      =>  "<em$Computerese>",     '/F'     =>  "</em>",
168  'C'      =>  "<code$Computerese>",   '/C'     =>  "</code>",
169  'L'  =>  "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
170  '/L' =>  "</a>",
171);
172
173sub changes {
174  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
175     ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
176  } @_;
177}
178sub changes2 {
179  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
180     ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
181  } @_;
182}
183
184#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
185sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
186 # Just so we can run from the command line.  No options.
187 #  For that, use perldoc!
188#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189
190sub new {
191  my $new = shift->SUPER::new(@_);
192  #$new->nix_X_codes(1);
193  $new->nbsp_for_S(1);
194  $new->accept_targets( 'html', 'HTML' );
195  $new->accept_codes('VerbatimFormatted');
196  $new->accept_codes(@_to_accept);
197  DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
198
199  $new->perldoc_url_prefix(  $Perldoc_URL_Prefix  );
200  $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
201  $new->man_url_prefix(  $Man_URL_Prefix  );
202  $new->man_url_postfix( $Man_URL_Postfix );
203  $new->title_prefix(  $Title_Prefix  );
204  $new->title_postfix( $Title_Postfix );
205
206  $new->html_header_before_title(
207   qq[$Doctype_decl<html><head><title>]
208  );
209  $new->html_header_after_title( join "\n" =>
210    "</title>",
211    $Content_decl,
212    "</head>\n<body class='pod'>",
213    $new->version_tag_comment,
214    "<!-- start doc -->\n",
215  );
216  $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
217  $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" );
218
219  $new->{'Tagmap'} = {%Tagmap};
220
221  return $new;
222}
223
224sub __adjust_html_h_levels {
225  my ($self) = @_;
226  my $Tagmap = $self->{'Tagmap'};
227
228  my $add = $self->html_h_level;
229  return unless defined $add;
230  return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
231
232  $add -= 1;
233  for (1 .. 6) {
234    $Tagmap->{"head$_"}  =~ s/$_/$_ + $add/e;
235    $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
236  }
237}
238
239sub batch_mode_page_object_init {
240  my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
241  DEBUG and print STDERR "Initting $self\n  for $module\n",
242    "  in $infile\n  out $outfile\n  depth $depth\n";
243  $self->batch_mode(1);
244  $self->batch_mode_current_level($depth);
245  return $self;
246}
247
248sub run {
249  my $self = $_[0];
250  return $self->do_middle if $self->bare_output;
251  return
252   $self->do_beginning && $self->do_middle && $self->do_end;
253}
254
255#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256
257sub do_beginning {
258  my $self = $_[0];
259
260  my $title;
261
262  if(defined $self->force_title) {
263    $title = $self->force_title;
264    DEBUG and print STDERR "Forcing title to be $title\n";
265  } else {
266    # Actually try looking for the title in the document:
267    $title = $self->get_short_title();
268    unless($self->content_seen) {
269      DEBUG and print STDERR "No content seen in search for title.\n";
270      return;
271    }
272    $self->{'Title'} = $title;
273
274    if(defined $title and $title =~ m/\S/) {
275      $title = $self->title_prefix . esc($title) . $self->title_postfix;
276    } else {
277      $title = $self->default_title;
278      $title = '' unless defined $title;
279      DEBUG and print STDERR "Title defaults to $title\n";
280    }
281  }
282
283
284  my $after = $self->html_header_after_title  || '';
285  if($self->html_css) {
286    my $link =
287    $self->html_css =~ m/</
288     ? $self->html_css # It's a big blob of markup, let's drop it in
289     : sprintf(        # It's just a URL, so let's wrap it up
290      qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
291      $self->html_css,
292    );
293    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
294  }
295  $self->_add_top_anchor(\$after);
296
297  if($self->html_javascript) {
298    my $link =
299    $self->html_javascript =~ m/</
300     ? $self->html_javascript # It's a big blob of markup, let's drop it in
301     : sprintf(        # It's just a URL, so let's wrap it up
302      qq[<script type="text/javascript" src="%s"></script>\n],
303      $self->html_javascript,
304    );
305    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
306  }
307
308  print {$self->{'output_fh'}}
309    $self->html_header_before_title || '',
310    $title, # already escaped
311    $after,
312  ;
313
314  DEBUG and print STDERR "Returning from do_beginning...\n";
315  return 1;
316}
317
318sub _add_top_anchor {
319  my($self, $text_r) = @_;
320  unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
321    $$text_r .= $self->top_anchor || '';
322  }
323  return;
324}
325
326sub version_tag_comment {
327  my $self = shift;
328  return sprintf
329   "<!--\n  generated by %s v%s,\n  using %s v%s,\n  under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
330   esc(
331    ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
332    $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
333   ), $self->_modnote(),
334  ;
335}
336
337sub _modnote {
338  my $class = ref($_[0]) || $_[0];
339  return join "\n   " => grep m/\S/, split "\n",
340
341qq{
342If you want to change this HTML document, you probably shouldn't do that
343by changing it directly.  Instead, see about changing the calling options
344to $class, and/or subclassing $class,
345then reconverting this document from the Pod source.
346When in doubt, email the author of $class for advice.
347See 'perldoc $class' for more info.
348};
349
350}
351
352sub do_end {
353  my $self = $_[0];
354  print {$self->{'output_fh'}}  $self->html_footer || '';
355  return 1;
356}
357
358# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
359# Normally this would just be a call to _do_middle_main_loop -- but we
360#  have to do some elaborate things to emit all the content and then
361#  summarize it and output it /before/ the content that it's a summary of.
362
363sub do_middle {
364  my $self = $_[0];
365  return $self->_do_middle_main_loop unless $self->index;
366
367  if( $self->output_string ) {
368    # An efficiency hack
369    my $out = $self->output_string; #it's a reference to it
370    my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
371    $$out .= $sneakytag;
372    $self->_do_middle_main_loop;
373    $sneakytag = quotemeta($sneakytag);
374    my $index = $self->index_as_html();
375    if( $$out =~ s/$sneakytag/$index/s ) {
376      # Expected case
377      DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
378    } else {
379      DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
380      # I don't think this should ever happen.
381    }
382    return 1;
383  }
384
385  unless( $self->output_fh ) {
386    require Carp;
387    Carp::confess("Parser object \$p doesn't seem to have any output object!  I don't know how to deal with that.");
388  }
389
390  # If we get here, we're outputting to a FH.  So we need to do some magic.
391  # Namely, divert all content to a string, which we output after the index.
392  my $fh = $self->output_fh;
393  my $content = '';
394  {
395    # Our horrible bait and switch:
396    $self->output_string( \$content );
397    $self->_do_middle_main_loop;
398    $self->abandon_output_string();
399    $self->output_fh($fh);
400  }
401  print $fh $self->index_as_html();
402  print $fh $content;
403
404  return 1;
405}
406
407###########################################################################
408
409sub index_as_html {
410  my $self = $_[0];
411  # This is meant to be called AFTER the input document has been parsed!
412
413  my $points = $self->{'PSHTML_index_points'} || [];
414
415  @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
416   # There's no point in having a 0-item or 1-item index, I dare say.
417
418  my(@out) = qq{\n<div class='indexgroup'>};
419  my $level = 0;
420
421  my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
422  foreach my $p (@$points, ['head0', '(end)']) {
423    ($tagname, $text) = @$p;
424    $anchorname = $self->section_escape($text);
425    if( $tagname =~ m{^head(\d+)$} ) {
426      $target_level = 0 + $1;
427    } else {  # must be some kinda list item
428      if($previous_tagname =~ m{^head\d+$} ) {
429        $target_level = $level + 1;
430      } else {
431        $target_level = $level;  # no change needed
432      }
433    }
434
435    # Get to target_level by opening or closing ULs
436    while($level > $target_level)
437     { --$level; push @out, ("  " x $level) . "</ul>"; }
438    while($level < $target_level)
439     { ++$level; push @out, ("  " x ($level-1))
440       . "<ul   class='indexList indexList$level'>"; }
441
442    $previous_tagname = $tagname;
443    next unless $level;
444
445    $indent = '  '  x $level;
446    push @out, sprintf
447      "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
448      $indent, $level, esc($anchorname), esc($text)
449    ;
450  }
451  push @out, "</div>\n";
452  return join "\n", @out;
453}
454
455###########################################################################
456
457sub _do_middle_main_loop {
458  my $self = $_[0];
459  my $fh = $self->{'output_fh'};
460  my $tagmap = $self->{'Tagmap'};
461
462  $self->__adjust_html_h_levels;
463
464  my($token, $type, $tagname, $linkto, $linktype);
465  my @stack;
466  my $dont_wrap = 0;
467
468  while($token = $self->get_token) {
469
470    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
471    if( ($type = $token->type) eq 'start' ) {
472      if(($tagname = $token->tagname) eq 'L') {
473        $linktype = $token->attr('type') || 'insane';
474
475        $linkto = $self->do_link($token);
476
477        if(defined $linkto and length $linkto) {
478          esc($linkto);
479            #   (Yes, SGML-escaping applies on top of %-escaping!
480            #   But it's rarely noticeable in practice.)
481          print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
482        } else {
483          print $fh "<a>"; # Yes, an 'a' element with no attributes!
484        }
485
486      } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
487        print $fh $tagmap->{$tagname} || next;
488
489        my @to_unget;
490        while(1) {
491          push @to_unget, $self->get_token;
492          last if $to_unget[-1]->is_end
493              and $to_unget[-1]->tagname eq $tagname;
494
495          # TODO: support for X<...>'s found in here?  (maybe hack into linearize_tokens)
496        }
497
498        my $name = $self->linearize_tokens(@to_unget);
499        $name = $self->do_section($name, $token) if defined $name;
500
501        print $fh "<a ";
502        if ($tagname =~ m/^head\d$/s) {
503            print $fh "class='u'", $self->index
504                ? " href='#___top' title='click to go to top of document'\n"
505                : "\n";
506        }
507
508        if(defined $name) {
509          my $esc = esc(  $self->section_name_tidy( $name ) );
510          print $fh qq[name="$esc"];
511          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
512           " tokens as \"$name\".\n";
513          push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
514           if $ToIndex{ $tagname };
515            # Obviously, this discards all formatting codes (saving
516            #  just their content), but ahwell.
517
518        } else {  # ludicrously long, so nevermind
519          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
520           " tokens, but it was too long, so nevermind.\n";
521        }
522        print $fh "\n>";
523        $self->unget_token(@to_unget);
524
525      } elsif ($tagname eq 'Data') {
526        my $next = $self->get_token;
527        next unless defined $next;
528        unless( $next->type eq 'text' ) {
529          $self->unget_token($next);
530          next;
531        }
532        DEBUG and print STDERR "    raw text ", $next->text, "\n";
533        # The parser sometimes preserves newlines and sometimes doesn't!
534        (my $text = $next->text) =~ s/\n\z//;
535        print $fh $text, "\n";
536        next;
537
538      } else {
539        if( $tagname =~ m/^over-/s ) {
540          push @stack, '';
541        } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
542          print $fh $stack[-1];
543          $stack[-1] = '';
544        }
545        print $fh $tagmap->{$tagname} || next;
546        ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
547          or $tagname eq 'X';
548      }
549
550    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
551    } elsif( $type eq 'end' ) {
552      if( ($tagname = $token->tagname) =~ m/^over-/s ) {
553        if( my $end = pop @stack ) {
554          print $fh $end;
555        }
556      } elsif( $tagname =~ m/^item-/s and @stack) {
557        $stack[-1] = $tagmap->{"/$tagname"};
558        if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
559          $self->unget_token($next);
560          if( $next->type eq 'start' ) {
561            print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
562            $stack[-1] = $tagmap->{"/item-body"};
563          }
564        }
565        next;
566      }
567      print $fh $tagmap->{"/$tagname"} || next;
568      --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
569
570    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
571    } elsif( $type eq 'text' ) {
572      esc($type = $token->text);  # reuse $type, why not
573      $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
574      print $fh $type;
575    }
576
577  }
578  return 1;
579}
580
581###########################################################################
582#
583
584sub do_section {
585  my($self, $name, $token) = @_;
586  return $name;
587}
588
589sub do_link {
590  my($self, $token) = @_;
591  my $type = $token->attr('type');
592  if(!defined $type) {
593    $self->whine("Typeless L!?", $token->attr('start_line'));
594  } elsif( $type eq 'pod') { return $self->do_pod_link($token);
595  } elsif( $type eq 'url') { return $self->do_url_link($token);
596  } elsif( $type eq 'man') { return $self->do_man_link($token);
597  } else {
598    $self->whine("L of unknown type $type!?", $token->attr('start_line'));
599  }
600  return 'FNORG'; # should never get called
601}
602
603# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
604
605sub do_url_link { return $_[1]->attr('to') }
606
607sub do_man_link {
608  my ($self, $link) = @_;
609  my $to = $link->attr('to');
610  my $frag = $link->attr('section');
611
612  return undef unless defined $to and length $to; # should never happen
613
614  $frag = $self->section_escape($frag)
615   if defined $frag and length($frag .= ''); # (stringify)
616
617  DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
618
619  return $self->resolve_man_page_link($to, $frag);
620}
621
622
623sub do_pod_link {
624  # And now things get really messy...
625  my($self, $link) = @_;
626  my $to = $link->attr('to');
627  my $section = $link->attr('section');
628  return undef unless(  # should never happen
629    (defined $to and length $to) or
630    (defined $section and length $section)
631  );
632
633  $section = $self->section_escape($section)
634   if defined $section and length($section .= ''); # (stringify)
635
636  DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
637   $to || "(nil)",  $section || "(nil)";
638
639  {
640    # An early hack:
641    my $complete_url = $self->resolve_pod_link_by_table($to, $section);
642    if( $complete_url ) {
643      DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
644        $complete_url, "\n  (Returning that.)\n";
645      return $complete_url;
646    } else {
647      DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
648       " didn't return anything interesting.\n";
649    }
650  }
651
652  if(defined $to and length $to) {
653    # Give this routine first hack again
654    my $there = $self->resolve_pod_link_by_table($to);
655    if(defined $there and length $there) {
656      DEBUG > 1
657       and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
658    } else {
659      $there =
660        $self->resolve_pod_page_link($to, $section);
661         # (I pass it the section value, but I don't see a
662         #  particular reason it'd use it.)
663      DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
664      unless( defined $there and length $there ) {
665        DEBUG and print STDERR "Can't resolve $to\n";
666        return undef;
667      }
668      # resolve_pod_page_link returning undef is how it
669      #  can signal that it gives up on making a link
670    }
671    $to = $there;
672  }
673
674  #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
675
676  my $out = (defined $to and length $to) ? $to : '';
677  $out .= "#" . $section if defined $section and length $section;
678
679  unless(length $out) { # sanity check
680    DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
681     $to || "(nil)",  $section || "(nil)";
682    return undef;
683  }
684
685  DEBUG and print STDERR "Resolved to $out\n";
686  return $out;
687}
688
689
690# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
691
692sub section_escape {
693  my($self, $section) = @_;
694  return $self->section_url_escape(
695    $self->section_name_tidy($section)
696  );
697}
698
699sub section_name_tidy {
700  my($self, $section) = @_;
701  $section =~ s/^\s+//;
702  $section =~ s/\s+$//;
703  $section =~ tr/ /_/;
704  if ($] ge 5.006) {
705    $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
706  } elsif ('A' eq chr(65)) { # But not on early EBCDIC
707    $section =~ tr/\x00-\x1F\x80-\x9F//d;
708  }
709  $section = $self->unicode_escape_url($section);
710  $section = '_' unless length $section;
711  return $section;
712}
713
714sub section_url_escape  { shift->general_url_escape(@_) }
715sub pagepath_url_escape { shift->general_url_escape(@_) }
716sub manpage_url_escape  { shift->general_url_escape(@_) }
717
718sub general_url_escape {
719  my($self, $string) = @_;
720
721  $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
722     # express Unicode things as urlencode(utf(orig)).
723
724  # A pretty conservative escaping, behoovey even for query components
725  #  of a URL (see RFC 2396)
726
727  if ($] ge 5.007_003) {
728    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
729  } else { # Is broken for non-ASCII platforms on early perls
730    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
731  }
732   # Yes, stipulate the list without a range, so that this can work right on
733   #  all charsets that this module happens to run under.
734
735  return $string;
736}
737
738#--------------------------------------------------------------------------
739#
740# Oh look, a yawning portal to Hell!  Let's play touch football right by it!
741#
742
743sub resolve_pod_page_link {
744  # resolve_pod_page_link must return a properly escaped URL
745  my $self = shift;
746  return $self->batch_mode()
747   ? $self->resolve_pod_page_link_batch_mode(@_)
748   : $self->resolve_pod_page_link_singleton_mode(@_)
749  ;
750}
751
752sub resolve_pod_page_link_singleton_mode {
753  my($self, $it) = @_;
754  return undef unless defined $it and length $it;
755  my $url = $self->pagepath_url_escape($it);
756
757  $url =~ s{::$}{}s; # probably never comes up anyway
758  $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
759
760  return undef unless length $url;
761  return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
762}
763
764sub resolve_pod_page_link_batch_mode {
765  my($self, $to) = @_;
766  DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
767  my @path = grep length($_), split m/::/s, $to, -1;
768  unless( @path ) { # sanity
769    DEBUG and print STDERR "Very odd!  Splitting $to gives (nil)!\n";
770    return undef;
771  }
772  $self->batch_mode_rectify_path(\@path);
773  my $out = join('/', map $self->pagepath_url_escape($_), @path)
774    . $HTML_EXTENSION;
775  DEBUG > 1 and print STDERR " => $out\n";
776  return $out;
777}
778
779sub batch_mode_rectify_path {
780  my($self, $pathbits) = @_;
781  my $level = $self->batch_mode_current_level;
782  $level--; # how many levels up to go to get to the root
783  if($level < 1) {
784    unshift @$pathbits, '.'; # just to be pretty
785  } else {
786    unshift @$pathbits, ('..') x $level;
787  }
788  return;
789}
790
791sub resolve_man_page_link {
792  my ($self, $to, $frag) = @_;
793  my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
794
795  return undef unless defined $page and length $page;
796  $section ||= 1;
797
798  return $self->man_url_prefix . "$section/"
799      . $self->manpage_url_escape($page)
800      . $self->man_url_postfix;
801}
802
803#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804
805sub resolve_pod_link_by_table {
806  # A crazy hack to allow specifying custom L<foo> => URL mappings
807
808  return unless $_[0]->{'podhtml_LOT'};  # An optimizy shortcut
809
810  my($self, $to, $section) = @_;
811
812  # TODO: add a method that actually populates podhtml_LOT from a file?
813
814  if(defined $section) {
815    $to = '' unless defined $to and length $to;
816    return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
817  } else {
818    return $self->{'podhtml_LOT'}{$to};            # quite possibly undef!
819  }
820  return;
821}
822
823###########################################################################
824
825sub linearize_tokens {  # self, tokens
826  my $self = shift;
827  my $out = '';
828
829  my $t;
830  while($t = shift @_) {
831    if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
832      $out .= $t; # a string, or some insane thing
833    } elsif($t->is_text) {
834      $out .= $t->text;
835    } elsif($t->is_start and $t->tag eq 'X') {
836      # Ignore until the end of this X<...> sequence:
837      my $x_open = 1;
838      while($x_open) {
839        next if( ($t = shift @_)->is_text );
840        if(   $t->is_start and $t->tag eq 'X') { ++$x_open }
841        elsif($t->is_end   and $t->tag eq 'X') { --$x_open }
842      }
843    }
844  }
845  return undef if length $out > $Linearization_Limit;
846  return $out;
847}
848
849#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
850
851sub unicode_escape_url {
852  my($self, $string) = @_;
853  $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
854    #  Turn char 1234 into "(1234)"
855  return $string;
856}
857
858#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
859sub esc { # a function.
860  if(defined wantarray) {
861    if(wantarray) {
862      @_ = splice @_; # break aliasing
863    } else {
864      my $x = shift;
865      if ($] ge 5.007_003) {
866        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
867      } else { # Is broken for non-ASCII platforms on early perls
868        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
869      }
870      return $x;
871    }
872  }
873  foreach my $x (@_) {
874    # Escape things very cautiously:
875    if (defined $x) {
876      if ($] ge 5.007_003) {
877        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
878      } else { # Is broken for non-ASCII platforms on early perls
879        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
880      }
881    }
882    # Leave out "- so that "--" won't make it thru in X-generated comments
883    #  with text in them.
884
885    # Yes, stipulate the list without a range, so that this can work right on
886    #  all charsets that this module happens to run under.
887  }
888  return @_;
889}
890
891#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
892
8931;
894__END__
895
896=head1 NAME
897
898Pod::Simple::HTML - convert Pod to HTML
899
900=head1 SYNOPSIS
901
902  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
903
904
905=head1 DESCRIPTION
906
907This class is for making an HTML rendering of a Pod document.
908
909This is a subclass of L<Pod::Simple::PullParser> and inherits all its
910methods (and options).
911
912Note that if you want to do a batch conversion of a lot of Pod
913documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
914
915
916
917=head1 CALLING FROM THE COMMAND LINE
918
919TODO
920
921  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
922
923
924
925=head1 CALLING FROM PERL
926
927=head2 Minimal code
928
929  use Pod::Simple::HTML;
930  my $p = Pod::Simple::HTML->new;
931  $p->output_string(\my $html);
932  $p->parse_file('path/to/Module/Name.pm');
933  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
934  print $out $html;
935
936=head2 More detailed example
937
938  use Pod::Simple::HTML;
939
940Set the content type:
941
942  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
943
944  my $p = Pod::Simple::HTML->new;
945
946Include a single javascript source:
947
948  $p->html_javascript('http://abc.com/a.js');
949
950Or insert multiple javascript source in the header
951(or for that matter include anything, thought this is not recommended)
952
953  $p->html_javascript('
954      <script type="text/javascript" src="http://abc.com/b.js"></script>
955      <script type="text/javascript" src="http://abc.com/c.js"></script>');
956
957Include a single css source in the header:
958
959  $p->html_css('/style.css');
960
961or insert multiple css sources:
962
963  $p->html_css('
964      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css">
965      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">');
966
967Tell the parser where should the output go. In this case it will be placed in the $html variable:
968
969  my $html;
970  $p->output_string(\$html);
971
972Parse and process a file with pod in it:
973
974  $p->parse_file('path/to/Module/Name.pm');
975
976=head1 METHODS
977
978TODO
979all (most?) accessorized methods
980
981The following variables need to be set B<before> the call to the ->new constructor.
982
983Set the string that is included before the opening <html> tag:
984
985  $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
986    "http://www.w3.org/TR/html4/loose.dtd">\n};
987
988Set the content-type in the HTML head: (defaults to ISO-8859-1)
989
990  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
991
992Set the value that will be embedded in the opening tags of F, C tags and verbatim text.
993F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "")
994
995  $Pod::Simple::HTML::Computerese =  ' class="some_class_name';
996
997=head2 html_css
998
999=head2 html_javascript
1000
1001=head2 title_prefix
1002
1003=head2 title_postfix
1004
1005=head2 html_header_before_title
1006
1007This includes everything before the <title> opening tag including the Document type
1008and including the opening <title> tag. The following call will set it to be a simple HTML
1009file:
1010
1011  $p->html_header_before_title('<html><head><title>');
1012
1013=head2 top_anchor
1014
1015By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML.
1016You can change it by calling
1017
1018  $p->top_anchor('<a name="zz" >');
1019
1020=head2 html_h_level
1021
1022Normally =head1 will become <h1>, =head2 will become <h2> etc.
1023Using the html_h_level method will change these levels setting the h level
1024of =head1 tags:
1025
1026  $p->html_h_level(3);
1027
1028Will make sure that =head1 will become <h3> and =head2 will become <h4> etc...
1029
1030
1031=head2 index
1032
1033Set it to some true value if you want to have an index (in reality a table of contents)
1034to be added at the top of the generated HTML.
1035
1036  $p->index(1);
1037
1038=head2 html_header_after_title
1039
1040Includes the closing tag of </title> and through the rest of the head
1041till the opening of the body
1042
1043  $p->html_header_after_title('</title>...</head><body id="my_id">');
1044
1045=head2 html_footer
1046
1047The very end of the document:
1048
1049  $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
1050
1051=head1 SUBCLASSING
1052
1053Can use any of the methods described above but for further customization
1054one needs to override some of the methods:
1055
1056  package My::Pod;
1057  use strict;
1058  use warnings;
1059
1060  use base 'Pod::Simple::HTML';
1061
1062  # needs to return a URL string such
1063  # http://some.other.com/page.html
1064  # #anchor_in_the_same_file
1065  # /internal/ref.html
1066  sub do_pod_link {
1067    # My::Pod object and Pod::Simple::PullParserStartToken object
1068    my ($self, $link) = @_;
1069
1070    say $link->tagname;          # will be L for links
1071    say $link->attr('to');       #
1072    say $link->attr('type');     # will be 'pod' always
1073    say $link->attr('section');
1074
1075    # Links local to our web site
1076    if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') {
1077      my $to = $link->attr('to');
1078      if ($to =~ /^Padre::/) {
1079          $to =~ s{::}{/}g;
1080          return "/docs/Padre/$to.html";
1081      }
1082    }
1083
1084    # all other links are generated by the parent class
1085    my $ret = $self->SUPER::do_pod_link($link);
1086    return $ret;
1087  }
1088
1089  1;
1090
1091Meanwhile in script.pl:
1092
1093  use My::Pod;
1094
1095  my $p = My::Pod->new;
1096
1097  my $html;
1098  $p->output_string(\$html);
1099  $p->parse_file('path/to/Module/Name.pm');
1100  open my $out, '>', 'out.html' or die;
1101  print $out $html;
1102
1103TODO
1104
1105maybe override do_beginning do_end
1106
1107=head1 SEE ALSO
1108
1109L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
1110
1111TODO: a corpus of sample Pod input and HTML output?  Or common
1112idioms?
1113
1114=head1 SUPPORT
1115
1116Questions or discussion about POD and Pod::Simple should be sent to the
1117pod-people@perl.org mail list. Send an empty email to
1118pod-people-subscribe@perl.org to subscribe.
1119
1120This module is managed in an open GitHub repository,
1121L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
1122to clone L<https://github.com/perl-pod/pod-simple.git> and send patches!
1123
1124Patches against Pod::Simple are welcome. Please send bug reports to
1125<bug-pod-simple@rt.cpan.org>.
1126
1127=head1 COPYRIGHT AND DISCLAIMERS
1128
1129Copyright (c) 2002-2004 Sean M. Burke.
1130
1131This library is free software; you can redistribute it and/or modify it
1132under the same terms as Perl itself.
1133
1134This program is distributed in the hope that it will be useful, but
1135without any warranty; without even the implied warranty of
1136merchantability or fitness for a particular purpose.
1137
1138=head1 ACKNOWLEDGEMENTS
1139
1140Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
1141L<Linux man pages online|http://man.he.net/> site for man page links.
1142
1143Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
1144site for Perl module links.
1145
1146=head1 AUTHOR
1147
1148Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
1149But don't bother him, he's retired.
1150
1151Pod::Simple is maintained by:
1152
1153=over
1154
1155=item * Allison Randal C<allison@perl.org>
1156
1157=item * Hans Dieter Pearcey C<hdp@cpan.org>
1158
1159=item * David E. Wheeler C<dwheeler@cpan.org>
1160
1161=back
1162
1163=cut
1164