1require 5;
2package Pod::Simple::HTML;
3use strict;
4use Pod::Simple::PullParser ();
5use vars qw(
6  @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
7  $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
8  $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
9  $Doctype_decl  $Content_decl
10);
11@ISA = ('Pod::Simple::PullParser');
12$VERSION = '3.35';
13BEGIN {
14  if(defined &DEBUG) { } # no-op
15  elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
16  else { *DEBUG = sub () {0}; }
17}
18
19$Doctype_decl ||= '';  # No.  Just No.  Don't even ask me for it.
20 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
21 #    "http://www.w3.org/TR/html4/loose.dtd">\n};
22
23$Content_decl ||=
24 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
25
26$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
27$Computerese =  "" unless defined $Computerese;
28$LamePad = '' unless defined $LamePad;
29
30$Linearization_Limit = 120 unless defined $Linearization_Limit;
31 # headings/items longer than that won't get an <a name="...">
32$Perldoc_URL_Prefix  = 'http://search.cpan.org/perldoc?'
33 unless defined $Perldoc_URL_Prefix;
34$Perldoc_URL_Postfix = ''
35 unless defined $Perldoc_URL_Postfix;
36
37
38$Man_URL_Prefix  = 'http://man.he.net/man';
39$Man_URL_Postfix = '';
40
41$Title_Prefix  = '' unless defined $Title_Prefix;
42$Title_Postfix = '' unless defined $Title_Postfix;
43%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
44  # 'item-text' stuff in the index doesn't quite work, and may
45  # not be a good idea anyhow.
46
47
48__PACKAGE__->_accessorize(
49 'perldoc_url_prefix',
50   # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
51   #  to put before the "Foo%3a%3aBar".
52   # (for singleton mode only?)
53 'perldoc_url_postfix',
54   # what to put after "Foo%3a%3aBar" in the URL.  Normally "".
55
56 'man_url_prefix',
57   # In turning L<crontab(5)> into http://whatever/man/1/crontab, what
58   #  to put before the "1/crontab".
59 'man_url_postfix',
60   #  what to put after the "1/crontab" in the URL. Normally "".
61
62 'batch_mode', # whether we're in batch mode
63 'batch_mode_current_level',
64    # When in batch mode, how deep the current module is: 1 for "LWP",
65    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
66
67 'title_prefix',  'title_postfix',
68  # What to put before and after the title in the head.
69  # Should already be &-escaped
70
71 'html_h_level',
72
73 'html_header_before_title',
74 'html_header_after_title',
75 'html_footer',
76 'top_anchor',
77
78 'index', # whether to add an index at the top of each page
79    # (actually it's a table-of-contents, but we'll call it an index,
80    #  out of apparently longstanding habit)
81
82 'html_css', # URL of CSS file to point to
83 'html_javascript', # URL of Javascript file to point to
84
85 'force_title',   # should already be &-escaped
86 'default_title', # should already be &-escaped
87);
88
89#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90my @_to_accept;
91
92%Tagmap = (
93  'Verbatim'  => "\n<pre$Computerese>",
94  '/Verbatim' => "</pre>\n",
95  'VerbatimFormatted'  => "\n<pre$Computerese>",
96  '/VerbatimFormatted' => "</pre>\n",
97  'VerbatimB'  => "<b>",
98  '/VerbatimB' => "</b>",
99  'VerbatimI'  => "<i>",
100  '/VerbatimI' => "</i>",
101  'VerbatimBI'  => "<b><i>",
102  '/VerbatimBI' => "</i></b>",
103
104
105  'Data'  => "\n",
106  '/Data' => "\n",
107
108  'head1' => "\n<h1>",  # And also stick in an <a name="...">
109  'head2' => "\n<h2>",  #  ''
110  'head3' => "\n<h3>",  #  ''
111  'head4' => "\n<h4>",  #  ''
112  '/head1' => "</a></h1>\n",
113  '/head2' => "</a></h2>\n",
114  '/head3' => "</a></h3>\n",
115  '/head4' => "</a></h4>\n",
116
117  'X'  => "<!--\n\tINDEX: ",
118  '/X' => "\n-->",
119
120  changes(qw(
121    Para=p
122    B=b I=i
123    over-bullet=ul
124    over-number=ol
125    over-text=dl
126    over-block=blockquote
127    item-bullet=li
128    item-number=li
129    item-text=dt
130  )),
131  changes2(
132    map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
133    qw[
134      sample=samp
135      definition=dfn
136      keyboard=kbd
137      variable=var
138      citation=cite
139      abbreviation=abbr
140      acronym=acronym
141      subscript=sub
142      superscript=sup
143      big=big
144      small=small
145      underline=u
146      strikethrough=s
147      preformat=pre
148      teletype=tt
149    ]  # no point in providing a way to get <q>...</q>, I think
150  ),
151
152  '/item-bullet' => "</li>$LamePad\n",
153  '/item-number' => "</li>$LamePad\n",
154  '/item-text'   => "</a></dt>$LamePad\n",
155  'item-body'    => "\n<dd>",
156  '/item-body'   => "</dd>\n",
157
158
159  'B'      =>  "<b>",                  '/B'     =>  "</b>",
160  'I'      =>  "<i>",                  '/I'     =>  "</i>",
161  'F'      =>  "<em$Computerese>",     '/F'     =>  "</em>",
162  'C'      =>  "<code$Computerese>",   '/C'     =>  "</code>",
163  'L'  =>  "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
164  '/L' =>  "</a>",
165);
166
167sub changes {
168  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
169     ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
170  } @_;
171}
172sub changes2 {
173  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
174     ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
175  } @_;
176}
177
178#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
179sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
180 # Just so we can run from the command line.  No options.
181 #  For that, use perldoc!
182#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183
184sub new {
185  my $new = shift->SUPER::new(@_);
186  #$new->nix_X_codes(1);
187  $new->nbsp_for_S(1);
188  $new->accept_targets( 'html', 'HTML' );
189  $new->accept_codes('VerbatimFormatted');
190  $new->accept_codes(@_to_accept);
191  DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
192
193  $new->perldoc_url_prefix(  $Perldoc_URL_Prefix  );
194  $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
195  $new->man_url_prefix(  $Man_URL_Prefix  );
196  $new->man_url_postfix( $Man_URL_Postfix );
197  $new->title_prefix(  $Title_Prefix  );
198  $new->title_postfix( $Title_Postfix );
199
200  $new->html_header_before_title(
201   qq[$Doctype_decl<html><head><title>]
202  );
203  $new->html_header_after_title( join "\n" =>
204    "</title>",
205    $Content_decl,
206    "</head>\n<body class='pod'>",
207    $new->version_tag_comment,
208    "<!-- start doc -->\n",
209  );
210  $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
211  $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" );
212
213  $new->{'Tagmap'} = {%Tagmap};
214
215  return $new;
216}
217
218sub __adjust_html_h_levels {
219  my ($self) = @_;
220  my $Tagmap = $self->{'Tagmap'};
221
222  my $add = $self->html_h_level;
223  return unless defined $add;
224  return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
225
226  $add -= 1;
227  for (1 .. 4) {
228    $Tagmap->{"head$_"}  =~ s/$_/$_ + $add/e;
229    $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
230  }
231}
232
233sub batch_mode_page_object_init {
234  my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
235  DEBUG and print STDERR "Initting $self\n  for $module\n",
236    "  in $infile\n  out $outfile\n  depth $depth\n";
237  $self->batch_mode(1);
238  $self->batch_mode_current_level($depth);
239  return $self;
240}
241
242sub run {
243  my $self = $_[0];
244  return $self->do_middle if $self->bare_output;
245  return
246   $self->do_beginning && $self->do_middle && $self->do_end;
247}
248
249#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
250
251sub do_beginning {
252  my $self = $_[0];
253
254  my $title;
255
256  if(defined $self->force_title) {
257    $title = $self->force_title;
258    DEBUG and print STDERR "Forcing title to be $title\n";
259  } else {
260    # Actually try looking for the title in the document:
261    $title = $self->get_short_title();
262    unless($self->content_seen) {
263      DEBUG and print STDERR "No content seen in search for title.\n";
264      return;
265    }
266    $self->{'Title'} = $title;
267
268    if(defined $title and $title =~ m/\S/) {
269      $title = $self->title_prefix . esc($title) . $self->title_postfix;
270    } else {
271      $title = $self->default_title;
272      $title = '' unless defined $title;
273      DEBUG and print STDERR "Title defaults to $title\n";
274    }
275  }
276
277
278  my $after = $self->html_header_after_title  || '';
279  if($self->html_css) {
280    my $link =
281    $self->html_css =~ m/</
282     ? $self->html_css # It's a big blob of markup, let's drop it in
283     : sprintf(        # It's just a URL, so let's wrap it up
284      qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
285      $self->html_css,
286    );
287    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
288  }
289  $self->_add_top_anchor(\$after);
290
291  if($self->html_javascript) {
292    my $link =
293    $self->html_javascript =~ m/</
294     ? $self->html_javascript # It's a big blob of markup, let's drop it in
295     : sprintf(        # It's just a URL, so let's wrap it up
296      qq[<script type="text/javascript" src="%s"></script>\n],
297      $self->html_javascript,
298    );
299    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
300  }
301
302  print {$self->{'output_fh'}}
303    $self->html_header_before_title || '',
304    $title, # already escaped
305    $after,
306  ;
307
308  DEBUG and print STDERR "Returning from do_beginning...\n";
309  return 1;
310}
311
312sub _add_top_anchor {
313  my($self, $text_r) = @_;
314  unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
315    $$text_r .= $self->top_anchor || '';
316  }
317  return;
318}
319
320sub version_tag_comment {
321  my $self = shift;
322  return sprintf
323   "<!--\n  generated by %s v%s,\n  using %s v%s,\n  under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
324   esc(
325    ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
326    $], scalar(gmtime),
327   ), $self->_modnote(),
328  ;
329}
330
331sub _modnote {
332  my $class = ref($_[0]) || $_[0];
333  return join "\n   " => grep m/\S/, split "\n",
334
335qq{
336If you want to change this HTML document, you probably shouldn't do that
337by changing it directly.  Instead, see about changing the calling options
338to $class, and/or subclassing $class,
339then reconverting this document from the Pod source.
340When in doubt, email the author of $class for advice.
341See 'perldoc $class' for more info.
342};
343
344}
345
346sub do_end {
347  my $self = $_[0];
348  print {$self->{'output_fh'}}  $self->html_footer || '';
349  return 1;
350}
351
352# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
353# Normally this would just be a call to _do_middle_main_loop -- but we
354#  have to do some elaborate things to emit all the content and then
355#  summarize it and output it /before/ the content that it's a summary of.
356
357sub do_middle {
358  my $self = $_[0];
359  return $self->_do_middle_main_loop unless $self->index;
360
361  if( $self->output_string ) {
362    # An efficiency hack
363    my $out = $self->output_string; #it's a reference to it
364    my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
365    $$out .= $sneakytag;
366    $self->_do_middle_main_loop;
367    $sneakytag = quotemeta($sneakytag);
368    my $index = $self->index_as_html();
369    if( $$out =~ s/$sneakytag/$index/s ) {
370      # Expected case
371      DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
372    } else {
373      DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
374      # I don't think this should ever happen.
375    }
376    return 1;
377  }
378
379  unless( $self->output_fh ) {
380    require Carp;
381    Carp::confess("Parser object \$p doesn't seem to have any output object!  I don't know how to deal with that.");
382  }
383
384  # If we get here, we're outputting to a FH.  So we need to do some magic.
385  # Namely, divert all content to a string, which we output after the index.
386  my $fh = $self->output_fh;
387  my $content = '';
388  {
389    # Our horrible bait and switch:
390    $self->output_string( \$content );
391    $self->_do_middle_main_loop;
392    $self->abandon_output_string();
393    $self->output_fh($fh);
394  }
395  print $fh $self->index_as_html();
396  print $fh $content;
397
398  return 1;
399}
400
401###########################################################################
402
403sub index_as_html {
404  my $self = $_[0];
405  # This is meant to be called AFTER the input document has been parsed!
406
407  my $points = $self->{'PSHTML_index_points'} || [];
408
409  @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
410   # There's no point in having a 0-item or 1-item index, I dare say.
411
412  my(@out) = qq{\n<div class='indexgroup'>};
413  my $level = 0;
414
415  my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
416  foreach my $p (@$points, ['head0', '(end)']) {
417    ($tagname, $text) = @$p;
418    $anchorname = $self->section_escape($text);
419    if( $tagname =~ m{^head(\d+)$} ) {
420      $target_level = 0 + $1;
421    } else {  # must be some kinda list item
422      if($previous_tagname =~ m{^head\d+$} ) {
423        $target_level = $level + 1;
424      } else {
425        $target_level = $level;  # no change needed
426      }
427    }
428
429    # Get to target_level by opening or closing ULs
430    while($level > $target_level)
431     { --$level; push @out, ("  " x $level) . "</ul>"; }
432    while($level < $target_level)
433     { ++$level; push @out, ("  " x ($level-1))
434       . "<ul   class='indexList indexList$level'>"; }
435
436    $previous_tagname = $tagname;
437    next unless $level;
438
439    $indent = '  '  x $level;
440    push @out, sprintf
441      "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
442      $indent, $level, esc($anchorname), esc($text)
443    ;
444  }
445  push @out, "</div>\n";
446  return join "\n", @out;
447}
448
449###########################################################################
450
451sub _do_middle_main_loop {
452  my $self = $_[0];
453  my $fh = $self->{'output_fh'};
454  my $tagmap = $self->{'Tagmap'};
455
456  $self->__adjust_html_h_levels;
457
458  my($token, $type, $tagname, $linkto, $linktype);
459  my @stack;
460  my $dont_wrap = 0;
461
462  while($token = $self->get_token) {
463
464    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
465    if( ($type = $token->type) eq 'start' ) {
466      if(($tagname = $token->tagname) eq 'L') {
467        $linktype = $token->attr('type') || 'insane';
468
469        $linkto = $self->do_link($token);
470
471        if(defined $linkto and length $linkto) {
472          esc($linkto);
473            #   (Yes, SGML-escaping applies on top of %-escaping!
474            #   But it's rarely noticeable in practice.)
475          print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
476        } else {
477          print $fh "<a>"; # Yes, an 'a' element with no attributes!
478        }
479
480      } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
481        print $fh $tagmap->{$tagname} || next;
482
483        my @to_unget;
484        while(1) {
485          push @to_unget, $self->get_token;
486          last if $to_unget[-1]->is_end
487              and $to_unget[-1]->tagname eq $tagname;
488
489          # TODO: support for X<...>'s found in here?  (maybe hack into linearize_tokens)
490        }
491
492        my $name = $self->linearize_tokens(@to_unget);
493        $name = $self->do_section($name, $token) if defined $name;
494
495        print $fh "<a ";
496        if ($tagname =~ m/^head\d$/s) {
497            print $fh "class='u'", $self->index
498                ? " href='#___top' title='click to go to top of document'\n"
499                : "\n";
500        }
501
502        if(defined $name) {
503          my $esc = esc(  $self->section_name_tidy( $name ) );
504          print $fh qq[name="$esc"];
505          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
506           " tokens as \"$name\".\n";
507          push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
508           if $ToIndex{ $tagname };
509            # Obviously, this discards all formatting codes (saving
510            #  just their content), but ahwell.
511
512        } else {  # ludicrously long, so nevermind
513          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
514           " tokens, but it was too long, so nevermind.\n";
515        }
516        print $fh "\n>";
517        $self->unget_token(@to_unget);
518
519      } elsif ($tagname eq 'Data') {
520        my $next = $self->get_token;
521        next unless defined $next;
522        unless( $next->type eq 'text' ) {
523          $self->unget_token($next);
524          next;
525        }
526        DEBUG and print STDERR "    raw text ", $next->text, "\n";
527        # The parser sometimes preserves newlines and sometimes doesn't!
528        (my $text = $next->text) =~ s/\n\z//;
529        print $fh $text, "\n";
530        next;
531
532      } else {
533        if( $tagname =~ m/^over-/s ) {
534          push @stack, '';
535        } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
536          print $fh $stack[-1];
537          $stack[-1] = '';
538        }
539        print $fh $tagmap->{$tagname} || next;
540        ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
541          or $tagname eq 'X';
542      }
543
544    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
545    } elsif( $type eq 'end' ) {
546      if( ($tagname = $token->tagname) =~ m/^over-/s ) {
547        if( my $end = pop @stack ) {
548          print $fh $end;
549        }
550      } elsif( $tagname =~ m/^item-/s and @stack) {
551        $stack[-1] = $tagmap->{"/$tagname"};
552        if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
553          $self->unget_token($next);
554          if( $next->type eq 'start' ) {
555            print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
556            $stack[-1] = $tagmap->{"/item-body"};
557          }
558        }
559        next;
560      }
561      print $fh $tagmap->{"/$tagname"} || next;
562      --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
563
564    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
565    } elsif( $type eq 'text' ) {
566      esc($type = $token->text);  # reuse $type, why not
567      $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
568      print $fh $type;
569    }
570
571  }
572  return 1;
573}
574
575###########################################################################
576#
577
578sub do_section {
579  my($self, $name, $token) = @_;
580  return $name;
581}
582
583sub do_link {
584  my($self, $token) = @_;
585  my $type = $token->attr('type');
586  if(!defined $type) {
587    $self->whine("Typeless L!?", $token->attr('start_line'));
588  } elsif( $type eq 'pod') { return $self->do_pod_link($token);
589  } elsif( $type eq 'url') { return $self->do_url_link($token);
590  } elsif( $type eq 'man') { return $self->do_man_link($token);
591  } else {
592    $self->whine("L of unknown type $type!?", $token->attr('start_line'));
593  }
594  return 'FNORG'; # should never get called
595}
596
597# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
598
599sub do_url_link { return $_[1]->attr('to') }
600
601sub do_man_link {
602  my ($self, $link) = @_;
603  my $to = $link->attr('to');
604  my $frag = $link->attr('section');
605
606  return undef unless defined $to and length $to; # should never happen
607
608  $frag = $self->section_escape($frag)
609   if defined $frag and length($frag .= ''); # (stringify)
610
611  DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
612
613  return $self->resolve_man_page_link($to, $frag);
614}
615
616
617sub do_pod_link {
618  # And now things get really messy...
619  my($self, $link) = @_;
620  my $to = $link->attr('to');
621  my $section = $link->attr('section');
622  return undef unless(  # should never happen
623    (defined $to and length $to) or
624    (defined $section and length $section)
625  );
626
627  $section = $self->section_escape($section)
628   if defined $section and length($section .= ''); # (stringify)
629
630  DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
631   $to || "(nil)",  $section || "(nil)";
632
633  {
634    # An early hack:
635    my $complete_url = $self->resolve_pod_link_by_table($to, $section);
636    if( $complete_url ) {
637      DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
638        $complete_url, "\n  (Returning that.)\n";
639      return $complete_url;
640    } else {
641      DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
642       " didn't return anything interesting.\n";
643    }
644  }
645
646  if(defined $to and length $to) {
647    # Give this routine first hack again
648    my $there = $self->resolve_pod_link_by_table($to);
649    if(defined $there and length $there) {
650      DEBUG > 1
651       and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
652    } else {
653      $there =
654        $self->resolve_pod_page_link($to, $section);
655         # (I pass it the section value, but I don't see a
656         #  particular reason it'd use it.)
657      DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
658      unless( defined $there and length $there ) {
659        DEBUG and print STDERR "Can't resolve $to\n";
660        return undef;
661      }
662      # resolve_pod_page_link returning undef is how it
663      #  can signal that it gives up on making a link
664    }
665    $to = $there;
666  }
667
668  #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
669
670  my $out = (defined $to and length $to) ? $to : '';
671  $out .= "#" . $section if defined $section and length $section;
672
673  unless(length $out) { # sanity check
674    DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
675     $to || "(nil)",  $section || "(nil)";
676    return undef;
677  }
678
679  DEBUG and print STDERR "Resolved to $out\n";
680  return $out;
681}
682
683
684# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
685
686sub section_escape {
687  my($self, $section) = @_;
688  return $self->section_url_escape(
689    $self->section_name_tidy($section)
690  );
691}
692
693sub section_name_tidy {
694  my($self, $section) = @_;
695  $section =~ s/^\s+//;
696  $section =~ s/\s+$//;
697  $section =~ tr/ /_/;
698  if ($] ge 5.006) {
699    $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
700  } elsif ('A' eq chr(65)) { # But not on early EBCDIC
701    $section =~ tr/\x00-\x1F\x80-\x9F//d;
702  }
703  $section = $self->unicode_escape_url($section);
704  $section = '_' unless length $section;
705  return $section;
706}
707
708sub section_url_escape  { shift->general_url_escape(@_) }
709sub pagepath_url_escape { shift->general_url_escape(@_) }
710sub manpage_url_escape  { shift->general_url_escape(@_) }
711
712sub general_url_escape {
713  my($self, $string) = @_;
714
715  $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
716     # express Unicode things as urlencode(utf(orig)).
717
718  # A pretty conservative escaping, behoovey even for query components
719  #  of a URL (see RFC 2396)
720
721  if ($] ge 5.007_003) {
722    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
723  } else { # Is broken for non-ASCII platforms on early perls
724    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
725  }
726   # Yes, stipulate the list without a range, so that this can work right on
727   #  all charsets that this module happens to run under.
728
729  return $string;
730}
731
732#--------------------------------------------------------------------------
733#
734# Oh look, a yawning portal to Hell!  Let's play touch football right by it!
735#
736
737sub resolve_pod_page_link {
738  # resolve_pod_page_link must return a properly escaped URL
739  my $self = shift;
740  return $self->batch_mode()
741   ? $self->resolve_pod_page_link_batch_mode(@_)
742   : $self->resolve_pod_page_link_singleton_mode(@_)
743  ;
744}
745
746sub resolve_pod_page_link_singleton_mode {
747  my($self, $it) = @_;
748  return undef unless defined $it and length $it;
749  my $url = $self->pagepath_url_escape($it);
750
751  $url =~ s{::$}{}s; # probably never comes up anyway
752  $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
753
754  return undef unless length $url;
755  return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
756}
757
758sub resolve_pod_page_link_batch_mode {
759  my($self, $to) = @_;
760  DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
761  my @path = grep length($_), split m/::/s, $to, -1;
762  unless( @path ) { # sanity
763    DEBUG and print STDERR "Very odd!  Splitting $to gives (nil)!\n";
764    return undef;
765  }
766  $self->batch_mode_rectify_path(\@path);
767  my $out = join('/', map $self->pagepath_url_escape($_), @path)
768    . $HTML_EXTENSION;
769  DEBUG > 1 and print STDERR " => $out\n";
770  return $out;
771}
772
773sub batch_mode_rectify_path {
774  my($self, $pathbits) = @_;
775  my $level = $self->batch_mode_current_level;
776  $level--; # how many levels up to go to get to the root
777  if($level < 1) {
778    unshift @$pathbits, '.'; # just to be pretty
779  } else {
780    unshift @$pathbits, ('..') x $level;
781  }
782  return;
783}
784
785sub resolve_man_page_link {
786  my ($self, $to, $frag) = @_;
787  my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
788
789  return undef unless defined $page and length $page;
790  $section ||= 1;
791
792  return $self->man_url_prefix . "$section/"
793      . $self->manpage_url_escape($page)
794      . $self->man_url_postfix;
795}
796
797#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
798
799sub resolve_pod_link_by_table {
800  # A crazy hack to allow specifying custom L<foo> => URL mappings
801
802  return unless $_[0]->{'podhtml_LOT'};  # An optimizy shortcut
803
804  my($self, $to, $section) = @_;
805
806  # TODO: add a method that actually populates podhtml_LOT from a file?
807
808  if(defined $section) {
809    $to = '' unless defined $to and length $to;
810    return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
811  } else {
812    return $self->{'podhtml_LOT'}{$to};            # quite possibly undef!
813  }
814  return;
815}
816
817###########################################################################
818
819sub linearize_tokens {  # self, tokens
820  my $self = shift;
821  my $out = '';
822
823  my $t;
824  while($t = shift @_) {
825    if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
826      $out .= $t; # a string, or some insane thing
827    } elsif($t->is_text) {
828      $out .= $t->text;
829    } elsif($t->is_start and $t->tag eq 'X') {
830      # Ignore until the end of this X<...> sequence:
831      my $x_open = 1;
832      while($x_open) {
833        next if( ($t = shift @_)->is_text );
834        if(   $t->is_start and $t->tag eq 'X') { ++$x_open }
835        elsif($t->is_end   and $t->tag eq 'X') { --$x_open }
836      }
837    }
838  }
839  return undef if length $out > $Linearization_Limit;
840  return $out;
841}
842
843#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
844
845sub unicode_escape_url {
846  my($self, $string) = @_;
847  $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
848    #  Turn char 1234 into "(1234)"
849  return $string;
850}
851
852#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
853sub esc { # a function.
854  if(defined wantarray) {
855    if(wantarray) {
856      @_ = splice @_; # break aliasing
857    } else {
858      my $x = shift;
859      if ($] ge 5.007_003) {
860        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
861      } else { # Is broken for non-ASCII platforms on early perls
862        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
863      }
864      return $x;
865    }
866  }
867  foreach my $x (@_) {
868    # Escape things very cautiously:
869    if (defined $x) {
870      if ($] ge 5.007_003) {
871        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
872      } else { # Is broken for non-ASCII platforms on early perls
873        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
874      }
875    }
876    # Leave out "- so that "--" won't make it thru in X-generated comments
877    #  with text in them.
878
879    # Yes, stipulate the list without a range, so that this can work right on
880    #  all charsets that this module happens to run under.
881  }
882  return @_;
883}
884
885#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
886
8871;
888__END__
889
890=head1 NAME
891
892Pod::Simple::HTML - convert Pod to HTML
893
894=head1 SYNOPSIS
895
896  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
897
898
899=head1 DESCRIPTION
900
901This class is for making an HTML rendering of a Pod document.
902
903This is a subclass of L<Pod::Simple::PullParser> and inherits all its
904methods (and options).
905
906Note that if you want to do a batch conversion of a lot of Pod
907documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
908
909
910
911=head1 CALLING FROM THE COMMAND LINE
912
913TODO
914
915  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
916
917
918
919=head1 CALLING FROM PERL
920
921=head2 Minimal code
922
923  use Pod::Simple::HTML;
924  my $p = Pod::Simple::HTML->new;
925  $p->output_string(\my $html);
926  $p->parse_file('path/to/Module/Name.pm');
927  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
928  print $out $html;
929
930=head2 More detailed example
931
932  use Pod::Simple::HTML;
933
934Set the content type:
935
936  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
937
938  my $p = Pod::Simple::HTML->new;
939
940Include a single javascript source:
941
942  $p->html_javascript('http://abc.com/a.js');
943
944Or insert multiple javascript source in the header
945(or for that matter include anything, thought this is not recommended)
946
947  $p->html_javascript('
948      <script type="text/javascript" src="http://abc.com/b.js"></script>
949      <script type="text/javascript" src="http://abc.com/c.js"></script>');
950
951Include a single css source in the header:
952
953  $p->html_css('/style.css');
954
955or insert multiple css sources:
956
957  $p->html_css('
958      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css">
959      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">');
960
961Tell the parser where should the output go. In this case it will be placed in the $html variable:
962
963  my $html;
964  $p->output_string(\$html);
965
966Parse and process a file with pod in it:
967
968  $p->parse_file('path/to/Module/Name.pm');
969
970=head1 METHODS
971
972TODO
973all (most?) accessorized methods
974
975The following variables need to be set B<before> the call to the ->new constructor.
976
977Set the string that is included before the opening <html> tag:
978
979  $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
980	 "http://www.w3.org/TR/html4/loose.dtd">\n};
981
982Set the content-type in the HTML head: (defaults to ISO-8859-1)
983
984  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
985
986Set the value that will be embedded in the opening tags of F, C tags and verbatim text.
987F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "")
988
989  $Pod::Simple::HTML::Computerese =  ' class="some_class_name';
990
991=head2 html_css
992
993=head2 html_javascript
994
995=head2 title_prefix
996
997=head2 title_postfix
998
999=head2 html_header_before_title
1000
1001This includes everything before the <title> opening tag including the Document type
1002and including the opening <title> tag. The following call will set it to be a simple HTML
1003file:
1004
1005  $p->html_header_before_title('<html><head><title>');
1006
1007=head2 top_anchor
1008
1009By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML.
1010You can change it by calling
1011
1012  $p->top_anchor('<a name="zz" >');
1013
1014=head2 html_h_level
1015
1016Normally =head1 will become <h1>, =head2 will become <h2> etc.
1017Using the html_h_level method will change these levels setting the h level
1018of =head1 tags:
1019
1020  $p->html_h_level(3);
1021
1022Will make sure that =head1 will become <h3> and =head2 will become <h4> etc...
1023
1024
1025=head2 index
1026
1027Set it to some true value if you want to have an index (in reality a table of contents)
1028to be added at the top of the generated HTML.
1029
1030  $p->index(1);
1031
1032=head2 html_header_after_title
1033
1034Includes the closing tag of </title> and through the rest of the head
1035till the opening of the body
1036
1037  $p->html_header_after_title('</title>...</head><body id="my_id">');
1038
1039=head2 html_footer
1040
1041The very end of the document:
1042
1043  $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
1044
1045=head1 SUBCLASSING
1046
1047Can use any of the methods described above but for further customization
1048one needs to override some of the methods:
1049
1050  package My::Pod;
1051  use strict;
1052  use warnings;
1053
1054  use base 'Pod::Simple::HTML';
1055
1056  # needs to return a URL string such
1057  # http://some.other.com/page.html
1058  # #anchor_in_the_same_file
1059  # /internal/ref.html
1060  sub do_pod_link {
1061    # My::Pod object and Pod::Simple::PullParserStartToken object
1062    my ($self, $link) = @_;
1063
1064    say $link->tagname;          # will be L for links
1065    say $link->attr('to');       #
1066    say $link->attr('type');     # will be 'pod' always
1067    say $link->attr('section');
1068
1069    # Links local to our web site
1070    if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') {
1071      my $to = $link->attr('to');
1072      if ($to =~ /^Padre::/) {
1073          $to =~ s{::}{/}g;
1074          return "/docs/Padre/$to.html";
1075      }
1076    }
1077
1078    # all other links are generated by the parent class
1079    my $ret = $self->SUPER::do_pod_link($link);
1080    return $ret;
1081  }
1082
1083  1;
1084
1085Meanwhile in script.pl:
1086
1087  use My::Pod;
1088
1089  my $p = My::Pod->new;
1090
1091  my $html;
1092  $p->output_string(\$html);
1093  $p->parse_file('path/to/Module/Name.pm');
1094  open my $out, '>', 'out.html' or die;
1095  print $out $html;
1096
1097TODO
1098
1099maybe override do_beginning do_end
1100
1101=head1 SEE ALSO
1102
1103L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
1104
1105TODO: a corpus of sample Pod input and HTML output?  Or common
1106idioms?
1107
1108=head1 SUPPORT
1109
1110Questions or discussion about POD and Pod::Simple should be sent to the
1111pod-people@perl.org mail list. Send an empty email to
1112pod-people-subscribe@perl.org to subscribe.
1113
1114This module is managed in an open GitHub repository,
1115L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
1116to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
1117
1118Patches against Pod::Simple are welcome. Please send bug reports to
1119<bug-pod-simple@rt.cpan.org>.
1120
1121=head1 COPYRIGHT AND DISCLAIMERS
1122
1123Copyright (c) 2002-2004 Sean M. Burke.
1124
1125This library is free software; you can redistribute it and/or modify it
1126under the same terms as Perl itself.
1127
1128This program is distributed in the hope that it will be useful, but
1129without any warranty; without even the implied warranty of
1130merchantability or fitness for a particular purpose.
1131
1132=head1 ACKNOWLEDGEMENTS
1133
1134Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
1135L<Linux man pages online|http://man.he.net/> site for man page links.
1136
1137Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
1138site for Perl module links.
1139
1140=head1 AUTHOR
1141
1142Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
1143But don't bother him, he's retired.
1144
1145Pod::Simple is maintained by:
1146
1147=over
1148
1149=item * Allison Randal C<allison@perl.org>
1150
1151=item * Hans Dieter Pearcey C<hdp@cpan.org>
1152
1153=item * David E. Wheeler C<dwheeler@cpan.org>
1154
1155=back
1156
1157=cut
1158