1package Pod::Simple::HTML; 2use strict; 3use warnings; 4use Pod::Simple::PullParser (); 5our @ISA = ('Pod::Simple::PullParser'); 6our $VERSION = '3.45'; 7BEGIN { 8 if(defined &DEBUG) { } # no-op 9 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } 10 else { *DEBUG = sub () {0}; } 11} 12 13our $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. 14 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 15 # "http://www.w3.org/TR/html4/loose.dtd">\n}; 16 17our $Content_decl ||= 18 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; 19 20our $HTML_EXTENSION; 21$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; 22our $Computerese; 23$Computerese = "" unless defined $Computerese; 24our $LamePad; 25$LamePad = '' unless defined $LamePad; 26 27our $Linearization_Limit; 28$Linearization_Limit = 120 unless defined $Linearization_Limit; 29 # headings/items longer than that won't get an <a name="..."> 30our $Perldoc_URL_Prefix; 31$Perldoc_URL_Prefix = 'https://metacpan.org/pod/' 32 unless defined $Perldoc_URL_Prefix; 33our $Perldoc_URL_Postfix; 34$Perldoc_URL_Postfix = '' 35 unless defined $Perldoc_URL_Postfix; 36 37 38our $Man_URL_Prefix = 'http://man.he.net/man'; 39our $Man_URL_Postfix = ''; 40 41our $Title_Prefix; 42$Title_Prefix = '' unless defined $Title_Prefix; 43our $Title_Postfix; 44$Title_Postfix = '' unless defined $Title_Postfix; 45our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text 46 # 'item-text' stuff in the index doesn't quite work, and may 47 # not be a good idea anyhow. 48 49 50__PACKAGE__->_accessorize( 51 'perldoc_url_prefix', 52 # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what 53 # to put before the "Foo%3a%3aBar". 54 # (for singleton mode only?) 55 'perldoc_url_postfix', 56 # what to put after "Foo%3a%3aBar" in the URL. Normally "". 57 58 'man_url_prefix', 59 # In turning L<crontab(5)> into http://whatever/man/1/crontab, what 60 # to put before the "1/crontab". 61 'man_url_postfix', 62 # what to put after the "1/crontab" in the URL. Normally "". 63 64 'batch_mode', # whether we're in batch mode 65 'batch_mode_current_level', 66 # When in batch mode, how deep the current module is: 1 for "LWP", 67 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 68 69 'title_prefix', 'title_postfix', 70 # What to put before and after the title in the head. 71 # Should already be &-escaped 72 73 'html_h_level', 74 75 'html_header_before_title', 76 'html_header_after_title', 77 'html_footer', 78 'top_anchor', 79 80 'index', # whether to add an index at the top of each page 81 # (actually it's a table-of-contents, but we'll call it an index, 82 # out of apparently longstanding habit) 83 84 'html_css', # URL of CSS file to point to 85 'html_javascript', # URL of Javascript file to point to 86 87 'force_title', # should already be &-escaped 88 'default_title', # should already be &-escaped 89); 90 91#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 92my @_to_accept; 93 94our %Tagmap = ( 95 'Verbatim' => "\n<pre$Computerese>", 96 '/Verbatim' => "</pre>\n", 97 'VerbatimFormatted' => "\n<pre$Computerese>", 98 '/VerbatimFormatted' => "</pre>\n", 99 'VerbatimB' => "<b>", 100 '/VerbatimB' => "</b>", 101 'VerbatimI' => "<i>", 102 '/VerbatimI' => "</i>", 103 'VerbatimBI' => "<b><i>", 104 '/VerbatimBI' => "</i></b>", 105 106 107 'Data' => "\n", 108 '/Data' => "\n", 109 110 'head1' => "\n<h1>", # And also stick in an <a name="..."> 111 'head2' => "\n<h2>", # '' 112 'head3' => "\n<h3>", # '' 113 'head4' => "\n<h4>", # '' 114 'head5' => "\n<h5>", # '' 115 'head6' => "\n<h6>", # '' 116 '/head1' => "</a></h1>\n", 117 '/head2' => "</a></h2>\n", 118 '/head3' => "</a></h3>\n", 119 '/head4' => "</a></h4>\n", 120 '/head5' => "</a></h5>\n", 121 '/head6' => "</a></h6>\n", 122 123 'X' => "<!--\n\tINDEX: ", 124 '/X' => "\n-->", 125 126 changes(qw( 127 Para=p 128 B=b I=i 129 over-bullet=ul 130 over-number=ol 131 over-text=dl 132 over-block=blockquote 133 item-bullet=li 134 item-number=li 135 item-text=dt 136 )), 137 changes2( 138 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } 139 qw[ 140 sample=samp 141 definition=dfn 142 keyboard=kbd 143 variable=var 144 citation=cite 145 abbreviation=abbr 146 acronym=acronym 147 subscript=sub 148 superscript=sup 149 big=big 150 small=small 151 underline=u 152 strikethrough=s 153 preformat=pre 154 teletype=tt 155 ] # no point in providing a way to get <q>...</q>, I think 156 ), 157 158 '/item-bullet' => "</li>$LamePad\n", 159 '/item-number' => "</li>$LamePad\n", 160 '/item-text' => "</a></dt>$LamePad\n", 161 'item-body' => "\n<dd>", 162 '/item-body' => "</dd>\n", 163 164 165 'B' => "<b>", '/B' => "</b>", 166 'I' => "<i>", '/I' => "</i>", 167 'F' => "<em$Computerese>", '/F' => "</em>", 168 'C' => "<code$Computerese>", '/C' => "</code>", 169 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! 170 '/L' => "</a>", 171); 172 173sub changes { 174 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 175 ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" 176 } @_; 177} 178sub changes2 { 179 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 180 ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" 181 } @_; 182} 183 184#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } 186 # Just so we can run from the command line. No options. 187 # For that, use perldoc! 188#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 189 190sub new { 191 my $new = shift->SUPER::new(@_); 192 #$new->nix_X_codes(1); 193 $new->nbsp_for_S(1); 194 $new->accept_targets( 'html', 'HTML' ); 195 $new->accept_codes('VerbatimFormatted'); 196 $new->accept_codes(@_to_accept); 197 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n"; 198 199 $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); 200 $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); 201 $new->man_url_prefix( $Man_URL_Prefix ); 202 $new->man_url_postfix( $Man_URL_Postfix ); 203 $new->title_prefix( $Title_Prefix ); 204 $new->title_postfix( $Title_Postfix ); 205 206 $new->html_header_before_title( 207 qq[$Doctype_decl<html><head><title>] 208 ); 209 $new->html_header_after_title( join "\n" => 210 "</title>", 211 $Content_decl, 212 "</head>\n<body class='pod'>", 213 $new->version_tag_comment, 214 "<!-- start doc -->\n", 215 ); 216 $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 217 $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" ); 218 219 $new->{'Tagmap'} = {%Tagmap}; 220 221 return $new; 222} 223 224sub __adjust_html_h_levels { 225 my ($self) = @_; 226 my $Tagmap = $self->{'Tagmap'}; 227 228 my $add = $self->html_h_level; 229 return unless defined $add; 230 return if ($self->{'Adjusted_html_h_levels'}||0) == $add; 231 232 $add -= 1; 233 for (1 .. 6) { 234 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e; 235 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e; 236 } 237} 238 239sub batch_mode_page_object_init { 240 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; 241 DEBUG and print STDERR "Initting $self\n for $module\n", 242 " in $infile\n out $outfile\n depth $depth\n"; 243 $self->batch_mode(1); 244 $self->batch_mode_current_level($depth); 245 return $self; 246} 247 248sub run { 249 my $self = $_[0]; 250 return $self->do_middle if $self->bare_output; 251 return 252 $self->do_beginning && $self->do_middle && $self->do_end; 253} 254 255#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 256 257sub do_beginning { 258 my $self = $_[0]; 259 260 my $title; 261 262 if(defined $self->force_title) { 263 $title = $self->force_title; 264 DEBUG and print STDERR "Forcing title to be $title\n"; 265 } else { 266 # Actually try looking for the title in the document: 267 $title = $self->get_short_title(); 268 unless($self->content_seen) { 269 DEBUG and print STDERR "No content seen in search for title.\n"; 270 return; 271 } 272 $self->{'Title'} = $title; 273 274 if(defined $title and $title =~ m/\S/) { 275 $title = $self->title_prefix . esc($title) . $self->title_postfix; 276 } else { 277 $title = $self->default_title; 278 $title = '' unless defined $title; 279 DEBUG and print STDERR "Title defaults to $title\n"; 280 } 281 } 282 283 284 my $after = $self->html_header_after_title || ''; 285 if($self->html_css) { 286 my $link = 287 $self->html_css =~ m/</ 288 ? $self->html_css # It's a big blob of markup, let's drop it in 289 : sprintf( # It's just a URL, so let's wrap it up 290 qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], 291 $self->html_css, 292 ); 293 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 294 } 295 $self->_add_top_anchor(\$after); 296 297 if($self->html_javascript) { 298 my $link = 299 $self->html_javascript =~ m/</ 300 ? $self->html_javascript # It's a big blob of markup, let's drop it in 301 : sprintf( # It's just a URL, so let's wrap it up 302 qq[<script type="text/javascript" src="%s"></script>\n], 303 $self->html_javascript, 304 ); 305 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 306 } 307 308 print {$self->{'output_fh'}} 309 $self->html_header_before_title || '', 310 $title, # already escaped 311 $after, 312 ; 313 314 DEBUG and print STDERR "Returning from do_beginning...\n"; 315 return 1; 316} 317 318sub _add_top_anchor { 319 my($self, $text_r) = @_; 320 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack 321 $$text_r .= $self->top_anchor || ''; 322 } 323 return; 324} 325 326sub version_tag_comment { 327 my $self = shift; 328 return sprintf 329 "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", 330 esc( 331 ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), 332 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)), 333 ), $self->_modnote(), 334 ; 335} 336 337sub _modnote { 338 my $class = ref($_[0]) || $_[0]; 339 return join "\n " => grep m/\S/, split "\n", 340 341qq{ 342If you want to change this HTML document, you probably shouldn't do that 343by changing it directly. Instead, see about changing the calling options 344to $class, and/or subclassing $class, 345then reconverting this document from the Pod source. 346When in doubt, email the author of $class for advice. 347See 'perldoc $class' for more info. 348}; 349 350} 351 352sub do_end { 353 my $self = $_[0]; 354 print {$self->{'output_fh'}} $self->html_footer || ''; 355 return 1; 356} 357 358# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 359# Normally this would just be a call to _do_middle_main_loop -- but we 360# have to do some elaborate things to emit all the content and then 361# summarize it and output it /before/ the content that it's a summary of. 362 363sub do_middle { 364 my $self = $_[0]; 365 return $self->_do_middle_main_loop unless $self->index; 366 367 if( $self->output_string ) { 368 # An efficiency hack 369 my $out = $self->output_string; #it's a reference to it 370 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; 371 $$out .= $sneakytag; 372 $self->_do_middle_main_loop; 373 $sneakytag = quotemeta($sneakytag); 374 my $index = $self->index_as_html(); 375 if( $$out =~ s/$sneakytag/$index/s ) { 376 # Expected case 377 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n"; 378 } else { 379 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n"; 380 # I don't think this should ever happen. 381 } 382 return 1; 383 } 384 385 unless( $self->output_fh ) { 386 require Carp; 387 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); 388 } 389 390 # If we get here, we're outputting to a FH. So we need to do some magic. 391 # Namely, divert all content to a string, which we output after the index. 392 my $fh = $self->output_fh; 393 my $content = ''; 394 { 395 # Our horrible bait and switch: 396 $self->output_string( \$content ); 397 $self->_do_middle_main_loop; 398 $self->abandon_output_string(); 399 $self->output_fh($fh); 400 } 401 print $fh $self->index_as_html(); 402 print $fh $content; 403 404 return 1; 405} 406 407########################################################################### 408 409sub index_as_html { 410 my $self = $_[0]; 411 # This is meant to be called AFTER the input document has been parsed! 412 413 my $points = $self->{'PSHTML_index_points'} || []; 414 415 @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; 416 # There's no point in having a 0-item or 1-item index, I dare say. 417 418 my(@out) = qq{\n<div class='indexgroup'>}; 419 my $level = 0; 420 421 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); 422 foreach my $p (@$points, ['head0', '(end)']) { 423 ($tagname, $text) = @$p; 424 $anchorname = $self->section_escape($text); 425 if( $tagname =~ m{^head(\d+)$} ) { 426 $target_level = 0 + $1; 427 } else { # must be some kinda list item 428 if($previous_tagname =~ m{^head\d+$} ) { 429 $target_level = $level + 1; 430 } else { 431 $target_level = $level; # no change needed 432 } 433 } 434 435 # Get to target_level by opening or closing ULs 436 while($level > $target_level) 437 { --$level; push @out, (" " x $level) . "</ul>"; } 438 while($level < $target_level) 439 { ++$level; push @out, (" " x ($level-1)) 440 . "<ul class='indexList indexList$level'>"; } 441 442 $previous_tagname = $tagname; 443 next unless $level; 444 445 $indent = ' ' x $level; 446 push @out, sprintf 447 "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", 448 $indent, $level, esc($anchorname), esc($text) 449 ; 450 } 451 push @out, "</div>\n"; 452 return join "\n", @out; 453} 454 455########################################################################### 456 457sub _do_middle_main_loop { 458 my $self = $_[0]; 459 my $fh = $self->{'output_fh'}; 460 my $tagmap = $self->{'Tagmap'}; 461 462 $self->__adjust_html_h_levels; 463 464 my($token, $type, $tagname, $linkto, $linktype); 465 my @stack; 466 my $dont_wrap = 0; 467 468 while($token = $self->get_token) { 469 470 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 471 if( ($type = $token->type) eq 'start' ) { 472 if(($tagname = $token->tagname) eq 'L') { 473 $linktype = $token->attr('type') || 'insane'; 474 475 $linkto = $self->do_link($token); 476 477 if(defined $linkto and length $linkto) { 478 esc($linkto); 479 # (Yes, SGML-escaping applies on top of %-escaping! 480 # But it's rarely noticeable in practice.) 481 print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; 482 } else { 483 print $fh "<a>"; # Yes, an 'a' element with no attributes! 484 } 485 486 } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { 487 print $fh $tagmap->{$tagname} || next; 488 489 my @to_unget; 490 while(1) { 491 push @to_unget, $self->get_token; 492 last if $to_unget[-1]->is_end 493 and $to_unget[-1]->tagname eq $tagname; 494 495 # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) 496 } 497 498 my $name = $self->linearize_tokens(@to_unget); 499 $name = $self->do_section($name, $token) if defined $name; 500 501 print $fh "<a "; 502 if ($tagname =~ m/^head\d$/s) { 503 print $fh "class='u'", $self->index 504 ? " href='#___top' title='click to go to top of document'\n" 505 : "\n"; 506 } 507 508 if(defined $name) { 509 my $esc = esc( $self->section_name_tidy( $name ) ); 510 print $fh qq[name="$esc"]; 511 DEBUG and print STDERR "Linearized ", scalar(@to_unget), 512 " tokens as \"$name\".\n"; 513 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] 514 if $ToIndex{ $tagname }; 515 # Obviously, this discards all formatting codes (saving 516 # just their content), but ahwell. 517 518 } else { # ludicrously long, so nevermind 519 DEBUG and print STDERR "Linearized ", scalar(@to_unget), 520 " tokens, but it was too long, so nevermind.\n"; 521 } 522 print $fh "\n>"; 523 $self->unget_token(@to_unget); 524 525 } elsif ($tagname eq 'Data') { 526 my $next = $self->get_token; 527 next unless defined $next; 528 unless( $next->type eq 'text' ) { 529 $self->unget_token($next); 530 next; 531 } 532 DEBUG and print STDERR " raw text ", $next->text, "\n"; 533 # The parser sometimes preserves newlines and sometimes doesn't! 534 (my $text = $next->text) =~ s/\n\z//; 535 print $fh $text, "\n"; 536 next; 537 538 } else { 539 if( $tagname =~ m/^over-/s ) { 540 push @stack, ''; 541 } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { 542 print $fh $stack[-1]; 543 $stack[-1] = ''; 544 } 545 print $fh $tagmap->{$tagname} || next; 546 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" 547 or $tagname eq 'X'; 548 } 549 550 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 551 } elsif( $type eq 'end' ) { 552 if( ($tagname = $token->tagname) =~ m/^over-/s ) { 553 if( my $end = pop @stack ) { 554 print $fh $end; 555 } 556 } elsif( $tagname =~ m/^item-/s and @stack) { 557 $stack[-1] = $tagmap->{"/$tagname"}; 558 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { 559 $self->unget_token($next); 560 if( $next->type eq 'start' ) { 561 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; 562 $stack[-1] = $tagmap->{"/item-body"}; 563 } 564 } 565 next; 566 } 567 print $fh $tagmap->{"/$tagname"} || next; 568 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; 569 570 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 571 } elsif( $type eq 'text' ) { 572 esc($type = $token->text); # reuse $type, why not 573 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; 574 print $fh $type; 575 } 576 577 } 578 return 1; 579} 580 581########################################################################### 582# 583 584sub do_section { 585 my($self, $name, $token) = @_; 586 return $name; 587} 588 589sub do_link { 590 my($self, $token) = @_; 591 my $type = $token->attr('type'); 592 if(!defined $type) { 593 $self->whine("Typeless L!?", $token->attr('start_line')); 594 } elsif( $type eq 'pod') { return $self->do_pod_link($token); 595 } elsif( $type eq 'url') { return $self->do_url_link($token); 596 } elsif( $type eq 'man') { return $self->do_man_link($token); 597 } else { 598 $self->whine("L of unknown type $type!?", $token->attr('start_line')); 599 } 600 return 'FNORG'; # should never get called 601} 602 603# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 604 605sub do_url_link { return $_[1]->attr('to') } 606 607sub do_man_link { 608 my ($self, $link) = @_; 609 my $to = $link->attr('to'); 610 my $frag = $link->attr('section'); 611 612 return undef unless defined $to and length $to; # should never happen 613 614 $frag = $self->section_escape($frag) 615 if defined $frag and length($frag .= ''); # (stringify) 616 617 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n"; 618 619 return $self->resolve_man_page_link($to, $frag); 620} 621 622 623sub do_pod_link { 624 # And now things get really messy... 625 my($self, $link) = @_; 626 my $to = $link->attr('to'); 627 my $section = $link->attr('section'); 628 return undef unless( # should never happen 629 (defined $to and length $to) or 630 (defined $section and length $section) 631 ); 632 633 $section = $self->section_escape($section) 634 if defined $section and length($section .= ''); # (stringify) 635 636 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n", 637 $to || "(nil)", $section || "(nil)"; 638 639 { 640 # An early hack: 641 my $complete_url = $self->resolve_pod_link_by_table($to, $section); 642 if( $complete_url ) { 643 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ", 644 $complete_url, "\n (Returning that.)\n"; 645 return $complete_url; 646 } else { 647 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)", 648 " didn't return anything interesting.\n"; 649 } 650 } 651 652 if(defined $to and length $to) { 653 # Give this routine first hack again 654 my $there = $self->resolve_pod_link_by_table($to); 655 if(defined $there and length $there) { 656 DEBUG > 1 657 and print STDERR "resolve_pod_link_by_table(T) gives $there\n"; 658 } else { 659 $there = 660 $self->resolve_pod_page_link($to, $section); 661 # (I pass it the section value, but I don't see a 662 # particular reason it'd use it.) 663 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n"; 664 unless( defined $there and length $there ) { 665 DEBUG and print STDERR "Can't resolve $to\n"; 666 return undef; 667 } 668 # resolve_pod_page_link returning undef is how it 669 # can signal that it gives up on making a link 670 } 671 $to = $there; 672 } 673 674 #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n"; 675 676 my $out = (defined $to and length $to) ? $to : ''; 677 $out .= "#" . $section if defined $section and length $section; 678 679 unless(length $out) { # sanity check 680 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n", 681 $to || "(nil)", $section || "(nil)"; 682 return undef; 683 } 684 685 DEBUG and print STDERR "Resolved to $out\n"; 686 return $out; 687} 688 689 690# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 691 692sub section_escape { 693 my($self, $section) = @_; 694 return $self->section_url_escape( 695 $self->section_name_tidy($section) 696 ); 697} 698 699sub section_name_tidy { 700 my($self, $section) = @_; 701 $section =~ s/^\s+//; 702 $section =~ s/\s+$//; 703 $section =~ tr/ /_/; 704 if ($] ge 5.006) { 705 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters 706 } elsif ('A' eq chr(65)) { # But not on early EBCDIC 707 $section =~ tr/\x00-\x1F\x80-\x9F//d; 708 } 709 $section = $self->unicode_escape_url($section); 710 $section = '_' unless length $section; 711 return $section; 712} 713 714sub section_url_escape { shift->general_url_escape(@_) } 715sub pagepath_url_escape { shift->general_url_escape(@_) } 716sub manpage_url_escape { shift->general_url_escape(@_) } 717 718sub general_url_escape { 719 my($self, $string) = @_; 720 721 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; 722 # express Unicode things as urlencode(utf(orig)). 723 724 # A pretty conservative escaping, behoovey even for query components 725 # of a URL (see RFC 2396) 726 727 if ($] ge 5.007_003) { 728 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg; 729 } else { # Is broken for non-ASCII platforms on early perls 730 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; 731 } 732 # Yes, stipulate the list without a range, so that this can work right on 733 # all charsets that this module happens to run under. 734 735 return $string; 736} 737 738#-------------------------------------------------------------------------- 739# 740# Oh look, a yawning portal to Hell! Let's play touch football right by it! 741# 742 743sub resolve_pod_page_link { 744 # resolve_pod_page_link must return a properly escaped URL 745 my $self = shift; 746 return $self->batch_mode() 747 ? $self->resolve_pod_page_link_batch_mode(@_) 748 : $self->resolve_pod_page_link_singleton_mode(@_) 749 ; 750} 751 752sub resolve_pod_page_link_singleton_mode { 753 my($self, $it) = @_; 754 return undef unless defined $it and length $it; 755 my $url = $self->pagepath_url_escape($it); 756 757 $url =~ s{::$}{}s; # probably never comes up anyway 758 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? 759 760 return undef unless length $url; 761 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; 762} 763 764sub resolve_pod_page_link_batch_mode { 765 my($self, $to) = @_; 766 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n"; 767 my @path = grep length($_), split m/::/s, $to, -1; 768 unless( @path ) { # sanity 769 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n"; 770 return undef; 771 } 772 $self->batch_mode_rectify_path(\@path); 773 my $out = join('/', map $self->pagepath_url_escape($_), @path) 774 . $HTML_EXTENSION; 775 DEBUG > 1 and print STDERR " => $out\n"; 776 return $out; 777} 778 779sub batch_mode_rectify_path { 780 my($self, $pathbits) = @_; 781 my $level = $self->batch_mode_current_level; 782 $level--; # how many levels up to go to get to the root 783 if($level < 1) { 784 unshift @$pathbits, '.'; # just to be pretty 785 } else { 786 unshift @$pathbits, ('..') x $level; 787 } 788 return; 789} 790 791sub resolve_man_page_link { 792 my ($self, $to, $frag) = @_; 793 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/; 794 795 return undef unless defined $page and length $page; 796 $section ||= 1; 797 798 return $self->man_url_prefix . "$section/" 799 . $self->manpage_url_escape($page) 800 . $self->man_url_postfix; 801} 802 803#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 804 805sub resolve_pod_link_by_table { 806 # A crazy hack to allow specifying custom L<foo> => URL mappings 807 808 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut 809 810 my($self, $to, $section) = @_; 811 812 # TODO: add a method that actually populates podhtml_LOT from a file? 813 814 if(defined $section) { 815 $to = '' unless defined $to and length $to; 816 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! 817 } else { 818 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! 819 } 820 return; 821} 822 823########################################################################### 824 825sub linearize_tokens { # self, tokens 826 my $self = shift; 827 my $out = ''; 828 829 my $t; 830 while($t = shift @_) { 831 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { 832 $out .= $t; # a string, or some insane thing 833 } elsif($t->is_text) { 834 $out .= $t->text; 835 } elsif($t->is_start and $t->tag eq 'X') { 836 # Ignore until the end of this X<...> sequence: 837 my $x_open = 1; 838 while($x_open) { 839 next if( ($t = shift @_)->is_text ); 840 if( $t->is_start and $t->tag eq 'X') { ++$x_open } 841 elsif($t->is_end and $t->tag eq 'X') { --$x_open } 842 } 843 } 844 } 845 return undef if length $out > $Linearization_Limit; 846 return $out; 847} 848 849#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 850 851sub unicode_escape_url { 852 my($self, $string) = @_; 853 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; 854 # Turn char 1234 into "(1234)" 855 return $string; 856} 857 858#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 859sub esc { # a function. 860 if(defined wantarray) { 861 if(wantarray) { 862 @_ = splice @_; # break aliasing 863 } else { 864 my $x = shift; 865 if ($] ge 5.007_003) { 866 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg; 867 } else { # Is broken for non-ASCII platforms on early perls 868 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; 869 } 870 return $x; 871 } 872 } 873 foreach my $x (@_) { 874 # Escape things very cautiously: 875 if (defined $x) { 876 if ($] ge 5.007_003) { 877 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg 878 } else { # Is broken for non-ASCII platforms on early perls 879 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg 880 } 881 } 882 # Leave out "- so that "--" won't make it thru in X-generated comments 883 # with text in them. 884 885 # Yes, stipulate the list without a range, so that this can work right on 886 # all charsets that this module happens to run under. 887 } 888 return @_; 889} 890 891#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 892 8931; 894__END__ 895 896=head1 NAME 897 898Pod::Simple::HTML - convert Pod to HTML 899 900=head1 SYNOPSIS 901 902 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod 903 904 905=head1 DESCRIPTION 906 907This class is for making an HTML rendering of a Pod document. 908 909This is a subclass of L<Pod::Simple::PullParser> and inherits all its 910methods (and options). 911 912Note that if you want to do a batch conversion of a lot of Pod 913documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. 914 915 916 917=head1 CALLING FROM THE COMMAND LINE 918 919TODO 920 921 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html 922 923 924 925=head1 CALLING FROM PERL 926 927=head2 Minimal code 928 929 use Pod::Simple::HTML; 930 my $p = Pod::Simple::HTML->new; 931 $p->output_string(\my $html); 932 $p->parse_file('path/to/Module/Name.pm'); 933 open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n"; 934 print $out $html; 935 936=head2 More detailed example 937 938 use Pod::Simple::HTML; 939 940Set the content type: 941 942 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 943 944 my $p = Pod::Simple::HTML->new; 945 946Include a single javascript source: 947 948 $p->html_javascript('http://abc.com/a.js'); 949 950Or insert multiple javascript source in the header 951(or for that matter include anything, thought this is not recommended) 952 953 $p->html_javascript(' 954 <script type="text/javascript" src="http://abc.com/b.js"></script> 955 <script type="text/javascript" src="http://abc.com/c.js"></script>'); 956 957Include a single css source in the header: 958 959 $p->html_css('/style.css'); 960 961or insert multiple css sources: 962 963 $p->html_css(' 964 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css"> 965 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">'); 966 967Tell the parser where should the output go. In this case it will be placed in the $html variable: 968 969 my $html; 970 $p->output_string(\$html); 971 972Parse and process a file with pod in it: 973 974 $p->parse_file('path/to/Module/Name.pm'); 975 976=head1 METHODS 977 978TODO 979all (most?) accessorized methods 980 981The following variables need to be set B<before> the call to the ->new constructor. 982 983Set the string that is included before the opening <html> tag: 984 985 $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 986 "http://www.w3.org/TR/html4/loose.dtd">\n}; 987 988Set the content-type in the HTML head: (defaults to ISO-8859-1) 989 990 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 991 992Set the value that will be embedded in the opening tags of F, C tags and verbatim text. 993F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "") 994 995 $Pod::Simple::HTML::Computerese = ' class="some_class_name'; 996 997=head2 html_css 998 999=head2 html_javascript 1000 1001=head2 title_prefix 1002 1003=head2 title_postfix 1004 1005=head2 html_header_before_title 1006 1007This includes everything before the <title> opening tag including the Document type 1008and including the opening <title> tag. The following call will set it to be a simple HTML 1009file: 1010 1011 $p->html_header_before_title('<html><head><title>'); 1012 1013=head2 top_anchor 1014 1015By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML. 1016You can change it by calling 1017 1018 $p->top_anchor('<a name="zz" >'); 1019 1020=head2 html_h_level 1021 1022Normally =head1 will become <h1>, =head2 will become <h2> etc. 1023Using the html_h_level method will change these levels setting the h level 1024of =head1 tags: 1025 1026 $p->html_h_level(3); 1027 1028Will make sure that =head1 will become <h3> and =head2 will become <h4> etc... 1029 1030 1031=head2 index 1032 1033Set it to some true value if you want to have an index (in reality a table of contents) 1034to be added at the top of the generated HTML. 1035 1036 $p->index(1); 1037 1038=head2 html_header_after_title 1039 1040Includes the closing tag of </title> and through the rest of the head 1041till the opening of the body 1042 1043 $p->html_header_after_title('</title>...</head><body id="my_id">'); 1044 1045=head2 html_footer 1046 1047The very end of the document: 1048 1049 $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 1050 1051=head1 SUBCLASSING 1052 1053Can use any of the methods described above but for further customization 1054one needs to override some of the methods: 1055 1056 package My::Pod; 1057 use strict; 1058 use warnings; 1059 1060 use base 'Pod::Simple::HTML'; 1061 1062 # needs to return a URL string such 1063 # http://some.other.com/page.html 1064 # #anchor_in_the_same_file 1065 # /internal/ref.html 1066 sub do_pod_link { 1067 # My::Pod object and Pod::Simple::PullParserStartToken object 1068 my ($self, $link) = @_; 1069 1070 say $link->tagname; # will be L for links 1071 say $link->attr('to'); # 1072 say $link->attr('type'); # will be 'pod' always 1073 say $link->attr('section'); 1074 1075 # Links local to our web site 1076 if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') { 1077 my $to = $link->attr('to'); 1078 if ($to =~ /^Padre::/) { 1079 $to =~ s{::}{/}g; 1080 return "/docs/Padre/$to.html"; 1081 } 1082 } 1083 1084 # all other links are generated by the parent class 1085 my $ret = $self->SUPER::do_pod_link($link); 1086 return $ret; 1087 } 1088 1089 1; 1090 1091Meanwhile in script.pl: 1092 1093 use My::Pod; 1094 1095 my $p = My::Pod->new; 1096 1097 my $html; 1098 $p->output_string(\$html); 1099 $p->parse_file('path/to/Module/Name.pm'); 1100 open my $out, '>', 'out.html' or die; 1101 print $out $html; 1102 1103TODO 1104 1105maybe override do_beginning do_end 1106 1107=head1 SEE ALSO 1108 1109L<Pod::Simple>, L<Pod::Simple::HTMLBatch> 1110 1111TODO: a corpus of sample Pod input and HTML output? Or common 1112idioms? 1113 1114=head1 SUPPORT 1115 1116Questions or discussion about POD and Pod::Simple should be sent to the 1117pod-people@perl.org mail list. Send an empty email to 1118pod-people-subscribe@perl.org to subscribe. 1119 1120This module is managed in an open GitHub repository, 1121L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or 1122to clone L<https://github.com/perl-pod/pod-simple.git> and send patches! 1123 1124Patches against Pod::Simple are welcome. Please send bug reports to 1125<bug-pod-simple@rt.cpan.org>. 1126 1127=head1 COPYRIGHT AND DISCLAIMERS 1128 1129Copyright (c) 2002-2004 Sean M. Burke. 1130 1131This library is free software; you can redistribute it and/or modify it 1132under the same terms as Perl itself. 1133 1134This program is distributed in the hope that it will be useful, but 1135without any warranty; without even the implied warranty of 1136merchantability or fitness for a particular purpose. 1137 1138=head1 ACKNOWLEDGEMENTS 1139 1140Thanks to L<Hurricane Electric|http://he.net/> for permission to use its 1141L<Linux man pages online|http://man.he.net/> site for man page links. 1142 1143Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the 1144site for Perl module links. 1145 1146=head1 AUTHOR 1147 1148Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. 1149But don't bother him, he's retired. 1150 1151Pod::Simple is maintained by: 1152 1153=over 1154 1155=item * Allison Randal C<allison@perl.org> 1156 1157=item * Hans Dieter Pearcey C<hdp@cpan.org> 1158 1159=item * David E. Wheeler C<dwheeler@cpan.org> 1160 1161=back 1162 1163=cut 1164