1 2require 5; 3package Pod::Simple::HTML; 4use strict; 5use Pod::Simple::PullParser (); 6use vars qw( 7 @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION 8 $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix 9 $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex 10 $Doctype_decl $Content_decl 11); 12@ISA = ('Pod::Simple::PullParser'); 13$VERSION = '3.28'; 14 15BEGIN { 16 if(defined &DEBUG) { } # no-op 17 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } 18 else { *DEBUG = sub () {0}; } 19} 20 21$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. 22 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 23 # "http://www.w3.org/TR/html4/loose.dtd">\n}; 24 25$Content_decl ||= 26 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; 27 28$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; 29$Computerese = "" unless defined $Computerese; 30$LamePad = '' unless defined $LamePad; 31 32$Linearization_Limit = 120 unless defined $Linearization_Limit; 33 # headings/items longer than that won't get an <a name="..."> 34$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' 35 unless defined $Perldoc_URL_Prefix; 36$Perldoc_URL_Postfix = '' 37 unless defined $Perldoc_URL_Postfix; 38 39 40$Man_URL_Prefix = 'http://man.he.net/man'; 41$Man_URL_Postfix = ''; 42 43$Title_Prefix = '' unless defined $Title_Prefix; 44$Title_Postfix = '' unless defined $Title_Postfix; 45%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text 46 # 'item-text' stuff in the index doesn't quite work, and may 47 # not be a good idea anyhow. 48 49 50__PACKAGE__->_accessorize( 51 'perldoc_url_prefix', 52 # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what 53 # to put before the "Foo%3a%3aBar". 54 # (for singleton mode only?) 55 'perldoc_url_postfix', 56 # what to put after "Foo%3a%3aBar" in the URL. Normally "". 57 58 'man_url_prefix', 59 # In turning L<crontab(5)> into http://whatever/man/1/crontab, what 60 # to put before the "1/crontab". 61 'man_url_postfix', 62 # what to put after the "1/crontab" in the URL. Normally "". 63 64 'batch_mode', # whether we're in batch mode 65 'batch_mode_current_level', 66 # When in batch mode, how deep the current module is: 1 for "LWP", 67 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 68 69 'title_prefix', 'title_postfix', 70 # What to put before and after the title in the head. 71 # Should already be &-escaped 72 73 'html_h_level', 74 75 'html_header_before_title', 76 'html_header_after_title', 77 'html_footer', 78 79 'index', # whether to add an index at the top of each page 80 # (actually it's a table-of-contents, but we'll call it an index, 81 # out of apparently longstanding habit) 82 83 'html_css', # URL of CSS file to point to 84 'html_javascript', # URL of Javascript file to point to 85 86 'force_title', # should already be &-escaped 87 'default_title', # should already be &-escaped 88); 89 90#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91my @_to_accept; 92 93%Tagmap = ( 94 'Verbatim' => "\n<pre$Computerese>", 95 '/Verbatim' => "</pre>\n", 96 'VerbatimFormatted' => "\n<pre$Computerese>", 97 '/VerbatimFormatted' => "</pre>\n", 98 'VerbatimB' => "<b>", 99 '/VerbatimB' => "</b>", 100 'VerbatimI' => "<i>", 101 '/VerbatimI' => "</i>", 102 'VerbatimBI' => "<b><i>", 103 '/VerbatimBI' => "</i></b>", 104 105 106 'Data' => "\n", 107 '/Data' => "\n", 108 109 'head1' => "\n<h1>", # And also stick in an <a name="..."> 110 'head2' => "\n<h2>", # '' 111 'head3' => "\n<h3>", # '' 112 'head4' => "\n<h4>", # '' 113 '/head1' => "</a></h1>\n", 114 '/head2' => "</a></h2>\n", 115 '/head3' => "</a></h3>\n", 116 '/head4' => "</a></h4>\n", 117 118 'X' => "<!--\n\tINDEX: ", 119 '/X' => "\n-->", 120 121 changes(qw( 122 Para=p 123 B=b I=i 124 over-bullet=ul 125 over-number=ol 126 over-text=dl 127 over-block=blockquote 128 item-bullet=li 129 item-number=li 130 item-text=dt 131 )), 132 changes2( 133 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } 134 qw[ 135 sample=samp 136 definition=dfn 137 keyboard=kbd 138 variable=var 139 citation=cite 140 abbreviation=abbr 141 acronym=acronym 142 subscript=sub 143 superscript=sup 144 big=big 145 small=small 146 underline=u 147 strikethrough=s 148 preformat=pre 149 teletype=tt 150 ] # no point in providing a way to get <q>...</q>, I think 151 ), 152 153 '/item-bullet' => "</li>$LamePad\n", 154 '/item-number' => "</li>$LamePad\n", 155 '/item-text' => "</a></dt>$LamePad\n", 156 'item-body' => "\n<dd>", 157 '/item-body' => "</dd>\n", 158 159 160 'B' => "<b>", '/B' => "</b>", 161 'I' => "<i>", '/I' => "</i>", 162 'F' => "<em$Computerese>", '/F' => "</em>", 163 'C' => "<code$Computerese>", '/C' => "</code>", 164 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! 165 '/L' => "</a>", 166); 167 168sub changes { 169 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 170 ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" 171 } @_; 172} 173sub changes2 { 174 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 175 ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" 176 } @_; 177} 178 179#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 180sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } 181 # Just so we can run from the command line. No options. 182 # For that, use perldoc! 183#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 185sub new { 186 my $new = shift->SUPER::new(@_); 187 #$new->nix_X_codes(1); 188 $new->nbsp_for_S(1); 189 $new->accept_targets( 'html', 'HTML' ); 190 $new->accept_codes('VerbatimFormatted'); 191 $new->accept_codes(@_to_accept); 192 DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n"; 193 194 $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); 195 $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); 196 $new->man_url_prefix( $Man_URL_Prefix ); 197 $new->man_url_postfix( $Man_URL_Postfix ); 198 $new->title_prefix( $Title_Prefix ); 199 $new->title_postfix( $Title_Postfix ); 200 201 $new->html_header_before_title( 202 qq[$Doctype_decl<html><head><title>] 203 ); 204 $new->html_header_after_title( join "\n" => 205 "</title>", 206 $Content_decl, 207 "</head>\n<body class='pod'>", 208 $new->version_tag_comment, 209 "<!-- start doc -->\n", 210 ); 211 $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 212 213 $new->{'Tagmap'} = {%Tagmap}; 214 215 return $new; 216} 217 218sub __adjust_html_h_levels { 219 my ($self) = @_; 220 my $Tagmap = $self->{'Tagmap'}; 221 222 my $add = $self->html_h_level; 223 return unless defined $add; 224 return if ($self->{'Adjusted_html_h_levels'}||0) == $add; 225 226 $add -= 1; 227 for (1 .. 4) { 228 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e; 229 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e; 230 } 231} 232 233sub batch_mode_page_object_init { 234 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; 235 DEBUG and print "Initting $self\n for $module\n", 236 " in $infile\n out $outfile\n depth $depth\n"; 237 $self->batch_mode(1); 238 $self->batch_mode_current_level($depth); 239 return $self; 240} 241 242sub run { 243 my $self = $_[0]; 244 return $self->do_middle if $self->bare_output; 245 return 246 $self->do_beginning && $self->do_middle && $self->do_end; 247} 248 249#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 250 251sub do_beginning { 252 my $self = $_[0]; 253 254 my $title; 255 256 if(defined $self->force_title) { 257 $title = $self->force_title; 258 DEBUG and print "Forcing title to be $title\n"; 259 } else { 260 # Actually try looking for the title in the document: 261 $title = $self->get_short_title(); 262 unless($self->content_seen) { 263 DEBUG and print "No content seen in search for title.\n"; 264 return; 265 } 266 $self->{'Title'} = $title; 267 268 if(defined $title and $title =~ m/\S/) { 269 $title = $self->title_prefix . esc($title) . $self->title_postfix; 270 } else { 271 $title = $self->default_title; 272 $title = '' unless defined $title; 273 DEBUG and print "Title defaults to $title\n"; 274 } 275 } 276 277 278 my $after = $self->html_header_after_title || ''; 279 if($self->html_css) { 280 my $link = 281 $self->html_css =~ m/</ 282 ? $self->html_css # It's a big blob of markup, let's drop it in 283 : sprintf( # It's just a URL, so let's wrap it up 284 qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], 285 $self->html_css, 286 ); 287 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 288 } 289 $self->_add_top_anchor(\$after); 290 291 if($self->html_javascript) { 292 my $link = 293 $self->html_javascript =~ m/</ 294 ? $self->html_javascript # It's a big blob of markup, let's drop it in 295 : sprintf( # It's just a URL, so let's wrap it up 296 qq[<script type="text/javascript" src="%s"></script>\n], 297 $self->html_javascript, 298 ); 299 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 300 } 301 302 print {$self->{'output_fh'}} 303 $self->html_header_before_title || '', 304 $title, # already escaped 305 $after, 306 ; 307 308 DEBUG and print "Returning from do_beginning...\n"; 309 return 1; 310} 311 312sub _add_top_anchor { 313 my($self, $text_r) = @_; 314 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack 315 $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n"; 316 } 317 return; 318} 319 320sub version_tag_comment { 321 my $self = shift; 322 return sprintf 323 "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", 324 esc( 325 ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), 326 $], scalar(gmtime), 327 ), $self->_modnote(), 328 ; 329} 330 331sub _modnote { 332 my $class = ref($_[0]) || $_[0]; 333 return join "\n " => grep m/\S/, split "\n", 334 335qq{ 336If you want to change this HTML document, you probably shouldn't do that 337by changing it directly. Instead, see about changing the calling options 338to $class, and/or subclassing $class, 339then reconverting this document from the Pod source. 340When in doubt, email the author of $class for advice. 341See 'perldoc $class' for more info. 342}; 343 344} 345 346sub do_end { 347 my $self = $_[0]; 348 print {$self->{'output_fh'}} $self->html_footer || ''; 349 return 1; 350} 351 352# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 353# Normally this would just be a call to _do_middle_main_loop -- but we 354# have to do some elaborate things to emit all the content and then 355# summarize it and output it /before/ the content that it's a summary of. 356 357sub do_middle { 358 my $self = $_[0]; 359 return $self->_do_middle_main_loop unless $self->index; 360 361 if( $self->output_string ) { 362 # An efficiency hack 363 my $out = $self->output_string; #it's a reference to it 364 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; 365 $$out .= $sneakytag; 366 $self->_do_middle_main_loop; 367 $sneakytag = quotemeta($sneakytag); 368 my $index = $self->index_as_html(); 369 if( $$out =~ s/$sneakytag/$index/s ) { 370 # Expected case 371 DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n"; 372 } else { 373 DEBUG and print "Odd, couldn't find where to insert the index in the output!\n"; 374 # I don't think this should ever happen. 375 } 376 return 1; 377 } 378 379 unless( $self->output_fh ) { 380 require Carp; 381 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); 382 } 383 384 # If we get here, we're outputting to a FH. So we need to do some magic. 385 # Namely, divert all content to a string, which we output after the index. 386 my $fh = $self->output_fh; 387 my $content = ''; 388 { 389 # Our horrible bait and switch: 390 $self->output_string( \$content ); 391 $self->_do_middle_main_loop; 392 $self->abandon_output_string(); 393 $self->output_fh($fh); 394 } 395 print $fh $self->index_as_html(); 396 print $fh $content; 397 398 return 1; 399} 400 401########################################################################### 402 403sub index_as_html { 404 my $self = $_[0]; 405 # This is meant to be called AFTER the input document has been parsed! 406 407 my $points = $self->{'PSHTML_index_points'} || []; 408 409 @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; 410 # There's no point in having a 0-item or 1-item index, I dare say. 411 412 my(@out) = qq{\n<div class='indexgroup'>}; 413 my $level = 0; 414 415 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); 416 foreach my $p (@$points, ['head0', '(end)']) { 417 ($tagname, $text) = @$p; 418 $anchorname = $self->section_escape($text); 419 if( $tagname =~ m{^head(\d+)$} ) { 420 $target_level = 0 + $1; 421 } else { # must be some kinda list item 422 if($previous_tagname =~ m{^head\d+$} ) { 423 $target_level = $level + 1; 424 } else { 425 $target_level = $level; # no change needed 426 } 427 } 428 429 # Get to target_level by opening or closing ULs 430 while($level > $target_level) 431 { --$level; push @out, (" " x $level) . "</ul>"; } 432 while($level < $target_level) 433 { ++$level; push @out, (" " x ($level-1)) 434 . "<ul class='indexList indexList$level'>"; } 435 436 $previous_tagname = $tagname; 437 next unless $level; 438 439 $indent = ' ' x $level; 440 push @out, sprintf 441 "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", 442 $indent, $level, esc($anchorname), esc($text) 443 ; 444 } 445 push @out, "</div>\n"; 446 return join "\n", @out; 447} 448 449########################################################################### 450 451sub _do_middle_main_loop { 452 my $self = $_[0]; 453 my $fh = $self->{'output_fh'}; 454 my $tagmap = $self->{'Tagmap'}; 455 456 $self->__adjust_html_h_levels; 457 458 my($token, $type, $tagname, $linkto, $linktype); 459 my @stack; 460 my $dont_wrap = 0; 461 462 while($token = $self->get_token) { 463 464 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 465 if( ($type = $token->type) eq 'start' ) { 466 if(($tagname = $token->tagname) eq 'L') { 467 $linktype = $token->attr('type') || 'insane'; 468 469 $linkto = $self->do_link($token); 470 471 if(defined $linkto and length $linkto) { 472 esc($linkto); 473 # (Yes, SGML-escaping applies on top of %-escaping! 474 # But it's rarely noticeable in practice.) 475 print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; 476 } else { 477 print $fh "<a>"; # Yes, an 'a' element with no attributes! 478 } 479 480 } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { 481 print $fh $tagmap->{$tagname} || next; 482 483 my @to_unget; 484 while(1) { 485 push @to_unget, $self->get_token; 486 last if $to_unget[-1]->is_end 487 and $to_unget[-1]->tagname eq $tagname; 488 489 # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) 490 } 491 492 my $name = $self->linearize_tokens(@to_unget); 493 $name = $self->do_section($name, $token) if defined $name; 494 495 print $fh "<a "; 496 if ($tagname =~ m/^head\d$/s) { 497 print $fh "class='u'", $self->index 498 ? " href='#___top' title='click to go to top of document'\n" 499 : "\n"; 500 } 501 502 if(defined $name) { 503 my $esc = esc( $self->section_name_tidy( $name ) ); 504 print $fh qq[name="$esc"]; 505 DEBUG and print "Linearized ", scalar(@to_unget), 506 " tokens as \"$name\".\n"; 507 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] 508 if $ToIndex{ $tagname }; 509 # Obviously, this discards all formatting codes (saving 510 # just their content), but ahwell. 511 512 } else { # ludicrously long, so nevermind 513 DEBUG and print "Linearized ", scalar(@to_unget), 514 " tokens, but it was too long, so nevermind.\n"; 515 } 516 print $fh "\n>"; 517 $self->unget_token(@to_unget); 518 519 } elsif ($tagname eq 'Data') { 520 my $next = $self->get_token; 521 next unless defined $next; 522 unless( $next->type eq 'text' ) { 523 $self->unget_token($next); 524 next; 525 } 526 DEBUG and print " raw text ", $next->text, "\n"; 527 print $fh "\n" . $next->text . "\n"; 528 next; 529 530 } else { 531 if( $tagname =~ m/^over-/s ) { 532 push @stack, ''; 533 } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { 534 print $fh $stack[-1]; 535 $stack[-1] = ''; 536 } 537 print $fh $tagmap->{$tagname} || next; 538 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" 539 or $tagname eq 'X'; 540 } 541 542 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 543 } elsif( $type eq 'end' ) { 544 if( ($tagname = $token->tagname) =~ m/^over-/s ) { 545 if( my $end = pop @stack ) { 546 print $fh $end; 547 } 548 } elsif( $tagname =~ m/^item-/s and @stack) { 549 $stack[-1] = $tagmap->{"/$tagname"}; 550 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { 551 $self->unget_token($next); 552 if( $next->type eq 'start' ) { 553 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; 554 $stack[-1] = $tagmap->{"/item-body"}; 555 } 556 } 557 next; 558 } 559 print $fh $tagmap->{"/$tagname"} || next; 560 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; 561 562 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 563 } elsif( $type eq 'text' ) { 564 esc($type = $token->text); # reuse $type, why not 565 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; 566 print $fh $type; 567 } 568 569 } 570 return 1; 571} 572 573########################################################################### 574# 575 576sub do_section { 577 my($self, $name, $token) = @_; 578 return $name; 579} 580 581sub do_link { 582 my($self, $token) = @_; 583 my $type = $token->attr('type'); 584 if(!defined $type) { 585 $self->whine("Typeless L!?", $token->attr('start_line')); 586 } elsif( $type eq 'pod') { return $self->do_pod_link($token); 587 } elsif( $type eq 'url') { return $self->do_url_link($token); 588 } elsif( $type eq 'man') { return $self->do_man_link($token); 589 } else { 590 $self->whine("L of unknown type $type!?", $token->attr('start_line')); 591 } 592 return 'FNORG'; # should never get called 593} 594 595# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 596 597sub do_url_link { return $_[1]->attr('to') } 598 599sub do_man_link { 600 my ($self, $link) = @_; 601 my $to = $link->attr('to'); 602 my $frag = $link->attr('section'); 603 604 return undef unless defined $to and length $to; # should never happen 605 606 $frag = $self->section_escape($frag) 607 if defined $frag and length($frag .= ''); # (stringify) 608 609 DEBUG and print "Resolving \"$to/$frag\"\n\n"; 610 611 return $self->resolve_man_page_link($to, $frag); 612} 613 614 615sub do_pod_link { 616 # And now things get really messy... 617 my($self, $link) = @_; 618 my $to = $link->attr('to'); 619 my $section = $link->attr('section'); 620 return undef unless( # should never happen 621 (defined $to and length $to) or 622 (defined $section and length $section) 623 ); 624 625 $section = $self->section_escape($section) 626 if defined $section and length($section .= ''); # (stringify) 627 628 DEBUG and printf "Resolving \"%s\" \"%s\"...\n", 629 $to || "(nil)", $section || "(nil)"; 630 631 { 632 # An early hack: 633 my $complete_url = $self->resolve_pod_link_by_table($to, $section); 634 if( $complete_url ) { 635 DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ", 636 $complete_url, "\n (Returning that.)\n"; 637 return $complete_url; 638 } else { 639 DEBUG > 4 and print " resolve_pod_link_by_table(T,S)", 640 " didn't return anything interesting.\n"; 641 } 642 } 643 644 if(defined $to and length $to) { 645 # Give this routine first hack again 646 my $there = $self->resolve_pod_link_by_table($to); 647 if(defined $there and length $there) { 648 DEBUG > 1 649 and print "resolve_pod_link_by_table(T) gives $there\n"; 650 } else { 651 $there = 652 $self->resolve_pod_page_link($to, $section); 653 # (I pass it the section value, but I don't see a 654 # particular reason it'd use it.) 655 DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n"; 656 unless( defined $there and length $there ) { 657 DEBUG and print "Can't resolve $to\n"; 658 return undef; 659 } 660 # resolve_pod_page_link returning undef is how it 661 # can signal that it gives up on making a link 662 } 663 $to = $there; 664 } 665 666 #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n"; 667 668 my $out = (defined $to and length $to) ? $to : ''; 669 $out .= "#" . $section if defined $section and length $section; 670 671 unless(length $out) { # sanity check 672 DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n", 673 $to || "(nil)", $section || "(nil)"; 674 return undef; 675 } 676 677 DEBUG and print "Resolved to $out\n"; 678 return $out; 679} 680 681 682# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 683 684sub section_escape { 685 my($self, $section) = @_; 686 return $self->section_url_escape( 687 $self->section_name_tidy($section) 688 ); 689} 690 691sub section_name_tidy { 692 my($self, $section) = @_; 693 $section =~ s/^\s+//; 694 $section =~ s/\s+$//; 695 $section =~ tr/ /_/; 696 $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters 697 $section = $self->unicode_escape_url($section); 698 $section = '_' unless length $section; 699 return $section; 700} 701 702sub section_url_escape { shift->general_url_escape(@_) } 703sub pagepath_url_escape { shift->general_url_escape(@_) } 704sub manpage_url_escape { shift->general_url_escape(@_) } 705 706sub general_url_escape { 707 my($self, $string) = @_; 708 709 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; 710 # express Unicode things as urlencode(utf(orig)). 711 712 # A pretty conservative escaping, behoovey even for query components 713 # of a URL (see RFC 2396) 714 715 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; 716 # Yes, stipulate the list without a range, so that this can work right on 717 # all charsets that this module happens to run under. 718 # Altho, hmm, what about that ord? Presumably that won't work right 719 # under non-ASCII charsets. Something should be done 720 # about that, I guess? 721 722 return $string; 723} 724 725#-------------------------------------------------------------------------- 726# 727# Oh look, a yawning portal to Hell! Let's play touch football right by it! 728# 729 730sub resolve_pod_page_link { 731 # resolve_pod_page_link must return a properly escaped URL 732 my $self = shift; 733 return $self->batch_mode() 734 ? $self->resolve_pod_page_link_batch_mode(@_) 735 : $self->resolve_pod_page_link_singleton_mode(@_) 736 ; 737} 738 739sub resolve_pod_page_link_singleton_mode { 740 my($self, $it) = @_; 741 return undef unless defined $it and length $it; 742 my $url = $self->pagepath_url_escape($it); 743 744 $url =~ s{::$}{}s; # probably never comes up anyway 745 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? 746 747 return undef unless length $url; 748 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; 749} 750 751sub resolve_pod_page_link_batch_mode { 752 my($self, $to) = @_; 753 DEBUG > 1 and print " During batch mode, resolving $to ...\n"; 754 my @path = grep length($_), split m/::/s, $to, -1; 755 unless( @path ) { # sanity 756 DEBUG and print "Very odd! Splitting $to gives (nil)!\n"; 757 return undef; 758 } 759 $self->batch_mode_rectify_path(\@path); 760 my $out = join('/', map $self->pagepath_url_escape($_), @path) 761 . $HTML_EXTENSION; 762 DEBUG > 1 and print " => $out\n"; 763 return $out; 764} 765 766sub batch_mode_rectify_path { 767 my($self, $pathbits) = @_; 768 my $level = $self->batch_mode_current_level; 769 $level--; # how many levels up to go to get to the root 770 if($level < 1) { 771 unshift @$pathbits, '.'; # just to be pretty 772 } else { 773 unshift @$pathbits, ('..') x $level; 774 } 775 return; 776} 777 778sub resolve_man_page_link { 779 my ($self, $to, $frag) = @_; 780 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/; 781 782 return undef unless defined $page and length $page; 783 $section ||= 1; 784 785 return $self->man_url_prefix . "$section/" 786 . $self->manpage_url_escape($page) 787 . $self->man_url_postfix; 788} 789 790#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 791 792sub resolve_pod_link_by_table { 793 # A crazy hack to allow specifying custom L<foo> => URL mappings 794 795 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut 796 797 my($self, $to, $section) = @_; 798 799 # TODO: add a method that actually populates podhtml_LOT from a file? 800 801 if(defined $section) { 802 $to = '' unless defined $to and length $to; 803 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! 804 } else { 805 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! 806 } 807 return; 808} 809 810########################################################################### 811 812sub linearize_tokens { # self, tokens 813 my $self = shift; 814 my $out = ''; 815 816 my $t; 817 while($t = shift @_) { 818 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { 819 $out .= $t; # a string, or some insane thing 820 } elsif($t->is_text) { 821 $out .= $t->text; 822 } elsif($t->is_start and $t->tag eq 'X') { 823 # Ignore until the end of this X<...> sequence: 824 my $x_open = 1; 825 while($x_open) { 826 next if( ($t = shift @_)->is_text ); 827 if( $t->is_start and $t->tag eq 'X') { ++$x_open } 828 elsif($t->is_end and $t->tag eq 'X') { --$x_open } 829 } 830 } 831 } 832 return undef if length $out > $Linearization_Limit; 833 return $out; 834} 835 836#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 837 838sub unicode_escape_url { 839 my($self, $string) = @_; 840 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; 841 # Turn char 1234 into "(1234)" 842 return $string; 843} 844 845#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 846sub esc { # a function. 847 if(defined wantarray) { 848 if(wantarray) { 849 @_ = splice @_; # break aliasing 850 } else { 851 my $x = shift; 852 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; 853 return $x; 854 } 855 } 856 foreach my $x (@_) { 857 # Escape things very cautiously: 858 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg 859 if defined $x; 860 # Leave out "- so that "--" won't make it thru in X-generated comments 861 # with text in them. 862 863 # Yes, stipulate the list without a range, so that this can work right on 864 # all charsets that this module happens to run under. 865 # Altho, hmm, what about that ord? Presumably that won't work right 866 # under non-ASCII charsets. Something should be done about that. 867 } 868 return @_; 869} 870 871#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 872 8731; 874__END__ 875 876=head1 NAME 877 878Pod::Simple::HTML - convert Pod to HTML 879 880=head1 SYNOPSIS 881 882 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod 883 884 885=head1 DESCRIPTION 886 887This class is for making an HTML rendering of a Pod document. 888 889This is a subclass of L<Pod::Simple::PullParser> and inherits all its 890methods (and options). 891 892Note that if you want to do a batch conversion of a lot of Pod 893documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. 894 895 896 897=head1 CALLING FROM THE COMMAND LINE 898 899TODO 900 901 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html 902 903 904 905=head1 CALLING FROM PERL 906 907=head2 Minimal code 908 909 use Pod::Simple::HTML; 910 my $p = Pod::Simple::HTML->new; 911 $p->output_string(\my $html); 912 $p->parse_file('path/to/Module/Name.pm'); 913 open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n"; 914 print $out $html; 915 916=head2 More detailed example 917 918 use Pod::Simple::HTML; 919 920Set the content type: 921 922 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 923 924 my $p = Pod::Simple::HTML->new; 925 926Include a single javascript source: 927 928 $p->html_javascript('http://abc.com/a.js'); 929 930Or insert multiple javascript source in the header 931(or for that matter include anything, thought this is not recommended) 932 933 $p->html_javascript(' 934 <script type="text/javascript" src="http://abc.com/b.js"></script> 935 <script type="text/javascript" src="http://abc.com/c.js"></script>'); 936 937Include a single css source in the header: 938 939 $p->html_css('/style.css'); 940 941or insert multiple css sources: 942 943 $p->html_css(' 944 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css"> 945 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">'); 946 947Tell the parser where should the output go. In this case it will be placed in the $html variable: 948 949 my $html; 950 $p->output_string(\$html); 951 952Parse and process a file with pod in it: 953 954 $p->parse_file('path/to/Module/Name.pm'); 955 956=head1 METHODS 957 958TODO 959all (most?) accessorized methods 960 961The following variables need to be set B<before> the call to the ->new constructor. 962 963Set the string that is included before the opening <html> tag: 964 965 $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 966 "http://www.w3.org/TR/html4/loose.dtd">\n}; 967 968Set the content-type in the HTML head: (defaults to ISO-8859-1) 969 970 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 971 972Set the value that will be ebedded in the opening tags of F, C tags and verbatim text. 973F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "") 974 975 $Pod::Simple::HTML::Computerese = ' class="some_class_name'; 976 977=head2 html_css 978 979=head2 html_javascript 980 981=head2 title_prefix 982 983=head2 title_postfix 984 985=head2 html_header_before_title 986 987This includes everything before the <title> opening tag including the Document type 988and including the opening <title> tag. The following call will set it to be a simple HTML 989file: 990 991 $p->html_header_before_title('<html><head><title>'); 992 993=head2 html_h_level 994 995Normally =head1 will become <h1>, =head2 will become <h2> etc. 996Using the html_h_level method will change these levels setting the h level 997of =head1 tags: 998 999 $p->html_h_level(3); 1000 1001Will make sure that =head1 will become <h3> and =head2 will become <h4> etc... 1002 1003 1004=head2 index 1005 1006Set it to some true value if you want to have an index (in reality a table of contents) 1007to be added at the top of the generated HTML. 1008 1009 $p->index(1); 1010 1011=head2 html_header_after_title 1012 1013Includes the closing tag of </title> and through the rest of the head 1014till the opening of the body 1015 1016 $p->html_header_after_title('</title>...</head><body id="my_id">'); 1017 1018=head2 html_footer 1019 1020The very end of the document: 1021 1022 $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 1023 1024=head1 SUBCLASSING 1025 1026Can use any of the methods described above but for further customization 1027one needs to override some of the methods: 1028 1029 package My::Pod; 1030 use strict; 1031 use warnings; 1032 1033 use base 'Pod::Simple::HTML'; 1034 1035 # needs to return a URL string such 1036 # http://some.other.com/page.html 1037 # #anchor_in_the_same_file 1038 # /internal/ref.html 1039 sub do_pod_link { 1040 # My::Pod object and Pod::Simple::PullParserStartToken object 1041 my ($self, $link) = @_; 1042 1043 say $link->tagname; # will be L for links 1044 say $link->attr('to'); # 1045 say $link->attr('type'); # will be 'pod' always 1046 say $link->attr('section'); 1047 1048 # Links local to our web site 1049 if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') { 1050 my $to = $link->attr('to'); 1051 if ($to =~ /^Padre::/) { 1052 $to =~ s{::}{/}g; 1053 return "/docs/Padre/$to.html"; 1054 } 1055 } 1056 1057 # all other links are generated by the parent class 1058 my $ret = $self->SUPER::do_pod_link($link); 1059 return $ret; 1060 } 1061 1062 1; 1063 1064Meanwhile in script.pl: 1065 1066 use My::Pod; 1067 1068 my $p = My::Pod->new; 1069 1070 my $html; 1071 $p->output_string(\$html); 1072 $p->parse_file('path/to/Module/Name.pm'); 1073 open my $out, '>', 'out.html' or die; 1074 print $out $html; 1075 1076TODO 1077 1078maybe override do_beginning do_end 1079 1080=head1 SEE ALSO 1081 1082L<Pod::Simple>, L<Pod::Simple::HTMLBatch> 1083 1084TODO: a corpus of sample Pod input and HTML output? Or common 1085idioms? 1086 1087=head1 SUPPORT 1088 1089Questions or discussion about POD and Pod::Simple should be sent to the 1090pod-people@perl.org mail list. Send an empty email to 1091pod-people-subscribe@perl.org to subscribe. 1092 1093This module is managed in an open GitHub repository, 1094L<https://github.com/theory/pod-simple/>. Feel free to fork and contribute, or 1095to clone L<git://github.com/theory/pod-simple.git> and send patches! 1096 1097Patches against Pod::Simple are welcome. Please send bug reports to 1098<bug-pod-simple@rt.cpan.org>. 1099 1100=head1 COPYRIGHT AND DISCLAIMERS 1101 1102Copyright (c) 2002-2004 Sean M. Burke. 1103 1104This library is free software; you can redistribute it and/or modify it 1105under the same terms as Perl itself. 1106 1107This program is distributed in the hope that it will be useful, but 1108without any warranty; without even the implied warranty of 1109merchantability or fitness for a particular purpose. 1110 1111=head1 ACKNOWLEDGEMENTS 1112 1113Thanks to L<Hurricane Electric|http://he.net/> for permission to use its 1114L<Linux man pages online|http://man.he.net/> site for man page links. 1115 1116Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the 1117site for Perl module links. 1118 1119=head1 AUTHOR 1120 1121Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. 1122But don't bother him, he's retired. 1123 1124Pod::Simple is maintained by: 1125 1126=over 1127 1128=item * Allison Randal C<allison@perl.org> 1129 1130=item * Hans Dieter Pearcey C<hdp@cpan.org> 1131 1132=item * David E. Wheeler C<dwheeler@cpan.org> 1133 1134=back 1135 1136=cut 1137