1 2require 5; 3package Pod::Simple::HTML; 4use strict; 5use Pod::Simple::PullParser (); 6use vars qw( 7 @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION 8 $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix 9 $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex 10 $Doctype_decl $Content_decl 11); 12@ISA = ('Pod::Simple::PullParser'); 13$VERSION = '3.14'; 14 15BEGIN { 16 if(defined &DEBUG) { } # no-op 17 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } 18 else { *DEBUG = sub () {0}; } 19} 20 21$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. 22 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 23 # "http://www.w3.org/TR/html4/loose.dtd">\n}; 24 25$Content_decl ||= 26 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; 27 28$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; 29$Computerese = "" unless defined $Computerese; 30$LamePad = '' unless defined $LamePad; 31 32$Linearization_Limit = 120 unless defined $Linearization_Limit; 33 # headings/items longer than that won't get an <a name="..."> 34$Perldoc_URL_Prefix = 'http://search.cpan.org/perldoc?' 35 unless defined $Perldoc_URL_Prefix; 36$Perldoc_URL_Postfix = '' 37 unless defined $Perldoc_URL_Postfix; 38 39 40$Man_URL_Prefix = 'http://man.he.net/man'; 41$Man_URL_Postfix = ''; 42 43$Title_Prefix = '' unless defined $Title_Prefix; 44$Title_Postfix = '' unless defined $Title_Postfix; 45%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text 46 # 'item-text' stuff in the index doesn't quite work, and may 47 # not be a good idea anyhow. 48 49 50__PACKAGE__->_accessorize( 51 'perldoc_url_prefix', 52 # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what 53 # to put before the "Foo%3a%3aBar". 54 # (for singleton mode only?) 55 'perldoc_url_postfix', 56 # what to put after "Foo%3a%3aBar" in the URL. Normally "". 57 58 'man_url_prefix', 59 # In turning L<crontab(5)> into http://whatever/man/1/crontab, what 60 # to put before the "1/crontab". 61 'man_url_postfix', 62 # what to put after the "1/crontab" in the URL. Normally "". 63 64 'batch_mode', # whether we're in batch mode 65 'batch_mode_current_level', 66 # When in batch mode, how deep the current module is: 1 for "LWP", 67 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 68 69 'title_prefix', 'title_postfix', 70 # What to put before and after the title in the head. 71 # Should already be &-escaped 72 73 'html_h_level', 74 75 'html_header_before_title', 76 'html_header_after_title', 77 'html_footer', 78 79 'index', # whether to add an index at the top of each page 80 # (actually it's a table-of-contents, but we'll call it an index, 81 # out of apparently longstanding habit) 82 83 'html_css', # URL of CSS file to point to 84 'html_javascript', # URL of CSS file to point to 85 86 'force_title', # should already be &-escaped 87 'default_title', # should already be &-escaped 88); 89 90#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91my @_to_accept; 92 93%Tagmap = ( 94 'Verbatim' => "\n<pre$Computerese>", 95 '/Verbatim' => "</pre>\n", 96 'VerbatimFormatted' => "\n<pre$Computerese>", 97 '/VerbatimFormatted' => "</pre>\n", 98 'VerbatimB' => "<b>", 99 '/VerbatimB' => "</b>", 100 'VerbatimI' => "<i>", 101 '/VerbatimI' => "</i>", 102 'VerbatimBI' => "<b><i>", 103 '/VerbatimBI' => "</i></b>", 104 105 106 'Data' => "\n", 107 '/Data' => "\n", 108 109 'head1' => "\n<h1>", # And also stick in an <a name="..."> 110 'head2' => "\n<h2>", # '' 111 'head3' => "\n<h3>", # '' 112 'head4' => "\n<h4>", # '' 113 '/head1' => "</a></h1>\n", 114 '/head2' => "</a></h2>\n", 115 '/head3' => "</a></h3>\n", 116 '/head4' => "</a></h4>\n", 117 118 'X' => "<!--\n\tINDEX: ", 119 '/X' => "\n-->", 120 121 changes(qw( 122 Para=p 123 B=b I=i 124 over-bullet=ul 125 over-number=ol 126 over-text=dl 127 over-block=blockquote 128 item-bullet=li 129 item-number=li 130 item-text=dt 131 )), 132 changes2( 133 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } 134 qw[ 135 sample=samp 136 definition=dfn 137 kbd=keyboard 138 variable=var 139 citation=cite 140 abbreviation=abbr 141 acronym=acronym 142 subscript=sub 143 superscript=sup 144 big=big 145 small=small 146 underline=u 147 strikethrough=s 148 ] # no point in providing a way to get <q>...</q>, I think 149 ), 150 151 '/item-bullet' => "</li>$LamePad\n", 152 '/item-number' => "</li>$LamePad\n", 153 '/item-text' => "</a></dt>$LamePad\n", 154 'item-body' => "\n<dd>", 155 '/item-body' => "</dd>\n", 156 157 158 'B' => "<b>", '/B' => "</b>", 159 'I' => "<i>", '/I' => "</i>", 160 'F' => "<em$Computerese>", '/F' => "</em>", 161 'C' => "<code$Computerese>", '/C' => "</code>", 162 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! 163 '/L' => "</a>", 164); 165 166sub changes { 167 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 168 ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" 169 } @_; 170} 171sub changes2 { 172 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 173 ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" 174 } @_; 175} 176 177#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 178sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } 179 # Just so we can run from the command line. No options. 180 # For that, use perldoc! 181#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 182 183sub new { 184 my $new = shift->SUPER::new(@_); 185 #$new->nix_X_codes(1); 186 $new->nbsp_for_S(1); 187 $new->accept_targets( 'html', 'HTML' ); 188 $new->accept_codes('VerbatimFormatted'); 189 $new->accept_codes(@_to_accept); 190 DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n"; 191 192 $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); 193 $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); 194 $new->man_url_prefix( $Man_URL_Prefix ); 195 $new->man_url_postfix( $Man_URL_Postfix ); 196 $new->title_prefix( $Title_Prefix ); 197 $new->title_postfix( $Title_Postfix ); 198 199 $new->html_header_before_title( 200 qq[$Doctype_decl<html><head><title>] 201 ); 202 $new->html_header_after_title( join "\n" => 203 "</title>", 204 $Content_decl, 205 "</head>\n<body class='pod'>", 206 $new->version_tag_comment, 207 "<!-- start doc -->\n", 208 ); 209 $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 210 211 $new->{'Tagmap'} = {%Tagmap}; 212 213 return $new; 214} 215 216sub __adjust_html_h_levels { 217 my ($self) = @_; 218 my $Tagmap = $self->{'Tagmap'}; 219 220 my $add = $self->html_h_level; 221 return unless defined $add; 222 return if ($self->{'Adjusted_html_h_levels'}||0) == $add; 223 224 $add -= 1; 225 for (1 .. 4) { 226 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e; 227 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e; 228 } 229} 230 231sub batch_mode_page_object_init { 232 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; 233 DEBUG and print "Initting $self\n for $module\n", 234 " in $infile\n out $outfile\n depth $depth\n"; 235 $self->batch_mode(1); 236 $self->batch_mode_current_level($depth); 237 return $self; 238} 239 240sub run { 241 my $self = $_[0]; 242 return $self->do_middle if $self->bare_output; 243 return 244 $self->do_beginning && $self->do_middle && $self->do_end; 245} 246 247#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 248 249sub do_beginning { 250 my $self = $_[0]; 251 252 my $title; 253 254 if(defined $self->force_title) { 255 $title = $self->force_title; 256 DEBUG and print "Forcing title to be $title\n"; 257 } else { 258 # Actually try looking for the title in the document: 259 $title = $self->get_short_title(); 260 unless($self->content_seen) { 261 DEBUG and print "No content seen in search for title.\n"; 262 return; 263 } 264 $self->{'Title'} = $title; 265 266 if(defined $title and $title =~ m/\S/) { 267 $title = $self->title_prefix . esc($title) . $self->title_postfix; 268 } else { 269 $title = $self->default_title; 270 $title = '' unless defined $title; 271 DEBUG and print "Title defaults to $title\n"; 272 } 273 } 274 275 276 my $after = $self->html_header_after_title || ''; 277 if($self->html_css) { 278 my $link = 279 $self->html_css =~ m/</ 280 ? $self->html_css # It's a big blob of markup, let's drop it in 281 : sprintf( # It's just a URL, so let's wrap it up 282 qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], 283 $self->html_css, 284 ); 285 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 286 } 287 $self->_add_top_anchor(\$after); 288 289 if($self->html_javascript) { 290 my $link = 291 $self->html_javascript =~ m/</ 292 ? $self->html_javascript # It's a big blob of markup, let's drop it in 293 : sprintf( # It's just a URL, so let's wrap it up 294 qq[<script type="text/javascript" src="%s"></script>\n], 295 $self->html_javascript, 296 ); 297 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 298 } 299 300 print {$self->{'output_fh'}} 301 $self->html_header_before_title || '', 302 $title, # already escaped 303 $after, 304 ; 305 306 DEBUG and print "Returning from do_beginning...\n"; 307 return 1; 308} 309 310sub _add_top_anchor { 311 my($self, $text_r) = @_; 312 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack 313 $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n"; 314 } 315 return; 316} 317 318sub version_tag_comment { 319 my $self = shift; 320 return sprintf 321 "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", 322 esc( 323 ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), 324 $], scalar(gmtime), 325 ), $self->_modnote(), 326 ; 327} 328 329sub _modnote { 330 my $class = ref($_[0]) || $_[0]; 331 return join "\n " => grep m/\S/, split "\n", 332 333qq{ 334If you want to change this HTML document, you probably shouldn't do that 335by changing it directly. Instead, see about changing the calling options 336to $class, and/or subclassing $class, 337then reconverting this document from the Pod source. 338When in doubt, email the author of $class for advice. 339See 'perldoc $class' for more info. 340}; 341 342} 343 344sub do_end { 345 my $self = $_[0]; 346 print {$self->{'output_fh'}} $self->html_footer || ''; 347 return 1; 348} 349 350# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 351# Normally this would just be a call to _do_middle_main_loop -- but we 352# have to do some elaborate things to emit all the content and then 353# summarize it and output it /before/ the content that it's a summary of. 354 355sub do_middle { 356 my $self = $_[0]; 357 return $self->_do_middle_main_loop unless $self->index; 358 359 if( $self->output_string ) { 360 # An efficiency hack 361 my $out = $self->output_string; #it's a reference to it 362 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; 363 $$out .= $sneakytag; 364 $self->_do_middle_main_loop; 365 $sneakytag = quotemeta($sneakytag); 366 my $index = $self->index_as_html(); 367 if( $$out =~ s/$sneakytag/$index/s ) { 368 # Expected case 369 DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n"; 370 } else { 371 DEBUG and print "Odd, couldn't find where to insert the index in the output!\n"; 372 # I don't think this should ever happen. 373 } 374 return 1; 375 } 376 377 unless( $self->output_fh ) { 378 require Carp; 379 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); 380 } 381 382 # If we get here, we're outputting to a FH. So we need to do some magic. 383 # Namely, divert all content to a string, which we output after the index. 384 my $fh = $self->output_fh; 385 my $content = ''; 386 { 387 # Our horrible bait and switch: 388 $self->output_string( \$content ); 389 $self->_do_middle_main_loop; 390 $self->abandon_output_string(); 391 $self->output_fh($fh); 392 } 393 print $fh $self->index_as_html(); 394 print $fh $content; 395 396 return 1; 397} 398 399########################################################################### 400 401sub index_as_html { 402 my $self = $_[0]; 403 # This is meant to be called AFTER the input document has been parsed! 404 405 my $points = $self->{'PSHTML_index_points'} || []; 406 407 @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; 408 # There's no point in having a 0-item or 1-item index, I dare say. 409 410 my(@out) = qq{\n<div class='indexgroup'>}; 411 my $level = 0; 412 413 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); 414 foreach my $p (@$points, ['head0', '(end)']) { 415 ($tagname, $text) = @$p; 416 $anchorname = $self->section_escape($text); 417 if( $tagname =~ m{^head(\d+)$} ) { 418 $target_level = 0 + $1; 419 } else { # must be some kinda list item 420 if($previous_tagname =~ m{^head\d+$} ) { 421 $target_level = $level + 1; 422 } else { 423 $target_level = $level; # no change needed 424 } 425 } 426 427 # Get to target_level by opening or closing ULs 428 while($level > $target_level) 429 { --$level; push @out, (" " x $level) . "</ul>"; } 430 while($level < $target_level) 431 { ++$level; push @out, (" " x ($level-1)) 432 . "<ul class='indexList indexList$level'>"; } 433 434 $previous_tagname = $tagname; 435 next unless $level; 436 437 $indent = ' ' x $level; 438 push @out, sprintf 439 "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", 440 $indent, $level, esc($anchorname), esc($text) 441 ; 442 } 443 push @out, "</div>\n"; 444 return join "\n", @out; 445} 446 447########################################################################### 448 449sub _do_middle_main_loop { 450 my $self = $_[0]; 451 my $fh = $self->{'output_fh'}; 452 my $tagmap = $self->{'Tagmap'}; 453 454 $self->__adjust_html_h_levels; 455 456 my($token, $type, $tagname, $linkto, $linktype); 457 my @stack; 458 my $dont_wrap = 0; 459 460 while($token = $self->get_token) { 461 462 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 463 if( ($type = $token->type) eq 'start' ) { 464 if(($tagname = $token->tagname) eq 'L') { 465 $linktype = $token->attr('type') || 'insane'; 466 467 $linkto = $self->do_link($token); 468 469 if(defined $linkto and length $linkto) { 470 esc($linkto); 471 # (Yes, SGML-escaping applies on top of %-escaping! 472 # But it's rarely noticeable in practice.) 473 print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; 474 } else { 475 print $fh "<a>"; # Yes, an 'a' element with no attributes! 476 } 477 478 } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { 479 print $fh $tagmap->{$tagname} || next; 480 481 my @to_unget; 482 while(1) { 483 push @to_unget, $self->get_token; 484 last if $to_unget[-1]->is_end 485 and $to_unget[-1]->tagname eq $tagname; 486 487 # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) 488 } 489 490 my $name = $self->linearize_tokens(@to_unget); 491 $name = $self->do_section($name, $token) if defined $name; 492 493 print $fh "<a "; 494 print $fh "class='u' href='#___top' title='click to go to top of document'\n" 495 if $tagname =~ m/^head\d$/s; 496 497 if(defined $name) { 498 my $esc = esc( $self->section_name_tidy( $name ) ); 499 print $fh qq[name="$esc"]; 500 DEBUG and print "Linearized ", scalar(@to_unget), 501 " tokens as \"$name\".\n"; 502 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] 503 if $ToIndex{ $tagname }; 504 # Obviously, this discards all formatting codes (saving 505 # just their content), but ahwell. 506 507 } else { # ludicrously long, so nevermind 508 DEBUG and print "Linearized ", scalar(@to_unget), 509 " tokens, but it was too long, so nevermind.\n"; 510 } 511 print $fh "\n>"; 512 $self->unget_token(@to_unget); 513 514 } elsif ($tagname eq 'Data') { 515 my $next = $self->get_token; 516 next unless defined $next; 517 unless( $next->type eq 'text' ) { 518 $self->unget_token($next); 519 next; 520 } 521 DEBUG and print " raw text ", $next->text, "\n"; 522 print $fh "\n" . $next->text . "\n"; 523 next; 524 525 } else { 526 if( $tagname =~ m/^over-/s ) { 527 push @stack, ''; 528 } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { 529 print $fh $stack[-1]; 530 $stack[-1] = ''; 531 } 532 print $fh $tagmap->{$tagname} || next; 533 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" 534 or $tagname eq 'X'; 535 } 536 537 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 538 } elsif( $type eq 'end' ) { 539 if( ($tagname = $token->tagname) =~ m/^over-/s ) { 540 if( my $end = pop @stack ) { 541 print $fh $end; 542 } 543 } elsif( $tagname =~ m/^item-/s and @stack) { 544 $stack[-1] = $tagmap->{"/$tagname"}; 545 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { 546 $self->unget_token($next); 547 if( $next->type eq 'start' ) { 548 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; 549 $stack[-1] = $tagmap->{"/item-body"}; 550 } 551 } 552 next; 553 } 554 print $fh $tagmap->{"/$tagname"} || next; 555 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; 556 557 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 558 } elsif( $type eq 'text' ) { 559 esc($type = $token->text); # reuse $type, why not 560 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; 561 print $fh $type; 562 } 563 564 } 565 return 1; 566} 567 568########################################################################### 569# 570 571sub do_section { 572 my($self, $name, $token) = @_; 573 return $name; 574} 575 576sub do_link { 577 my($self, $token) = @_; 578 my $type = $token->attr('type'); 579 if(!defined $type) { 580 $self->whine("Typeless L!?", $token->attr('start_line')); 581 } elsif( $type eq 'pod') { return $self->do_pod_link($token); 582 } elsif( $type eq 'url') { return $self->do_url_link($token); 583 } elsif( $type eq 'man') { return $self->do_man_link($token); 584 } else { 585 $self->whine("L of unknown type $type!?", $token->attr('start_line')); 586 } 587 return 'FNORG'; # should never get called 588} 589 590# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 591 592sub do_url_link { return $_[1]->attr('to') } 593 594sub do_man_link { 595 my ($self, $link) = @_; 596 my $to = $link->attr('to'); 597 my $frag = $link->attr('section'); 598 599 return undef unless defined $to and length $to; # should never happen 600 601 $frag = $self->section_escape($frag) 602 if defined $frag and length($frag .= ''); # (stringify) 603 604 DEBUG and print "Resolving \"$to/$frag\"\n\n"; 605 606 return $self->resolve_man_page_link($to, $frag); 607} 608 609 610sub do_pod_link { 611 # And now things get really messy... 612 my($self, $link) = @_; 613 my $to = $link->attr('to'); 614 my $section = $link->attr('section'); 615 return undef unless( # should never happen 616 (defined $to and length $to) or 617 (defined $section and length $section) 618 ); 619 620 $section = $self->section_escape($section) 621 if defined $section and length($section .= ''); # (stringify) 622 623 DEBUG and printf "Resolving \"%s\" \"%s\"...\n", 624 $to || "(nil)", $section || "(nil)"; 625 626 { 627 # An early hack: 628 my $complete_url = $self->resolve_pod_link_by_table($to, $section); 629 if( $complete_url ) { 630 DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ", 631 $complete_url, "\n (Returning that.)\n"; 632 return $complete_url; 633 } else { 634 DEBUG > 4 and print " resolve_pod_link_by_table(T,S)", 635 " didn't return anything interesting.\n"; 636 } 637 } 638 639 if(defined $to and length $to) { 640 # Give this routine first hack again 641 my $there = $self->resolve_pod_link_by_table($to); 642 if(defined $there and length $there) { 643 DEBUG > 1 644 and print "resolve_pod_link_by_table(T) gives $there\n"; 645 } else { 646 $there = 647 $self->resolve_pod_page_link($to, $section); 648 # (I pass it the section value, but I don't see a 649 # particular reason it'd use it.) 650 DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n"; 651 unless( defined $there and length $there ) { 652 DEBUG and print "Can't resolve $to\n"; 653 return undef; 654 } 655 # resolve_pod_page_link returning undef is how it 656 # can signal that it gives up on making a link 657 } 658 $to = $there; 659 } 660 661 #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n"; 662 663 my $out = (defined $to and length $to) ? $to : ''; 664 $out .= "#" . $section if defined $section and length $section; 665 666 unless(length $out) { # sanity check 667 DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n", 668 $to || "(nil)", $section || "(nil)"; 669 return undef; 670 } 671 672 DEBUG and print "Resolved to $out\n"; 673 return $out; 674} 675 676 677# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 678 679sub section_escape { 680 my($self, $section) = @_; 681 return $self->section_url_escape( 682 $self->section_name_tidy($section) 683 ); 684} 685 686sub section_name_tidy { 687 my($self, $section) = @_; 688 $section =~ tr/ /_/; 689 $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters 690 $section = $self->unicode_escape_url($section); 691 $section = '_' unless length $section; 692 return $section; 693} 694 695sub section_url_escape { shift->general_url_escape(@_) } 696sub pagepath_url_escape { shift->general_url_escape(@_) } 697sub manpage_url_escape { shift->general_url_escape(@_) } 698 699sub general_url_escape { 700 my($self, $string) = @_; 701 702 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; 703 # express Unicode things as urlencode(utf(orig)). 704 705 # A pretty conservative escaping, behoovey even for query components 706 # of a URL (see RFC 2396) 707 708 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; 709 # Yes, stipulate the list without a range, so that this can work right on 710 # all charsets that this module happens to run under. 711 # Altho, hmm, what about that ord? Presumably that won't work right 712 # under non-ASCII charsets. Something should be done 713 # about that, I guess? 714 715 return $string; 716} 717 718#-------------------------------------------------------------------------- 719# 720# Oh look, a yawning portal to Hell! Let's play touch football right by it! 721# 722 723sub resolve_pod_page_link { 724 # resolve_pod_page_link must return a properly escaped URL 725 my $self = shift; 726 return $self->batch_mode() 727 ? $self->resolve_pod_page_link_batch_mode(@_) 728 : $self->resolve_pod_page_link_singleton_mode(@_) 729 ; 730} 731 732sub resolve_pod_page_link_singleton_mode { 733 my($self, $it) = @_; 734 return undef unless defined $it and length $it; 735 my $url = $self->pagepath_url_escape($it); 736 737 $url =~ s{::$}{}s; # probably never comes up anyway 738 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? 739 740 return undef unless length $url; 741 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; 742} 743 744sub resolve_pod_page_link_batch_mode { 745 my($self, $to) = @_; 746 DEBUG > 1 and print " During batch mode, resolving $to ...\n"; 747 my @path = grep length($_), split m/::/s, $to, -1; 748 unless( @path ) { # sanity 749 DEBUG and print "Very odd! Splitting $to gives (nil)!\n"; 750 return undef; 751 } 752 $self->batch_mode_rectify_path(\@path); 753 my $out = join('/', map $self->pagepath_url_escape($_), @path) 754 . $HTML_EXTENSION; 755 DEBUG > 1 and print " => $out\n"; 756 return $out; 757} 758 759sub batch_mode_rectify_path { 760 my($self, $pathbits) = @_; 761 my $level = $self->batch_mode_current_level; 762 $level--; # how many levels up to go to get to the root 763 if($level < 1) { 764 unshift @$pathbits, '.'; # just to be pretty 765 } else { 766 unshift @$pathbits, ('..') x $level; 767 } 768 return; 769} 770 771sub resolve_man_page_link { 772 my ($self, $to, $frag) = @_; 773 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/; 774 775 return undef unless defined $page and length $page; 776 $section ||= 1; 777 778 return $self->man_url_prefix . "$section/" 779 . $self->manpage_url_escape($page) 780 . $self->man_url_postfix; 781} 782 783#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 784 785sub resolve_pod_link_by_table { 786 # A crazy hack to allow specifying custom L<foo> => URL mappings 787 788 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut 789 790 my($self, $to, $section) = @_; 791 792 # TODO: add a method that actually populates podhtml_LOT from a file? 793 794 if(defined $section) { 795 $to = '' unless defined $to and length $to; 796 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! 797 } else { 798 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! 799 } 800 return; 801} 802 803########################################################################### 804 805sub linearize_tokens { # self, tokens 806 my $self = shift; 807 my $out = ''; 808 809 my $t; 810 while($t = shift @_) { 811 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { 812 $out .= $t; # a string, or some insane thing 813 } elsif($t->is_text) { 814 $out .= $t->text; 815 } elsif($t->is_start and $t->tag eq 'X') { 816 # Ignore until the end of this X<...> sequence: 817 my $x_open = 1; 818 while($x_open) { 819 next if( ($t = shift @_)->is_text ); 820 if( $t->is_start and $t->tag eq 'X') { ++$x_open } 821 elsif($t->is_end and $t->tag eq 'X') { --$x_open } 822 } 823 } 824 } 825 return undef if length $out > $Linearization_Limit; 826 return $out; 827} 828 829#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 830 831sub unicode_escape_url { 832 my($self, $string) = @_; 833 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; 834 # Turn char 1234 into "(1234)" 835 return $string; 836} 837 838#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 839sub esc { # a function. 840 if(defined wantarray) { 841 if(wantarray) { 842 @_ = splice @_; # break aliasing 843 } else { 844 my $x = shift; 845 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; 846 return $x; 847 } 848 } 849 foreach my $x (@_) { 850 # Escape things very cautiously: 851 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg 852 if defined $x; 853 # Leave out "- so that "--" won't make it thru in X-generated comments 854 # with text in them. 855 856 # Yes, stipulate the list without a range, so that this can work right on 857 # all charsets that this module happens to run under. 858 # Altho, hmm, what about that ord? Presumably that won't work right 859 # under non-ASCII charsets. Something should be done about that. 860 } 861 return @_; 862} 863 864#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 865 8661; 867__END__ 868 869=head1 NAME 870 871Pod::Simple::HTML - convert Pod to HTML 872 873=head1 SYNOPSIS 874 875 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod 876 877 878=head1 DESCRIPTION 879 880This class is for making an HTML rendering of a Pod document. 881 882This is a subclass of L<Pod::Simple::PullParser> and inherits all its 883methods (and options). 884 885Note that if you want to do a batch conversion of a lot of Pod 886documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. 887 888 889 890=head1 CALLING FROM THE COMMAND LINE 891 892TODO 893 894 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html 895 896 897 898=head1 CALLING FROM PERL 899 900TODO make a new object, set any options, and use parse_from_file 901 902 903=head1 METHODS 904 905TODO 906all (most?) accessorized methods 907 908 909=head1 SUBCLASSING 910 911TODO 912 913 can just set any of: html_css html_javascript title_prefix 914 'html_header_before_title', 915 'html_header_after_title', 916 'html_footer', 917 918maybe override do_pod_link 919 920maybe override do_beginning do_end 921 922=head1 SEE ALSO 923 924L<Pod::Simple>, L<Pod::Simple::HTMLBatch> 925 926TODO: a corpus of sample Pod input and HTML output? Or common 927idioms? 928 929=head1 SUPPORT 930 931Questions or discussion about POD and Pod::Simple should be sent to the 932pod-people@perl.org mail list. Send an empty email to 933pod-people-subscribe@perl.org to subscribe. 934 935This module is managed in an open GitHub repository, 936L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or 937to clone L<git://github.com/theory/pod-simple.git> and send patches! 938 939Patches against Pod::Simple are welcome. Please send bug reports to 940<bug-pod-simple@rt.cpan.org>. 941 942=head1 COPYRIGHT AND DISCLAIMERS 943 944Copyright (c) 2002-2004 Sean M. Burke. 945 946This library is free software; you can redistribute it and/or modify it 947under the same terms as Perl itself. 948 949This program is distributed in the hope that it will be useful, but 950without any warranty; without even the implied warranty of 951merchantability or fitness for a particular purpose. 952 953=head1 ACKNOWLEDGEMENTS 954 955Thanks to L<Hurricane Electrict|http://he.net/> for permission to use its 956L<Linux man pages online|http://man.he.net/> site for man page links. 957 958Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the 959site for Perl module links. 960 961=head1 AUTHOR 962 963Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. 964But don't bother him, he's retired. 965 966Pod::Simple is maintained by: 967 968=over 969 970=item * Allison Randal C<allison@perl.org> 971 972=item * Hans Dieter Pearcey C<hdp@cpan.org> 973 974=item * David E. Wheeler C<dwheeler@cpan.org> 975 976=back 977 978=cut 979