1 2package Pod::Simple::BlackBox; 3# 4# "What's in the box?" "Pain." 5# 6########################################################################### 7# 8# This is where all the scary things happen: parsing lines into 9# paragraphs; and then into directives, verbatims, and then also 10# turning formatting sequences into treelets. 11# 12# Are you really sure you want to read this code? 13# 14#----------------------------------------------------------------------------- 15# 16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work 17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and 18# to call the proper callbacks on the treelets. 19# 20# Every node in a treelet is a ['name', {attrhash}, ...children...] 21 22use integer; # vroom! 23use strict; 24use Carp (); 25use vars qw($VERSION ); 26$VERSION = '3.28'; 27#use constant DEBUG => 7; 28BEGIN { 29 require Pod::Simple; 30 *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG 31} 32 33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 34 35sub parse_line { shift->parse_lines(@_) } # alias 36 37# - - - Turn back now! Run away! - - - 38 39sub parse_lines { # Usage: $parser->parse_lines(@lines) 40 # an undef means end-of-stream 41 my $self = shift; 42 43 my $code_handler = $self->{'code_handler'}; 44 my $cut_handler = $self->{'cut_handler'}; 45 my $wl_handler = $self->{'whiteline_handler'}; 46 $self->{'line_count'} ||= 0; 47 48 my $scratch; 49 50 DEBUG > 4 and 51 print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; 52 53 DEBUG > 5 and 54 print "# About to parse lines: ", 55 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; 56 57 my $paras = ($self->{'paras'} ||= []); 58 # paragraph buffer. Because we need to defer processing of =over 59 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer 60 # to process this. 61 62 $self->{'pod_para_count'} ||= 0; 63 64 my $line; 65 foreach my $source_line (@_) { 66 if( $self->{'source_dead'} ) { 67 DEBUG > 4 and print "# Source is dead.\n"; 68 last; 69 } 70 71 unless( defined $source_line ) { 72 DEBUG > 4 and print "# Undef-line seen.\n"; 73 74 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; 75 push @$paras, $paras->[-1], $paras->[-1]; 76 # So that it definitely fills the buffer. 77 $self->{'source_dead'} = 1; 78 $self->_ponder_paragraph_buffer; 79 next; 80 } 81 82 83 if( $self->{'line_count'}++ ) { 84 ($line = $source_line) =~ tr/\n\r//d; 85 # If we don't have two vars, we'll end up with that there 86 # tr/// modding the (potentially read-only) original source line! 87 88 } else { 89 DEBUG > 2 and print "First line: [$source_line]\n"; 90 91 if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { 92 DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; 93 $self->_handle_encoding_line( "=encoding utf8" ); 94 delete $self->{'_processed_encoding'}; 95 $line =~ tr/\n\r//d; 96 97 } elsif( $line =~ s/^\xFE\xFF//s ) { 98 DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; 99 $self->scream( 100 $self->{'line_count'}, 101 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 102 ); 103 splice @_; 104 push @_, undef; 105 next; 106 107 # TODO: implement somehow? 108 109 } elsif( $line =~ s/^\xFF\xFE//s ) { 110 DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; 111 $self->scream( 112 $self->{'line_count'}, 113 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." 114 ); 115 splice @_; 116 push @_, undef; 117 next; 118 119 # TODO: implement somehow? 120 121 } else { 122 DEBUG > 2 and print "First line is BOM-less.\n"; 123 ($line = $source_line) =~ tr/\n\r//d; 124 } 125 } 126 127 # Try to guess encoding. Inlined for performance reasons. 128 if(!$self->{'parse_characters'} && !$self->{'encoding'} 129 && ($self->{'in_pod'} || $line =~ /^=/s) 130 && $line =~ /[^\x00-\x7f]/ 131 ) { 132 my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1'; 133 $self->_handle_encoding_line( "=encoding $encoding" ); 134 $self->{'_transcoder'} && $self->{'_transcoder'}->($line); 135 136 my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/; 137 138 $self->whine( 139 $self->{'line_count'}, 140 "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding" 141 ); 142 } 143 144 DEBUG > 5 and print "# Parsing line: [$line]\n"; 145 146 if(!$self->{'in_pod'}) { 147 if($line =~ m/^=([a-zA-Z]+)/s) { 148 if($1 eq 'cut') { 149 $self->scream( 150 $self->{'line_count'}, 151 "=cut found outside a pod block. Skipping to next block." 152 ); 153 154 ## Before there were errata sections in the world, it was 155 ## least-pessimal to abort processing the file. But now we can 156 ## just barrel on thru (but still not start a pod block). 157 #splice @_; 158 #push @_, undef; 159 160 next; 161 } else { 162 $self->{'in_pod'} = $self->{'start_of_pod_block'} 163 = $self->{'last_was_blank'} = 1; 164 # And fall thru to the pod-mode block further down 165 } 166 } else { 167 DEBUG > 5 and print "# It's a code-line.\n"; 168 $code_handler->(map $_, $line, $self->{'line_count'}, $self) 169 if $code_handler; 170 # Note: this may cause code to be processed out of order relative 171 # to pods, but in order relative to cuts. 172 173 # Note also that we haven't yet applied the transcoding to $line 174 # by time we call $code_handler! 175 176 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { 177 # That RE is from perlsyn, section "Plain Old Comments (Not!)", 178 #$fname = $2 if defined $2; 179 #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; 180 DEBUG > 1 and print "# Setting nextline to $1\n"; 181 $self->{'line_count'} = $1 - 1; 182 } 183 184 next; 185 } 186 } 187 188 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 189 # Else we're in pod mode: 190 191 # Apply any necessary transcoding: 192 $self->{'_transcoder'} && $self->{'_transcoder'}->($line); 193 194 # HERE WE CATCH =encoding EARLY! 195 if( $line =~ m/^=encoding\s+\S+\s*$/s ) { 196 next if $self->parse_characters; # Ignore this line 197 $line = $self->_handle_encoding_line( $line ); 198 } 199 200 if($line =~ m/^=cut/s) { 201 # here ends the pod block, and therefore the previous pod para 202 DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; 203 $self->{'in_pod'} = 0; 204 # ++$self->{'pod_para_count'}; 205 $self->_ponder_paragraph_buffer(); 206 # by now it's safe to consider the previous paragraph as done. 207 $cut_handler->(map $_, $line, $self->{'line_count'}, $self) 208 if $cut_handler; 209 210 # TODO: add to docs: Note: this may cause cuts to be processed out 211 # of order relative to pods, but in order relative to code. 212 213 } elsif($line =~ m/^(\s*)$/s) { # it's a blank line 214 if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line 215 $wl_handler->(map $_, $line, $self->{'line_count'}, $self) 216 if $wl_handler; 217 } 218 219 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 220 DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; 221 push @{$paras->[-1]}, $line; 222 } # otherwise it's not interesting 223 224 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { 225 DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; 226 } 227 228 $self->{'last_was_blank'} = 1; 229 230 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... 231 232 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { 233 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS 234 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; 235 # Note that in "=head1 foo", the WS is lost. 236 # Example: ['=head1', {'start_line' => 123}, ' foo'] 237 238 ++$self->{'pod_para_count'}; 239 240 $self->_ponder_paragraph_buffer(); 241 # by now it's safe to consider the previous paragraph as done. 242 243 push @$paras, $new; # the new incipient paragraph 244 DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; 245 246 } elsif($line =~ m/^\s/s) { 247 248 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { 249 DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; 250 push @{$paras->[-1]}, $line; 251 } else { 252 ++$self->{'pod_para_count'}; 253 $self->_ponder_paragraph_buffer(); 254 # by now it's safe to consider the previous paragraph as done. 255 DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; 256 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; 257 } 258 } else { 259 ++$self->{'pod_para_count'}; 260 $self->_ponder_paragraph_buffer(); 261 # by now it's safe to consider the previous paragraph as done. 262 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; 263 DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; 264 } 265 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 266 267 } else { 268 # It's a non-blank line /continuing/ the current para 269 if(@$paras) { 270 DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; 271 push @{$paras->[-1]}, $line; 272 } else { 273 # Unexpected case! 274 die "Continuing a paragraph but \@\$paras is empty?"; 275 } 276 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; 277 } 278 279 } # ends the big while loop 280 281 DEBUG > 1 and print(pretty(@$paras), "\n"); 282 return $self; 283} 284 285#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 286 287sub _handle_encoding_line { 288 my($self, $line) = @_; 289 290 return if $self->parse_characters; 291 292 # The point of this routine is to set $self->{'_transcoder'} as indicated. 293 294 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; 295 DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; 296 297 my $e = $1; 298 my $orig = $e; 299 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; 300 301 my $enc_error; 302 303 # Cf. perldoc Encode and perldoc Encode::Supported 304 305 require Pod::Simple::Transcode; 306 307 if( $self->{'encoding'} ) { 308 my $norm_current = $self->{'encoding'}; 309 my $norm_e = $e; 310 foreach my $that ($norm_current, $norm_e) { 311 $that = lc($that); 312 $that =~ s/[-_]//g; 313 } 314 if($norm_current eq $norm_e) { 315 DEBUG > 1 and print "The '=encoding $orig' line is ", 316 "redundant. ($norm_current eq $norm_e). Ignoring.\n"; 317 $enc_error = ''; 318 # But that doesn't necessarily mean that the earlier one went okay 319 } else { 320 $enc_error = "Encoding is already set to " . $self->{'encoding'}; 321 DEBUG > 1 and print $enc_error; 322 } 323 } elsif ( 324 # OK, let's turn on the encoding 325 do { 326 DEBUG > 1 and print " Setting encoding to $e\n"; 327 $self->{'encoding'} = $e; 328 1; 329 } 330 and $e eq 'HACKRAW' 331 ) { 332 DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; 333 334 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { 335 336 die($enc_error = "WHAT? _transcoder is already set?!") 337 if $self->{'_transcoder'}; # should never happen 338 require Pod::Simple::Transcode; 339 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); 340 eval { 341 my @x = ('', "abc", "123"); 342 $self->{'_transcoder'}->(@x); 343 }; 344 $@ && die( $enc_error = 345 "Really unexpected error setting up encoding $e: $@\nAborting" 346 ); 347 $self->{'detected_encoding'} = $e; 348 349 } else { 350 my @supported = Pod::Simple::Transcode::->all_encodings; 351 352 # Note unsupported, and complain 353 DEBUG and print " Encoding [$e] is unsupported.", 354 "\nSupporteds: @supported\n"; 355 my $suggestion = ''; 356 357 # Look for a near match: 358 my $norm = lc($e); 359 $norm =~ tr[-_][]d; 360 my $n; 361 foreach my $enc (@supported) { 362 $n = lc($enc); 363 $n =~ tr[-_][]d; 364 next unless $n eq $norm; 365 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; 366 last; 367 } 368 my $encmodver = Pod::Simple::Transcode::->encmodver; 369 $enc_error = join '' => 370 "This document probably does not appear as it should, because its ", 371 "\"=encoding $e\" line calls for an unsupported encoding.", 372 $suggestion, " [$encmodver\'s supported encodings are: @supported]" 373 ; 374 375 $self->scream( $self->{'line_count'}, $enc_error ); 376 } 377 push @{ $self->{'encoding_command_statuses'} }, $enc_error; 378 if (defined($self->{'_processed_encoding'})) { 379 # Should never happen 380 die "Nested processed encoding."; 381 } 382 $self->{'_processed_encoding'} = $orig; 383 384 return $line; 385} 386 387# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 388 389sub _handle_encoding_second_level { 390 # By time this is called, the encoding (if well formed) will already 391 # have been acted one. 392 my($self, $para) = @_; 393 my @x = @$para; 394 my $content = join ' ', splice @x, 2; 395 $content =~ s/^\s+//s; 396 $content =~ s/\s+$//s; 397 398 DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; 399 400 if (defined($self->{'_processed_encoding'})) { 401 #if($content ne $self->{'_processed_encoding'}) { 402 # Could it happen? 403 #} 404 delete $self->{'_processed_encoding'}; 405 # It's already been handled. Check for errors. 406 if(! $self->{'encoding_command_statuses'} ) { 407 DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; 408 } elsif( $self->{'encoding_command_statuses'}[-1] ) { 409 $self->whine( $para->[1]{'start_line'}, 410 sprintf "Couldn't do %s: %s", 411 $self->{'encoding_command_reqs' }[-1], 412 $self->{'encoding_command_statuses'}[-1], 413 ); 414 } else { 415 DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; 416 } 417 418 } else { 419 # Otherwise it's a syntax error 420 $self->whine( $para->[1]{'start_line'}, 421 "Invalid =encoding syntax: $content" 422 ); 423 } 424 425 return; 426} 427 428#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` 429 430{ 431my $m = -321; # magic line number 432 433sub _gen_errata { 434 my $self = $_[0]; 435 # Return 0 or more fake-o paragraphs explaining the accumulated 436 # errors on this document. 437 438 return() unless $self->{'errata'} and keys %{$self->{'errata'}}; 439 440 my @out; 441 442 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { 443 push @out, 444 ['=item', {'start_line' => $m}, "Around line $line:"], 445 map( ['~Para', {'start_line' => $m, '~cooked' => 1}, 446 #['~Top', {'start_line' => $m}, 447 $_ 448 #] 449 ], 450 @{$self->{'errata'}{$line}} 451 ) 452 ; 453 } 454 455 # TODO: report of unknown entities? unrenderable characters? 456 457 unshift @out, 458 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], 459 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, 460 "Hey! ", 461 ['B', {}, 462 'The above document had some coding errors, which are explained below:' 463 ] 464 ], 465 ['=over', {'start_line' => $m, 'errata' => 1}, ''], 466 ; 467 468 push @out, 469 ['=back', {'start_line' => $m, 'errata' => 1}, ''], 470 ; 471 472 DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; 473 474 return @out; 475} 476 477} 478 479#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 480 481############################################################################## 482## 483## stop reading now stop reading now stop reading now stop reading now stop 484## 485## HERE IT BECOMES REALLY SCARY 486## 487## stop reading now stop reading now stop reading now stop reading now stop 488## 489############################################################################## 490 491sub _ponder_paragraph_buffer { 492 493 # Para-token types as found in the buffer. 494 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, 495 # =over, =back, =item 496 # and the null =pod (to be complained about if over one line) 497 # 498 # "~data" paragraphs are something we generate at this level, depending on 499 # a currently open =over region 500 501 # Events fired: Begin and end for: 502 # directivename (like head1 .. head4), item, extend, 503 # for (from =begin...=end, =for), 504 # over-bullet, over-number, over-text, over-block, 505 # item-bullet, item-number, item-text, 506 # Document, 507 # Data, Para, Verbatim 508 # B, C, longdirname (TODO -- wha?), etc. for all directives 509 # 510 511 my $self = $_[0]; 512 my $paras; 513 return unless @{$paras = $self->{'paras'}}; 514 my $curr_open = ($self->{'curr_open'} ||= []); 515 516 my $scratch; 517 518 DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; 519 520 # We have something in our buffer. So apparently the document has started. 521 unless($self->{'doc_has_started'}) { 522 $self->{'doc_has_started'} = 1; 523 524 my $starting_contentless; 525 $starting_contentless = 526 ( 527 !@$curr_open 528 and @$paras and ! grep $_->[0] ne '~end', @$paras 529 # i.e., if the paras is all ~ends 530 ) 531 ; 532 DEBUG and print "# Starting ", 533 $starting_contentless ? 'contentless' : 'contentful', 534 " document\n" 535 ; 536 537 $self->_handle_element_start( 538 ($scratch = 'Document'), 539 { 540 'start_line' => $paras->[0][1]{'start_line'}, 541 $starting_contentless ? ( 'contentless' => 1 ) : (), 542 }, 543 ); 544 } 545 546 my($para, $para_type); 547 while(@$paras) { 548 last if @$paras == 1 and 549 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' 550 or $paras->[0][0] eq '=item' ) 551 ; 552 # Those're the three kinds of paragraphs that require lookahead. 553 # Actually, an "=item Foo" inside an <over type=text> region 554 # and any =item inside an <over type=block> region (rare) 555 # don't require any lookahead, but all others (bullets 556 # and numbers) do. 557 558# TODO: whinge about many kinds of directives in non-resolving =for regions? 559# TODO: many? like what? =head1 etc? 560 561 $para = shift @$paras; 562 $para_type = $para->[0]; 563 564 DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", 565 $self->_dump_curr_open(), ")\n"; 566 567 if($para_type eq '=for') { 568 next if $self->_ponder_for($para,$curr_open,$paras); 569 570 } elsif($para_type eq '=begin') { 571 next if $self->_ponder_begin($para,$curr_open,$paras); 572 573 } elsif($para_type eq '=end') { 574 next if $self->_ponder_end($para,$curr_open,$paras); 575 576 } elsif($para_type eq '~end') { # The virtual end-document signal 577 next if $self->_ponder_doc_end($para,$curr_open,$paras); 578 } 579 580 581 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 582 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 583 if(grep $_->[1]{'~ignore'}, @$curr_open) { 584 DEBUG > 1 and 585 print "Skipping $para_type paragraph because in ignore mode.\n"; 586 next; 587 } 588 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 589 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ 590 591 if($para_type eq '=pod') { 592 $self->_ponder_pod($para,$curr_open,$paras); 593 594 } elsif($para_type eq '=over') { 595 next if $self->_ponder_over($para,$curr_open,$paras); 596 597 } elsif($para_type eq '=back') { 598 next if $self->_ponder_back($para,$curr_open,$paras); 599 600 } else { 601 602 # All non-magical codes!!! 603 604 # Here we start using $para_type for our own twisted purposes, to 605 # mean how it should get treated, not as what the element name 606 # should be. 607 608 DEBUG > 1 and print "Pondering non-magical $para_type\n"; 609 610 my $i; 611 612 # Enforce some =headN discipline 613 if($para_type =~ m/^=head\d$/s 614 and ! $self->{'accept_heads_anywhere'} 615 and @$curr_open 616 and $curr_open->[-1][0] eq '=over' 617 ) { 618 DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; 619 $self->whine( 620 $para->[1]{'start_line'}, 621 "You forgot a '=back' before '$para_type'" 622 ); 623 unshift @$paras, ['=back', {}, ''], $para; # close the =over 624 next; 625 } 626 627 628 if($para_type eq '=item') { 629 630 my $over; 631 unless(@$curr_open and 632 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { 633 $self->whine( 634 $para->[1]{'start_line'}, 635 "'=item' outside of any '=over'" 636 ); 637 unshift @$paras, 638 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 639 $para 640 ; 641 next; 642 } 643 644 645 my $over_type = $over->[1]{'~type'}; 646 647 if(!$over_type) { 648 # Shouldn't happen1 649 die "Typeless over in stack, starting at line " 650 . $over->[1]{'start_line'}; 651 652 } elsif($over_type eq 'block') { 653 unless($curr_open->[-1][1]{'~bitched_about'}) { 654 $curr_open->[-1][1]{'~bitched_about'} = 1; 655 $self->whine( 656 $curr_open->[-1][1]{'start_line'}, 657 "You can't have =items (as at line " 658 . $para->[1]{'start_line'} 659 . ") unless the first thing after the =over is an =item" 660 ); 661 } 662 # Just turn it into a paragraph and reconsider it 663 $para->[0] = '~Para'; 664 unshift @$paras, $para; 665 next; 666 667 } elsif($over_type eq 'text') { 668 my $item_type = $self->_get_item_type($para); 669 # That kills the content of the item if it's a number or bullet. 670 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 671 672 if($item_type eq 'text') { 673 # Nothing special needs doing for 'text' 674 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 675 $self->whine( 676 $para->[1]{'start_line'}, 677 "Expected text after =item, not a $item_type" 678 ); 679 # Undo our clobbering: 680 push @$para, $para->[1]{'~orig_content'}; 681 delete $para->[1]{'number'}; 682 # Only a PROPER item-number element is allowed 683 # to have a number attribute. 684 } else { 685 die "Unhandled item type $item_type"; # should never happen 686 } 687 688 # =item-text thingies don't need any assimilation, it seems. 689 690 } elsif($over_type eq 'number') { 691 my $item_type = $self->_get_item_type($para); 692 # That kills the content of the item if it's a number or bullet. 693 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 694 695 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 696 697 if($item_type eq 'bullet') { 698 # Hm, it's not numeric. Correct for this. 699 $para->[1]{'number'} = $expected_value; 700 $self->whine( 701 $para->[1]{'start_line'}, 702 "Expected '=item $expected_value'" 703 ); 704 push @$para, $para->[1]{'~orig_content'}; 705 # restore the bullet, blocking the assimilation of next para 706 707 } elsif($item_type eq 'text') { 708 # Hm, it's not numeric. Correct for this. 709 $para->[1]{'number'} = $expected_value; 710 $self->whine( 711 $para->[1]{'start_line'}, 712 "Expected '=item $expected_value'" 713 ); 714 # Text content will still be there and will block next ~Para 715 716 } elsif($item_type ne 'number') { 717 die "Unknown item type $item_type"; # should never happen 718 719 } elsif($expected_value == $para->[1]{'number'}) { 720 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; 721 722 } else { 723 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, 724 " instead of the expected value of $expected_value\n"; 725 $self->whine( 726 $para->[1]{'start_line'}, 727 "You have '=item " . $para->[1]{'number'} . 728 "' instead of the expected '=item $expected_value'" 729 ); 730 $para->[1]{'number'} = $expected_value; # correcting!! 731 } 732 733 if(@$para == 2) { 734 # For the cases where we /didn't/ push to @$para 735 if($paras->[0][0] eq '~Para') { 736 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 737 push @$para, splice @{shift @$paras},2; 738 } else { 739 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 740 push @$para, ''; # Just so it's not contentless 741 } 742 } 743 744 745 } elsif($over_type eq 'bullet') { 746 my $item_type = $self->_get_item_type($para); 747 # That kills the content of the item if it's a number or bullet. 748 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 749 750 if($item_type eq 'bullet') { 751 # as expected! 752 753 if( $para->[1]{'~_freaky_para_hack'} ) { 754 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; 755 push @$para, delete $para->[1]{'~_freaky_para_hack'}; 756 } 757 758 } elsif($item_type eq 'number') { 759 $self->whine( 760 $para->[1]{'start_line'}, 761 "Expected '=item *'" 762 ); 763 push @$para, $para->[1]{'~orig_content'}; 764 # and block assimilation of the next paragraph 765 delete $para->[1]{'number'}; 766 # Only a PROPER item-number element is allowed 767 # to have a number attribute. 768 } elsif($item_type eq 'text') { 769 $self->whine( 770 $para->[1]{'start_line'}, 771 "Expected '=item *'" 772 ); 773 # But doesn't need processing. But it'll block assimilation 774 # of the next para. 775 } else { 776 die "Unhandled item type $item_type"; # should never happen 777 } 778 779 if(@$para == 2) { 780 # For the cases where we /didn't/ push to @$para 781 if($paras->[0][0] eq '~Para') { 782 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 783 push @$para, splice @{shift @$paras},2; 784 } else { 785 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 786 push @$para, ''; # Just so it's not contentless 787 } 788 } 789 790 } else { 791 die "Unhandled =over type \"$over_type\"?"; 792 # Shouldn't happen! 793 } 794 795 $para_type = 'Plain'; 796 $para->[0] .= '-' . $over_type; 797 # Whew. Now fall thru and process it. 798 799 800 } elsif($para_type eq '=extend') { 801 # Well, might as well implement it here. 802 $self->_ponder_extend($para); 803 next; # and skip 804 } elsif($para_type eq '=encoding') { 805 # Not actually acted on here, but we catch errors here. 806 $self->_handle_encoding_second_level($para); 807 next unless $self->keep_encoding_directive; 808 $para_type = 'Plain'; 809 } elsif($para_type eq '~Verbatim') { 810 $para->[0] = 'Verbatim'; 811 $para_type = '?Verbatim'; 812 } elsif($para_type eq '~Para') { 813 $para->[0] = 'Para'; 814 $para_type = '?Plain'; 815 } elsif($para_type eq 'Data') { 816 $para->[0] = 'Data'; 817 $para_type = '?Data'; 818 } elsif( $para_type =~ s/^=//s 819 and defined( $para_type = $self->{'accept_directives'}{$para_type} ) 820 ) { 821 DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; 822 } else { 823 # An unknown directive! 824 DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", 825 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) 826 ; 827 $self->whine( 828 $para->[1]{'start_line'}, 829 "Unknown directive: $para->[0]" 830 ); 831 832 # And maybe treat it as text instead of just letting it go? 833 next; 834 } 835 836 if($para_type =~ s/^\?//s) { 837 if(! @$curr_open) { # usual case 838 DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; 839 } else { 840 my @fors = grep $_->[0] eq '=for', @$curr_open; 841 DEBUG > 1 and print "Containing fors: ", 842 join(',', map $_->[1]{'target'}, @fors), "\n"; 843 844 if(! @fors) { 845 DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; 846 847 #} elsif(grep $_->[1]{'~resolve'}, @fors) { 848 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { 849 } elsif( $fors[-1][1]{'~resolve'} ) { 850 # Look to the immediately containing for 851 852 if($para_type eq 'Data') { 853 DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 854 $para->[0] = 'Para'; 855 $para_type = 'Plain'; 856 } else { 857 DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; 858 } 859 } else { 860 DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; 861 $para->[0] = $para_type = 'Data'; 862 } 863 } 864 } 865 866 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 867 if($para_type eq 'Plain') { 868 $self->_ponder_Plain($para); 869 } elsif($para_type eq 'Verbatim') { 870 $self->_ponder_Verbatim($para); 871 } elsif($para_type eq 'Data') { 872 $self->_ponder_Data($para); 873 } else { 874 die "\$para type is $para_type -- how did that happen?"; 875 # Shouldn't happen. 876 } 877 878 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 879 $para->[0] =~ s/^[~=]//s; 880 881 DEBUG and print "\n", pretty($para), "\n"; 882 883 # traverse the treelet (which might well be just one string scalar) 884 $self->{'content_seen'} ||= 1; 885 $self->_traverse_treelet_bit(@$para); 886 } 887 } 888 889 return; 890} 891 892########################################################################### 893# The sub-ponderers... 894 895 896 897sub _ponder_for { 898 my ($self,$para,$curr_open,$paras) = @_; 899 900 # Fake it out as a begin/end 901 my $target; 902 903 if(grep $_->[1]{'~ignore'}, @$curr_open) { 904 DEBUG > 1 and print "Ignoring ignorable =for\n"; 905 return 1; 906 } 907 908 for(my $i = 2; $i < @$para; ++$i) { 909 if($para->[$i] =~ s/^\s*(\S+)\s*//s) { 910 $target = $1; 911 last; 912 } 913 } 914 unless(defined $target) { 915 $self->whine( 916 $para->[1]{'start_line'}, 917 "=for without a target?" 918 ); 919 return 1; 920 } 921 DEBUG > 1 and 922 print "Faking out a =for $target as a =begin $target / =end $target\n"; 923 924 $para->[0] = 'Data'; 925 926 unshift @$paras, 927 ['=begin', 928 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 929 $target, 930 ], 931 $para, 932 ['=end', 933 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, 934 $target, 935 ], 936 ; 937 938 return 1; 939} 940 941sub _ponder_begin { 942 my ($self,$para,$curr_open,$paras) = @_; 943 my $content = join ' ', splice @$para, 2; 944 $content =~ s/^\s+//s; 945 $content =~ s/\s+$//s; 946 unless(length($content)) { 947 $self->whine( 948 $para->[1]{'start_line'}, 949 "=begin without a target?" 950 ); 951 DEBUG and print "Ignoring targetless =begin\n"; 952 return 1; 953 } 954 955 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; 956 $para->[1]{'title'} = $title if ($title); 957 $para->[1]{'target'} = $target; # without any ':' 958 $content = $target; # strip off the title 959 960 $content =~ s/^:!/!:/s; 961 my $neg; # whether this is a negation-match 962 $neg = 1 if $content =~ s/^!//s; 963 my $to_resolve; # whether to process formatting codes 964 $to_resolve = 1 if $content =~ s/^://s; 965 966 my $dont_ignore; # whether this target matches us 967 968 foreach my $target_name ( 969 split(',', $content, -1), 970 $neg ? () : '*' 971 ) { 972 DEBUG > 2 and 973 print " Considering whether =begin $content matches $target_name\n"; 974 next unless $self->{'accept_targets'}{$target_name}; 975 976 DEBUG > 2 and 977 print " It DOES match the acceptable target $target_name!\n"; 978 $to_resolve = 1 979 if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; 980 $dont_ignore = 1; 981 $para->[1]{'target_matching'} = $target_name; 982 last; # stop looking at other target names 983 } 984 985 if($neg) { 986 if( $dont_ignore ) { 987 $dont_ignore = ''; 988 delete $para->[1]{'target_matching'}; 989 DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; 990 } else { 991 $dont_ignore = 1; 992 $para->[1]{'target_matching'} = '!'; 993 DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; 994 } 995 } 996 997 $para->[0] = '=for'; # Just what we happen to call these, internally 998 $para->[1]{'~really'} ||= '=begin'; 999 $para->[1]{'~ignore'} = (! $dont_ignore) || 0; 1000 $para->[1]{'~resolve'} = $to_resolve || 0; 1001 1002 DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', 1003 "ignore contents of this region\n"; 1004 DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", 1005 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; 1006 DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; 1007 1008 push @$curr_open, $para; 1009 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { 1010 DEBUG > 1 and print "Ignoring ignorable =begin\n"; 1011 } else { 1012 $self->{'content_seen'} ||= 1; 1013 $self->_handle_element_start((my $scratch='for'), $para->[1]); 1014 } 1015 1016 return 1; 1017} 1018 1019sub _ponder_end { 1020 my ($self,$para,$curr_open,$paras) = @_; 1021 my $content = join ' ', splice @$para, 2; 1022 $content =~ s/^\s+//s; 1023 $content =~ s/\s+$//s; 1024 DEBUG and print "Ogling '=end $content' directive\n"; 1025 1026 unless(length($content)) { 1027 $self->whine( 1028 $para->[1]{'start_line'}, 1029 "'=end' without a target?" . ( 1030 ( @$curr_open and $curr_open->[-1][0] eq '=for' ) 1031 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) 1032 : '' 1033 ) 1034 ); 1035 DEBUG and print "Ignoring targetless =end\n"; 1036 return 1; 1037 } 1038 1039 unless($content =~ m/^\S+$/) { # i.e., unless it's one word 1040 $self->whine( 1041 $para->[1]{'start_line'}, 1042 "'=end $content' is invalid. (Stack: " 1043 . $self->_dump_curr_open() . ')' 1044 ); 1045 DEBUG and print "Ignoring mistargetted =end $content\n"; 1046 return 1; 1047 } 1048 1049 unless(@$curr_open and $curr_open->[-1][0] eq '=for') { 1050 $self->whine( 1051 $para->[1]{'start_line'}, 1052 "=end $content without matching =begin. (Stack: " 1053 . $self->_dump_curr_open() . ')' 1054 ); 1055 DEBUG and print "Ignoring mistargetted =end $content\n"; 1056 return 1; 1057 } 1058 1059 unless($content eq $curr_open->[-1][1]{'target'}) { 1060 $self->whine( 1061 $para->[1]{'start_line'}, 1062 "=end $content doesn't match =begin " 1063 . $curr_open->[-1][1]{'target'} 1064 . ". (Stack: " 1065 . $self->_dump_curr_open() . ')' 1066 ); 1067 DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; 1068 return 1; 1069 } 1070 1071 # Else it's okay to close... 1072 if(grep $_->[1]{'~ignore'}, @$curr_open) { 1073 DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; 1074 # And that may be because of this to-be-closed =for region, or some 1075 # other one, but it doesn't matter. 1076 } else { 1077 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; 1078 # what's that for? 1079 1080 $self->{'content_seen'} ||= 1; 1081 $self->_handle_element_end( my $scratch = 'for', $para->[1]); 1082 } 1083 DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; 1084 pop @$curr_open; 1085 1086 return 1; 1087} 1088 1089sub _ponder_doc_end { 1090 my ($self,$para,$curr_open,$paras) = @_; 1091 if(@$curr_open) { # Deal with things left open 1092 DEBUG and print "Stack is nonempty at end-document: (", 1093 $self->_dump_curr_open(), ")\n"; 1094 1095 DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; 1096 unshift @$paras, $self->_closers_for_all_curr_open; 1097 # Make sure there is exactly one ~end in the parastack, at the end: 1098 @$paras = grep $_->[0] ne '~end', @$paras; 1099 push @$paras, $para, $para; 1100 # We need two -- once for the next cycle where we 1101 # generate errata, and then another to be at the end 1102 # when that loop back around to process the errata. 1103 return 1; 1104 1105 } else { 1106 DEBUG and print "Okay, stack is empty now.\n"; 1107 } 1108 1109 # Try generating errata section, if applicable 1110 unless($self->{'~tried_gen_errata'}) { 1111 $self->{'~tried_gen_errata'} = 1; 1112 my @extras = $self->_gen_errata(); 1113 if(@extras) { 1114 unshift @$paras, @extras; 1115 DEBUG and print "Generated errata... relooping...\n"; 1116 return 1; # I.e., loop around again to process these fake-o paragraphs 1117 } 1118 } 1119 1120 splice @$paras; # Well, that's that for this paragraph buffer. 1121 DEBUG and print "Throwing end-document event.\n"; 1122 1123 $self->_handle_element_end( my $scratch = 'Document' ); 1124 return 1; # Hasta la byebye 1125} 1126 1127sub _ponder_pod { 1128 my ($self,$para,$curr_open,$paras) = @_; 1129 $self->whine( 1130 $para->[1]{'start_line'}, 1131 "=pod directives shouldn't be over one line long! Ignoring all " 1132 . (@$para - 2) . " lines of content" 1133 ) if @$para > 3; 1134 1135 # Content ignored unless 'pod_handler' is set 1136 if (my $pod_handler = $self->{'pod_handler'}) { 1137 my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; 1138 $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output 1139 $pod_handler->($line, $line_num, $self); 1140 } 1141 1142 # The surrounding methods set content_seen, so let us remain consistent. 1143 # I do not know why it was not here before -- should it not be here? 1144 # $self->{'content_seen'} ||= 1; 1145 1146 return; 1147} 1148 1149sub _ponder_over { 1150 my ($self,$para,$curr_open,$paras) = @_; 1151 return 1 unless @$paras; 1152 my $list_type; 1153 1154 if($paras->[0][0] eq '=item') { # most common case 1155 $list_type = $self->_get_initial_item_type($paras->[0]); 1156 1157 } elsif($paras->[0][0] eq '=back') { 1158 # Ignore empty lists by default 1159 if ($self->{'parse_empty_lists'}) { 1160 $list_type = 'empty'; 1161 } else { 1162 shift @$paras; 1163 return 1; 1164 } 1165 } elsif($paras->[0][0] eq '~end') { 1166 $self->whine( 1167 $para->[1]{'start_line'}, 1168 "=over is the last thing in the document?!" 1169 ); 1170 return 1; # But feh, ignore it. 1171 } else { 1172 $list_type = 'block'; 1173 } 1174 $para->[1]{'~type'} = $list_type; 1175 push @$curr_open, $para; 1176 # yes, we reuse the paragraph as a stack item 1177 1178 my $content = join ' ', splice @$para, 2; 1179 my $overness; 1180 if($content =~ m/^\s*$/s) { 1181 $para->[1]{'indent'} = 4; 1182 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { 1183 no integer; 1184 $para->[1]{'indent'} = $1; 1185 if($1 == 0) { 1186 $self->whine( 1187 $para->[1]{'start_line'}, 1188 "Can't have a 0 in =over $content" 1189 ); 1190 $para->[1]{'indent'} = 4; 1191 } 1192 } else { 1193 $self->whine( 1194 $para->[1]{'start_line'}, 1195 "=over should be: '=over' or '=over positive_number'" 1196 ); 1197 $para->[1]{'indent'} = 4; 1198 } 1199 DEBUG > 1 and print "=over found of type $list_type\n"; 1200 1201 $self->{'content_seen'} ||= 1; 1202 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); 1203 1204 return; 1205} 1206 1207sub _ponder_back { 1208 my ($self,$para,$curr_open,$paras) = @_; 1209 # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? 1210 1211 my $content = join ' ', splice @$para, 2; 1212 if($content =~ m/\S/) { 1213 $self->whine( 1214 $para->[1]{'start_line'}, 1215 "=back doesn't take any parameters, but you said =back $content" 1216 ); 1217 } 1218 1219 if(@$curr_open and $curr_open->[-1][0] eq '=over') { 1220 DEBUG > 1 and print "=back happily closes matching =over\n"; 1221 # Expected case: we're closing the most recently opened thing 1222 #my $over = pop @$curr_open; 1223 $self->{'content_seen'} ||= 1; 1224 $self->_handle_element_end( my $scratch = 1225 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] 1226 ); 1227 } else { 1228 DEBUG > 1 and print "=back found without a matching =over. Stack: (", 1229 join(', ', map $_->[0], @$curr_open), ").\n"; 1230 $self->whine( 1231 $para->[1]{'start_line'}, 1232 '=back without =over' 1233 ); 1234 return 1; # and ignore it 1235 } 1236} 1237 1238sub _ponder_item { 1239 my ($self,$para,$curr_open,$paras) = @_; 1240 my $over; 1241 unless(@$curr_open and 1242 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { 1243 $self->whine( 1244 $para->[1]{'start_line'}, 1245 "'=item' outside of any '=over'" 1246 ); 1247 unshift @$paras, 1248 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], 1249 $para 1250 ; 1251 return 1; 1252 } 1253 1254 1255 my $over_type = $over->[1]{'~type'}; 1256 1257 if(!$over_type) { 1258 # Shouldn't happen1 1259 die "Typeless over in stack, starting at line " 1260 . $over->[1]{'start_line'}; 1261 1262 } elsif($over_type eq 'block') { 1263 unless($curr_open->[-1][1]{'~bitched_about'}) { 1264 $curr_open->[-1][1]{'~bitched_about'} = 1; 1265 $self->whine( 1266 $curr_open->[-1][1]{'start_line'}, 1267 "You can't have =items (as at line " 1268 . $para->[1]{'start_line'} 1269 . ") unless the first thing after the =over is an =item" 1270 ); 1271 } 1272 # Just turn it into a paragraph and reconsider it 1273 $para->[0] = '~Para'; 1274 unshift @$paras, $para; 1275 return 1; 1276 1277 } elsif($over_type eq 'text') { 1278 my $item_type = $self->_get_item_type($para); 1279 # That kills the content of the item if it's a number or bullet. 1280 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1281 1282 if($item_type eq 'text') { 1283 # Nothing special needs doing for 'text' 1284 } elsif($item_type eq 'number' or $item_type eq 'bullet') { 1285 $self->whine( 1286 $para->[1]{'start_line'}, 1287 "Expected text after =item, not a $item_type" 1288 ); 1289 # Undo our clobbering: 1290 push @$para, $para->[1]{'~orig_content'}; 1291 delete $para->[1]{'number'}; 1292 # Only a PROPER item-number element is allowed 1293 # to have a number attribute. 1294 } else { 1295 die "Unhandled item type $item_type"; # should never happen 1296 } 1297 1298 # =item-text thingies don't need any assimilation, it seems. 1299 1300 } elsif($over_type eq 'number') { 1301 my $item_type = $self->_get_item_type($para); 1302 # That kills the content of the item if it's a number or bullet. 1303 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1304 1305 my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; 1306 1307 if($item_type eq 'bullet') { 1308 # Hm, it's not numeric. Correct for this. 1309 $para->[1]{'number'} = $expected_value; 1310 $self->whine( 1311 $para->[1]{'start_line'}, 1312 "Expected '=item $expected_value'" 1313 ); 1314 push @$para, $para->[1]{'~orig_content'}; 1315 # restore the bullet, blocking the assimilation of next para 1316 1317 } elsif($item_type eq 'text') { 1318 # Hm, it's not numeric. Correct for this. 1319 $para->[1]{'number'} = $expected_value; 1320 $self->whine( 1321 $para->[1]{'start_line'}, 1322 "Expected '=item $expected_value'" 1323 ); 1324 # Text content will still be there and will block next ~Para 1325 1326 } elsif($item_type ne 'number') { 1327 die "Unknown item type $item_type"; # should never happen 1328 1329 } elsif($expected_value == $para->[1]{'number'}) { 1330 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; 1331 1332 } else { 1333 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, 1334 " instead of the expected value of $expected_value\n"; 1335 $self->whine( 1336 $para->[1]{'start_line'}, 1337 "You have '=item " . $para->[1]{'number'} . 1338 "' instead of the expected '=item $expected_value'" 1339 ); 1340 $para->[1]{'number'} = $expected_value; # correcting!! 1341 } 1342 1343 if(@$para == 2) { 1344 # For the cases where we /didn't/ push to @$para 1345 if($paras->[0][0] eq '~Para') { 1346 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 1347 push @$para, splice @{shift @$paras},2; 1348 } else { 1349 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 1350 push @$para, ''; # Just so it's not contentless 1351 } 1352 } 1353 1354 1355 } elsif($over_type eq 'bullet') { 1356 my $item_type = $self->_get_item_type($para); 1357 # That kills the content of the item if it's a number or bullet. 1358 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; 1359 1360 if($item_type eq 'bullet') { 1361 # as expected! 1362 1363 if( $para->[1]{'~_freaky_para_hack'} ) { 1364 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; 1365 push @$para, delete $para->[1]{'~_freaky_para_hack'}; 1366 } 1367 1368 } elsif($item_type eq 'number') { 1369 $self->whine( 1370 $para->[1]{'start_line'}, 1371 "Expected '=item *'" 1372 ); 1373 push @$para, $para->[1]{'~orig_content'}; 1374 # and block assimilation of the next paragraph 1375 delete $para->[1]{'number'}; 1376 # Only a PROPER item-number element is allowed 1377 # to have a number attribute. 1378 } elsif($item_type eq 'text') { 1379 $self->whine( 1380 $para->[1]{'start_line'}, 1381 "Expected '=item *'" 1382 ); 1383 # But doesn't need processing. But it'll block assimilation 1384 # of the next para. 1385 } else { 1386 die "Unhandled item type $item_type"; # should never happen 1387 } 1388 1389 if(@$para == 2) { 1390 # For the cases where we /didn't/ push to @$para 1391 if($paras->[0][0] eq '~Para') { 1392 DEBUG and print "Assimilating following ~Para content into $over_type item\n"; 1393 push @$para, splice @{shift @$paras},2; 1394 } else { 1395 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; 1396 push @$para, ''; # Just so it's not contentless 1397 } 1398 } 1399 1400 } else { 1401 die "Unhandled =over type \"$over_type\"?"; 1402 # Shouldn't happen! 1403 } 1404 $para->[0] .= '-' . $over_type; 1405 1406 return; 1407} 1408 1409sub _ponder_Plain { 1410 my ($self,$para) = @_; 1411 DEBUG and print " giving plain treatment...\n"; 1412 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) 1413 or $para->[1]{'~cooked'} 1414 ) { 1415 push @$para, 1416 @{$self->_make_treelet( 1417 join("\n", splice(@$para, 2)), 1418 $para->[1]{'start_line'} 1419 )}; 1420 } 1421 # Empty paragraphs don't need a treelet for any reason I can see. 1422 # And precooked paragraphs already have a treelet. 1423 return; 1424} 1425 1426sub _ponder_Verbatim { 1427 my ($self,$para) = @_; 1428 DEBUG and print " giving verbatim treatment...\n"; 1429 1430 $para->[1]{'xml:space'} = 'preserve'; 1431 1432 my $indent = $self->strip_verbatim_indent; 1433 if ($indent && ref $indent eq 'CODE') { 1434 my @shifted = (shift @{$para}, shift @{$para}); 1435 $indent = $indent->($para); 1436 unshift @{$para}, @shifted; 1437 } 1438 1439 for(my $i = 2; $i < @$para; $i++) { 1440 foreach my $line ($para->[$i]) { # just for aliasing 1441 # Strip indentation. 1442 $line =~ s/^\Q$indent// if $indent 1443 && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); 1444 while( $line =~ 1445 # Sort of adapted from Text::Tabs -- yes, it's hardwired in that 1446 # tabs are at every EIGHTH column. For portability, it has to be 1447 # one setting everywhere, and 8th wins. 1448 s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e 1449 ) {} 1450 1451 # TODO: whinge about (or otherwise treat) unindented or overlong lines 1452 1453 } 1454 } 1455 1456 # Now the VerbatimFormatted hoodoo... 1457 if( $self->{'accept_codes'} and 1458 $self->{'accept_codes'}{'VerbatimFormatted'} 1459 ) { 1460 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } 1461 # Kill any number of terminal newlines 1462 $self->_verbatim_format($para); 1463 } elsif ($self->{'codes_in_verbatim'}) { 1464 push @$para, 1465 @{$self->_make_treelet( 1466 join("\n", splice(@$para, 2)), 1467 $para->[1]{'start_line'}, $para->[1]{'xml:space'} 1468 )}; 1469 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1470 } else { 1471 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1472 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines 1473 } 1474 return; 1475} 1476 1477sub _ponder_Data { 1478 my ($self,$para) = @_; 1479 DEBUG and print " giving data treatment...\n"; 1480 $para->[1]{'xml:space'} = 'preserve'; 1481 push @$para, join "\n", splice(@$para, 2) if @$para > 3; 1482 return; 1483} 1484 1485 1486 1487 1488########################################################################### 1489 1490sub _traverse_treelet_bit { # for use only by the routine above 1491 my($self, $name) = splice @_,0,2; 1492 1493 my $scratch; 1494 $self->_handle_element_start(($scratch=$name), shift @_); 1495 1496 while (@_) { 1497 my $x = shift; 1498 if (ref($x)) { 1499 &_traverse_treelet_bit($self, @$x); 1500 } else { 1501 $x .= shift while @_ && !ref($_[0]); 1502 $self->_handle_text($x); 1503 } 1504 } 1505 1506 $self->_handle_element_end($scratch=$name); 1507 return; 1508} 1509 1510#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1511 1512sub _closers_for_all_curr_open { 1513 my $self = $_[0]; 1514 my @closers; 1515 foreach my $still_open (@{ $self->{'curr_open'} || return }) { 1516 my @copy = @$still_open; 1517 $copy[1] = {%{ $copy[1] }}; 1518 #$copy[1]{'start_line'} = -1; 1519 if($copy[0] eq '=for') { 1520 $copy[0] = '=end'; 1521 } elsif($copy[0] eq '=over') { 1522 $self->whine( 1523 $still_open->[1]{start_line} , 1524 "=over without closing =back" 1525 ); 1526 1527 $copy[0] = '=back'; 1528 } else { 1529 die "I don't know how to auto-close an open $copy[0] region"; 1530 } 1531 1532 unless( @copy > 2 ) { 1533 push @copy, $copy[1]{'target'}; 1534 $copy[-1] = '' unless defined $copy[-1]; 1535 # since =over's don't have targets 1536 } 1537 1538 $copy[1]{'fake-closer'} = 1; 1539 1540 DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; 1541 unshift @closers, \@copy; 1542 } 1543 return @closers; 1544} 1545 1546#-------------------------------------------------------------------------- 1547 1548sub _verbatim_format { 1549 my($it, $p) = @_; 1550 1551 my $formatting; 1552 1553 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines 1554 DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; 1555 $p->[$i] .= "\n"; 1556 # Unlike with simple Verbatim blocks, we don't end up just doing 1557 # a join("\n", ...) on the contents, so we have to append a 1558 # newline to ever line, and then nix the last one later. 1559 } 1560 1561 if( DEBUG > 4 ) { 1562 print "<<\n"; 1563 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines 1564 print "_verbatim_format $i: $p->[$i]"; 1565 } 1566 print ">>\n"; 1567 } 1568 1569 for(my $i = $#$p; $i > 2; $i--) { 1570 # work backwards over the lines, except the first (#2) 1571 1572 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s 1573 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; 1574 # look at a formatty line preceding a nonformatty one 1575 DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; 1576 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { 1577 DEBUG > 5 and print " It's a formatty line. ", 1578 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; 1579 1580 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { 1581 DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; 1582 next; 1583 } else { 1584 DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; 1585 } 1586 } else { 1587 DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; 1588 next; 1589 } 1590 1591 # A formatty line has to have #: in the first two columns, and uses 1592 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. 1593 # Example: 1594 # What do you want? i like pie. [or whatever] 1595 # #:^^^^^^^^^^^^^^^^^ ///////////// 1596 1597 1598 DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; 1599 1600 $formatting = ' ' . $1; 1601 $formatting =~ s/\s+$//s; # nix trailing whitespace 1602 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op 1603 splice @$p,$i,1; # remove this line 1604 $i--; # don't consider next line 1605 next; 1606 } 1607 1608 if( length($formatting) >= length($p->[$i-1]) ) { 1609 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; 1610 } else { 1611 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); 1612 } 1613 # Make $formatting and the previous line be exactly the same length, 1614 # with $formatting having a " " as the last character. 1615 1616 DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; 1617 1618 1619 my @new_line; 1620 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { 1621 #print "Format matches $1\n"; 1622 1623 if($2) { 1624 #print "SKIPPING <$2>\n"; 1625 push @new_line, 1626 substr($p->[$i-1], pos($formatting)-length($1), length($1)); 1627 } else { 1628 #print "SNARING $+\n"; 1629 push @new_line, [ 1630 ( 1631 $3 ? 'VerbatimB' : 1632 $4 ? 'VerbatimI' : 1633 $5 ? 'VerbatimBI' : die("Should never get called") 1634 ), {}, 1635 substr($p->[$i-1], pos($formatting)-length($1), length($1)) 1636 ]; 1637 #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; 1638 } 1639 } 1640 my @nixed = 1641 splice @$p, $i-1, 2, @new_line; # replace myself and the next line 1642 DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; 1643 1644 DEBUG > 6 and print "New version of the above line is these tokens (", 1645 scalar(@new_line), "):", 1646 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; 1647 $i--; # So the next line we scrutinize is the line before the one 1648 # that we just went and formatted 1649 } 1650 1651 $p->[0] = 'VerbatimFormatted'; 1652 1653 # Collapse adjacent text nodes, just for kicks. 1654 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last 1655 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { 1656 DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; 1657 $p->[$i] .= splice @$p, $i+1, 1; # merge 1658 --$i; # and back up 1659 } 1660 } 1661 1662 # Now look for the last text token, and remove the terminal newline 1663 for( my $i = $#$p; $i >= 2; $i-- ) { 1664 # work backwards over the tokens, even the first 1665 if( !ref($p->[$i]) ) { 1666 if($p->[$i] =~ s/\n$//s) { 1667 DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; 1668 } else { 1669 DEBUG > 5 and print 1670 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; 1671 } 1672 last; # we only want the next one 1673 } 1674 } 1675 1676 return; 1677} 1678 1679 1680#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1681 1682 1683sub _treelet_from_formatting_codes { 1684 # Given a paragraph, returns a treelet. Full of scary tokenizing code. 1685 # Like [ '~Top', {'start_line' => $start_line}, 1686 # "I like ", 1687 # [ 'B', {}, "pie" ], 1688 # "!" 1689 # ] 1690 1691 my($self, $para, $start_line, $preserve_space) = @_; 1692 1693 my $treelet = ['~Top', {'start_line' => $start_line},]; 1694 1695 unless ($preserve_space || $self->{'preserve_whitespace'}) { 1696 $para =~ s/\s+/ /g; # collapse and trim all whitespace first. 1697 $para =~ s/ $//; 1698 $para =~ s/^ //; 1699 } 1700 1701 # Only apparent problem the above code is that N<< >> turns into 1702 # N<< >>. But then, word wrapping does that too! So don't do that! 1703 1704 my @stack; 1705 my @lineage = ($treelet); 1706 my $raw = ''; # raw content of L<> fcode before splitting/processing 1707 # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed 1708 # into just 1 ' '. Is this the regex's doing or 'raw's? 1709 my $inL = 0; 1710 1711 DEBUG > 4 and print "Paragraph:\n$para\n\n"; 1712 1713 # Here begins our frightening tokenizer RE. The following regex matches 1714 # text in four main parts: 1715 # 1716 # * Start-codes. The first alternative matches C< or C<<, the latter 1717 # followed by some whitespace. $1 will hold the entire start code 1718 # (including any space following a multiple-angle-bracket delimiter), 1719 # and $2 will hold only the additional brackets past the first in a 1720 # multiple-bracket delimiter. length($2) + 1 will be the number of 1721 # closing brackets we have to find. 1722 # 1723 # * Closing brackets. Match some amount of whitespace followed by 1724 # multiple close brackets. The logic to see if this closes anything 1725 # is down below. Note that in order to parse C<< >> correctly, we 1726 # have to use look-behind (?<=\s\s), since the match of the starting 1727 # code will have consumed the whitespace. 1728 # 1729 # * A single closing bracket, to close a simple code like C<>. 1730 # 1731 # * Something that isn't a start or end code. We have to be careful 1732 # about accepting whitespace, since perlpodspec says that any whitespace 1733 # before a multiple-bracket closing delimiter should be ignored. 1734 # 1735 while($para =~ 1736 m/\G 1737 (?: 1738 # Match starting codes, including the whitespace following a 1739 # multiple-delimiter start code. $1 gets the whole start code and 1740 # $2 gets all but one of the <s in the multiple-bracket case. 1741 ([A-Z]<(?:(<+)\s+)?) 1742 | 1743 # Match multiple-bracket end codes. $3 gets the whitespace that 1744 # should be discarded before an end bracket but kept in other cases 1745 # and $4 gets the end brackets themselves. 1746 (\s+|(?<=\s\s))(>{2,}) 1747 | 1748 (\s?>) # $5: simple end-codes 1749 | 1750 ( # $6: stuff containing no start-codes or end-codes 1751 (?: 1752 [^A-Z\s>] 1753 | 1754 (?: 1755 [A-Z](?!<) 1756 ) 1757 | 1758 # whitespace is ok, but we don't want to eat the whitespace before 1759 # a multiple-bracket end code. 1760 # NOTE: we may still have problems with e.g. S<< >> 1761 (?: 1762 \s(?!\s*>{2,}) 1763 ) 1764 )+ 1765 ) 1766 ) 1767 /xgo 1768 ) { 1769 DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; 1770 if(defined $1) { 1771 if(defined $2) { 1772 DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; 1773 push @stack, length($2) + 1; 1774 # length of the necessary complex end-code string 1775 } else { 1776 DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; 1777 push @stack, 0; # signal that we're looking for simple 1778 } 1779 push @lineage, [ substr($1,0,1), {}, ]; # new node object 1780 push @{ $lineage[-2] }, $lineage[-1]; 1781 if ('L' eq substr($1,0,1)) { 1782 $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator 1783 $inL = 1; 1784 } else { 1785 $raw .= $1 if $inL; 1786 } 1787 1788 } elsif(defined $4) { 1789 DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; 1790 # This is where it gets messy... 1791 if(! @stack) { 1792 # We saw " >>>>" but needed nothing. This is ALL just stuff then. 1793 DEBUG > 4 and print " But it's really just stuff.\n"; 1794 push @{ $lineage[-1] }, $3, $4; 1795 next; 1796 } elsif(!$stack[-1]) { 1797 # We saw " >>>>" but needed only ">". Back pos up. 1798 DEBUG > 4 and print " And that's more than we needed to close simple.\n"; 1799 push @{ $lineage[-1] }, $3; # That was a for-real space, too. 1800 pos($para) = pos($para) - length($4) + 1; 1801 } elsif($stack[-1] == length($4)) { 1802 # We found " >>>>", and it was exactly what we needed. Commonest case. 1803 DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; 1804 } elsif($stack[-1] < length($4)) { 1805 # We saw " >>>>" but needed only " >>". Back pos up. 1806 DEBUG > 4 and print " And that's more than we needed to close complex.\n"; 1807 pos($para) = pos($para) - length($4) + $stack[-1]; 1808 } else { 1809 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! 1810 DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; 1811 push @{ $lineage[-1] }, $3, $4; 1812 next; 1813 } 1814 #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; 1815 1816 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 1817 # Keep the element from being childless 1818 1819 pop @stack; 1820 pop @lineage; 1821 1822 unless (@stack) { # not in an L if there are no open fcodes 1823 $inL = 0; 1824 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { 1825 $lineage[-1][-1][1]{'raw'} = $raw 1826 } 1827 } 1828 $raw .= $3.$4 if $inL; 1829 1830 } elsif(defined $5) { 1831 DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n"; 1832 1833 if(@stack and ! $stack[-1]) { 1834 # We're indeed expecting a simple end-code 1835 DEBUG > 4 and print " It's indeed an end-code.\n"; 1836 1837 if(length($5) == 2) { # There was a space there: " >" 1838 push @{ $lineage[-1] }, ' '; 1839 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element 1840 push @{ $lineage[-1] }, ''; # keep it from being really childless 1841 } 1842 1843 pop @stack; 1844 pop @lineage; 1845 } else { 1846 DEBUG > 4 and print " It's just stuff.\n"; 1847 push @{ $lineage[-1] }, $5; 1848 } 1849 1850 unless (@stack) { # not in an L if there are no open fcodes 1851 $inL = 0; 1852 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { 1853 $lineage[-1][-1][1]{'raw'} = $raw 1854 } 1855 } 1856 $raw .= $5 if $inL; 1857 1858 } elsif(defined $6) { 1859 DEBUG > 3 and print "Found stuff \"$6\"\n"; 1860 push @{ $lineage[-1] }, $6; 1861 $raw .= $6 if $inL; 1862 # XXX does not capture multiplace whitespaces -- 'raw' ends up with 1863 # at most 1 leading/trailing whitespace, why not all of it? 1864 1865 } else { 1866 # should never ever ever ever happen 1867 DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; 1868 die "SPORK 512512!"; 1869 } 1870 } 1871 1872 if(@stack) { # Uhoh, some sequences weren't closed. 1873 my $x= "..."; 1874 while(@stack) { 1875 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; 1876 # Hmmmmm! 1877 1878 my $code = (pop @lineage)->[0]; 1879 my $ender_length = pop @stack; 1880 if($ender_length) { 1881 --$ender_length; 1882 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); 1883 } else { 1884 $x = $code . "<$x>"; 1885 } 1886 } 1887 DEBUG > 1 and print "Unterminated $x sequence\n"; 1888 $self->whine($start_line, 1889 "Unterminated $x sequence", 1890 ); 1891 } 1892 1893 return $treelet; 1894} 1895 1896#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1897 1898sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) 1899 return stringify_lol($_[1]); 1900} 1901 1902sub stringify_lol { # function: stringify_lol($lol) 1903 my $string_form = ''; 1904 _stringify_lol( $_[0] => \$string_form ); 1905 return $string_form; 1906} 1907 1908sub _stringify_lol { # the real recursor 1909 my($lol, $to) = @_; 1910 for(my $i = 2; $i < @$lol; ++$i) { 1911 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { 1912 _stringify_lol( $lol->[$i], $to); # recurse! 1913 } else { 1914 $$to .= $lol->[$i]; 1915 } 1916 } 1917 return; 1918} 1919 1920#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 1921 1922sub _dump_curr_open { # return a string representation of the stack 1923 my $curr_open = $_[0]{'curr_open'}; 1924 1925 return '[empty]' unless @$curr_open; 1926 return join '; ', 1927 map {; 1928 ($_->[0] eq '=for') 1929 ? ( ($_->[1]{'~really'} || '=over') 1930 . ' ' . $_->[1]{'target'}) 1931 : $_->[0] 1932 } 1933 @$curr_open 1934 ; 1935} 1936 1937########################################################################### 1938my %pretty_form = ( 1939 "\a" => '\a', # ding! 1940 "\b" => '\b', # BS 1941 "\e" => '\e', # ESC 1942 "\f" => '\f', # FF 1943 "\t" => '\t', # tab 1944 "\cm" => '\cm', 1945 "\cj" => '\cj', 1946 "\n" => '\n', # probably overrides one of either \cm or \cj 1947 '"' => '\"', 1948 '\\' => '\\\\', 1949 '$' => '\\$', 1950 '@' => '\\@', 1951 '%' => '\\%', 1952 '#' => '\\#', 1953); 1954 1955sub pretty { # adopted from Class::Classless 1956 # Not the most brilliant routine, but passable. 1957 # Don't give it a cyclic data structure! 1958 my @stuff = @_; # copy 1959 my $x; 1960 my $out = 1961 # join ",\n" . 1962 join ", ", 1963 map {; 1964 if(!defined($_)) { 1965 "undef"; 1966 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { 1967 $x = "[ " . pretty(@$_) . " ]" ; 1968 $x; 1969 } elsif(ref($_) eq 'SCALAR') { 1970 $x = "\\" . pretty($$_) ; 1971 $x; 1972 } elsif(ref($_) eq 'HASH') { 1973 my $hr = $_; 1974 $x = "{" . join(", ", 1975 map(pretty($_) . '=>' . pretty($hr->{$_}), 1976 sort keys %$hr ) ) . "}" ; 1977 $x; 1978 } elsif(!length($_)) { q{''} # empty string 1979 } elsif( 1980 $_ eq '0' # very common case 1981 or( 1982 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s 1983 and $_ ne '-0' # the strange case that that RE lets thru 1984 ) 1985 ) { $_; 1986 } else { 1987 if( chr(65) eq 'A' ) { 1988 s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> 1989 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; 1990 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; 1991 } else { 1992 # We're in some crazy non-ASCII world! 1993 s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> 1994 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; 1995 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; 1996 } 1997 qq{"$_"}; 1998 } 1999 } @stuff; 2000 # $out =~ s/\n */ /g if length($out) < 75; 2001 return $out; 2002} 2003 2004#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 2005 2006# A rather unsubtle method of blowing away all the state information 2007# from a parser object so it can be reused. Provided as a utility for 2008# backward compatibility in Pod::Man, etc. but not recommended for 2009# general use. 2010 2011sub reinit { 2012 my $self = shift; 2013 foreach (qw(source_dead source_filename doc_has_started 2014start_of_pod_block content_seen last_was_blank paras curr_open 2015line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen 2016Title)) { 2017 2018 delete $self->{$_}; 2019 } 2020} 2021 2022#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 20231; 2024 2025