1
2package Pod::Simple::BlackBox;
3#
4# "What's in the box?"  "Pain."
5#
6###########################################################################
7#
8# This is where all the scary things happen: parsing lines into
9#  paragraphs; and then into directives, verbatims, and then also
10#  turning formatting sequences into treelets.
11#
12# Are you really sure you want to read this code?
13#
14#-----------------------------------------------------------------------------
15#
16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18# to call the proper callbacks on the treelets.
19#
20# Every node in a treelet is a ['name', {attrhash}, ...children...]
21
22use integer; # vroom!
23use strict;
24use Carp ();
25use vars qw($VERSION );
26$VERSION = '3.28';
27#use constant DEBUG => 7;
28BEGIN {
29  require Pod::Simple;
30  *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
31}
32
33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
34
35sub parse_line { shift->parse_lines(@_) } # alias
36
37# - - -  Turn back now!  Run away!  - - -
38
39sub parse_lines {             # Usage: $parser->parse_lines(@lines)
40  # an undef means end-of-stream
41  my $self = shift;
42
43  my $code_handler = $self->{'code_handler'};
44  my $cut_handler  = $self->{'cut_handler'};
45  my $wl_handler   = $self->{'whiteline_handler'};
46  $self->{'line_count'} ||= 0;
47
48  my $scratch;
49
50  DEBUG > 4 and
51   print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
52
53  DEBUG > 5 and
54   print "#  About to parse lines: ",
55     join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
56
57  my $paras = ($self->{'paras'} ||= []);
58   # paragraph buffer.  Because we need to defer processing of =over
59   # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
60   # to process this.
61
62  $self->{'pod_para_count'} ||= 0;
63
64  my $line;
65  foreach my $source_line (@_) {
66    if( $self->{'source_dead'} ) {
67      DEBUG > 4 and print "# Source is dead.\n";
68      last;
69    }
70
71    unless( defined $source_line ) {
72      DEBUG > 4 and print "# Undef-line seen.\n";
73
74      push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
75      push @$paras, $paras->[-1], $paras->[-1];
76       # So that it definitely fills the buffer.
77      $self->{'source_dead'} = 1;
78      $self->_ponder_paragraph_buffer;
79      next;
80    }
81
82
83    if( $self->{'line_count'}++ ) {
84      ($line = $source_line) =~ tr/\n\r//d;
85       # If we don't have two vars, we'll end up with that there
86       # tr/// modding the (potentially read-only) original source line!
87
88    } else {
89      DEBUG > 2 and print "First line: [$source_line]\n";
90
91      if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
92        DEBUG and print "UTF-8 BOM seen.  Faking a '=encoding utf8'.\n";
93        $self->_handle_encoding_line( "=encoding utf8" );
94        delete $self->{'_processed_encoding'};
95        $line =~ tr/\n\r//d;
96
97      } elsif( $line =~ s/^\xFE\xFF//s ) {
98        DEBUG and print "Big-endian UTF-16 BOM seen.  Aborting parsing.\n";
99        $self->scream(
100          $self->{'line_count'},
101          "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
102        );
103        splice @_;
104        push @_, undef;
105        next;
106
107        # TODO: implement somehow?
108
109      } elsif( $line =~ s/^\xFF\xFE//s ) {
110        DEBUG and print "Little-endian UTF-16 BOM seen.  Aborting parsing.\n";
111        $self->scream(
112          $self->{'line_count'},
113          "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
114        );
115        splice @_;
116        push @_, undef;
117        next;
118
119        # TODO: implement somehow?
120
121      } else {
122        DEBUG > 2 and print "First line is BOM-less.\n";
123        ($line = $source_line) =~ tr/\n\r//d;
124      }
125    }
126
127    # Try to guess encoding. Inlined for performance reasons.
128    if(!$self->{'parse_characters'} && !$self->{'encoding'}
129      && ($self->{'in_pod'} || $line =~ /^=/s)
130      && $line =~ /[^\x00-\x7f]/
131    ) {
132      my $encoding = $line =~ /^[\x00-\x7f]*[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
133      $self->_handle_encoding_line( "=encoding $encoding" );
134      $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
135
136      my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;
137
138      $self->whine(
139        $self->{'line_count'},
140        "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
141      );
142    }
143
144    DEBUG > 5 and print "# Parsing line: [$line]\n";
145
146    if(!$self->{'in_pod'}) {
147      if($line =~ m/^=([a-zA-Z]+)/s) {
148        if($1 eq 'cut') {
149          $self->scream(
150            $self->{'line_count'},
151            "=cut found outside a pod block.  Skipping to next block."
152          );
153
154          ## Before there were errata sections in the world, it was
155          ## least-pessimal to abort processing the file.  But now we can
156          ## just barrel on thru (but still not start a pod block).
157          #splice @_;
158          #push @_, undef;
159
160          next;
161        } else {
162          $self->{'in_pod'} = $self->{'start_of_pod_block'}
163                            = $self->{'last_was_blank'}     = 1;
164          # And fall thru to the pod-mode block further down
165        }
166      } else {
167        DEBUG > 5 and print "# It's a code-line.\n";
168        $code_handler->(map $_, $line, $self->{'line_count'}, $self)
169         if $code_handler;
170        # Note: this may cause code to be processed out of order relative
171        #  to pods, but in order relative to cuts.
172
173        # Note also that we haven't yet applied the transcoding to $line
174        #  by time we call $code_handler!
175
176        if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
177          # That RE is from perlsyn, section "Plain Old Comments (Not!)",
178          #$fname = $2 if defined $2;
179          #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
180          DEBUG > 1 and print "# Setting nextline to $1\n";
181          $self->{'line_count'} = $1 - 1;
182        }
183
184        next;
185      }
186    }
187
188    # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
189    # Else we're in pod mode:
190
191    # Apply any necessary transcoding:
192    $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
193
194    # HERE WE CATCH =encoding EARLY!
195    if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
196      next if $self->parse_characters;   # Ignore this line
197      $line = $self->_handle_encoding_line( $line );
198    }
199
200    if($line =~ m/^=cut/s) {
201      # here ends the pod block, and therefore the previous pod para
202      DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
203      $self->{'in_pod'} = 0;
204      # ++$self->{'pod_para_count'};
205      $self->_ponder_paragraph_buffer();
206       # by now it's safe to consider the previous paragraph as done.
207      $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
208       if $cut_handler;
209
210      # TODO: add to docs: Note: this may cause cuts to be processed out
211      #  of order relative to pods, but in order relative to code.
212
213    } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
214      if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
215        $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
216          if $wl_handler;
217      }
218
219      if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
220        DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
221        push @{$paras->[-1]}, $line;
222      }  # otherwise it's not interesting
223
224      if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
225        DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
226      }
227
228      $self->{'last_was_blank'} = 1;
229
230    } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...
231
232      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
233        # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
234        my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
235         # Note that in "=head1 foo", the WS is lost.
236         # Example: ['=head1', {'start_line' => 123}, ' foo']
237
238        ++$self->{'pod_para_count'};
239
240        $self->_ponder_paragraph_buffer();
241         # by now it's safe to consider the previous paragraph as done.
242
243        push @$paras, $new; # the new incipient paragraph
244        DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
245
246      } elsif($line =~ m/^\s/s) {
247
248        if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
249          DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
250          push @{$paras->[-1]}, $line;
251        } else {
252          ++$self->{'pod_para_count'};
253          $self->_ponder_paragraph_buffer();
254           # by now it's safe to consider the previous paragraph as done.
255          DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
256          push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
257        }
258      } else {
259        ++$self->{'pod_para_count'};
260        $self->_ponder_paragraph_buffer();
261         # by now it's safe to consider the previous paragraph as done.
262        push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
263        DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
264      }
265      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
266
267    } else {
268      # It's a non-blank line /continuing/ the current para
269      if(@$paras) {
270        DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
271        push @{$paras->[-1]}, $line;
272      } else {
273        # Unexpected case!
274        die "Continuing a paragraph but \@\$paras is empty?";
275      }
276      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
277    }
278
279  } # ends the big while loop
280
281  DEBUG > 1 and print(pretty(@$paras), "\n");
282  return $self;
283}
284
285#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
286
287sub _handle_encoding_line {
288  my($self, $line) = @_;
289
290  return if $self->parse_characters;
291
292  # The point of this routine is to set $self->{'_transcoder'} as indicated.
293
294  return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
295  DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
296
297  my $e    = $1;
298  my $orig = $e;
299  push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
300
301  my $enc_error;
302
303  # Cf.   perldoc Encode   and   perldoc Encode::Supported
304
305  require Pod::Simple::Transcode;
306
307  if( $self->{'encoding'} ) {
308    my $norm_current = $self->{'encoding'};
309    my $norm_e = $e;
310    foreach my $that ($norm_current, $norm_e) {
311      $that =  lc($that);
312      $that =~ s/[-_]//g;
313    }
314    if($norm_current eq $norm_e) {
315      DEBUG > 1 and print "The '=encoding $orig' line is ",
316       "redundant.  ($norm_current eq $norm_e).  Ignoring.\n";
317      $enc_error = '';
318       # But that doesn't necessarily mean that the earlier one went okay
319    } else {
320      $enc_error = "Encoding is already set to " . $self->{'encoding'};
321      DEBUG > 1 and print $enc_error;
322    }
323  } elsif (
324    # OK, let's turn on the encoding
325    do {
326      DEBUG > 1 and print " Setting encoding to $e\n";
327      $self->{'encoding'} = $e;
328      1;
329    }
330    and $e eq 'HACKRAW'
331  ) {
332    DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
333
334  } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
335
336    die($enc_error = "WHAT? _transcoder is already set?!")
337     if $self->{'_transcoder'};   # should never happen
338    require Pod::Simple::Transcode;
339    $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
340    eval {
341      my @x = ('', "abc", "123");
342      $self->{'_transcoder'}->(@x);
343    };
344    $@ && die( $enc_error =
345      "Really unexpected error setting up encoding $e: $@\nAborting"
346    );
347    $self->{'detected_encoding'} = $e;
348
349  } else {
350    my @supported = Pod::Simple::Transcode::->all_encodings;
351
352    # Note unsupported, and complain
353    DEBUG and print " Encoding [$e] is unsupported.",
354      "\nSupporteds: @supported\n";
355    my $suggestion = '';
356
357    # Look for a near match:
358    my $norm = lc($e);
359    $norm =~ tr[-_][]d;
360    my $n;
361    foreach my $enc (@supported) {
362      $n = lc($enc);
363      $n =~ tr[-_][]d;
364      next unless $n eq $norm;
365      $suggestion = "  (Maybe \"$e\" should be \"$enc\"?)";
366      last;
367    }
368    my $encmodver = Pod::Simple::Transcode::->encmodver;
369    $enc_error = join '' =>
370      "This document probably does not appear as it should, because its ",
371      "\"=encoding $e\" line calls for an unsupported encoding.",
372      $suggestion, "  [$encmodver\'s supported encodings are: @supported]"
373    ;
374
375    $self->scream( $self->{'line_count'}, $enc_error );
376  }
377  push @{ $self->{'encoding_command_statuses'} }, $enc_error;
378  if (defined($self->{'_processed_encoding'})) {
379    # Should never happen
380    die "Nested processed encoding.";
381  }
382  $self->{'_processed_encoding'} = $orig;
383
384  return $line;
385}
386
387# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
388
389sub _handle_encoding_second_level {
390  # By time this is called, the encoding (if well formed) will already
391  #  have been acted one.
392  my($self, $para) = @_;
393  my @x = @$para;
394  my $content = join ' ', splice @x, 2;
395  $content =~ s/^\s+//s;
396  $content =~ s/\s+$//s;
397
398  DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
399
400  if (defined($self->{'_processed_encoding'})) {
401    #if($content ne $self->{'_processed_encoding'}) {
402    #  Could it happen?
403    #}
404    delete $self->{'_processed_encoding'};
405    # It's already been handled.  Check for errors.
406    if(! $self->{'encoding_command_statuses'} ) {
407      DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
408    } elsif( $self->{'encoding_command_statuses'}[-1] ) {
409      $self->whine( $para->[1]{'start_line'},
410        sprintf "Couldn't do %s: %s",
411          $self->{'encoding_command_reqs'  }[-1],
412          $self->{'encoding_command_statuses'}[-1],
413      );
414    } else {
415      DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
416    }
417
418  } else {
419    # Otherwise it's a syntax error
420    $self->whine( $para->[1]{'start_line'},
421      "Invalid =encoding syntax: $content"
422    );
423  }
424
425  return;
426}
427
428#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
429
430{
431my $m = -321;   # magic line number
432
433sub _gen_errata {
434  my $self = $_[0];
435  # Return 0 or more fake-o paragraphs explaining the accumulated
436  #  errors on this document.
437
438  return() unless $self->{'errata'} and keys %{$self->{'errata'}};
439
440  my @out;
441
442  foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
443    push @out,
444      ['=item', {'start_line' => $m}, "Around line $line:"],
445      map( ['~Para', {'start_line' => $m, '~cooked' => 1},
446        #['~Top', {'start_line' => $m},
447        $_
448        #]
449        ],
450        @{$self->{'errata'}{$line}}
451      )
452    ;
453  }
454
455  # TODO: report of unknown entities? unrenderable characters?
456
457  unshift @out,
458    ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
459    ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
460     "Hey! ",
461     ['B', {},
462      'The above document had some coding errors, which are explained below:'
463     ]
464    ],
465    ['=over',  {'start_line' => $m, 'errata' => 1}, ''],
466  ;
467
468  push @out,
469    ['=back',  {'start_line' => $m, 'errata' => 1}, ''],
470  ;
471
472  DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
473
474  return @out;
475}
476
477}
478
479#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
480
481##############################################################################
482##
483##  stop reading now stop reading now stop reading now stop reading now stop
484##
485##                         HERE IT BECOMES REALLY SCARY
486##
487##  stop reading now stop reading now stop reading now stop reading now stop
488##
489##############################################################################
490
491sub _ponder_paragraph_buffer {
492
493  # Para-token types as found in the buffer.
494  #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
495  #   =over, =back, =item
496  #   and the null =pod (to be complained about if over one line)
497  #
498  # "~data" paragraphs are something we generate at this level, depending on
499  # a currently open =over region
500
501  # Events fired:  Begin and end for:
502  #                   directivename (like head1 .. head4), item, extend,
503  #                   for (from =begin...=end, =for),
504  #                   over-bullet, over-number, over-text, over-block,
505  #                   item-bullet, item-number, item-text,
506  #                   Document,
507  #                   Data, Para, Verbatim
508  #                   B, C, longdirname (TODO -- wha?), etc. for all directives
509  #
510
511  my $self = $_[0];
512  my $paras;
513  return unless @{$paras = $self->{'paras'}};
514  my $curr_open = ($self->{'curr_open'} ||= []);
515
516  my $scratch;
517
518  DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
519
520  # We have something in our buffer.  So apparently the document has started.
521  unless($self->{'doc_has_started'}) {
522    $self->{'doc_has_started'} = 1;
523
524    my $starting_contentless;
525    $starting_contentless =
526     (
527       !@$curr_open
528       and @$paras and ! grep $_->[0] ne '~end', @$paras
529        # i.e., if the paras is all ~ends
530     )
531    ;
532    DEBUG and print "# Starting ",
533      $starting_contentless ? 'contentless' : 'contentful',
534      " document\n"
535    ;
536
537    $self->_handle_element_start(
538      ($scratch = 'Document'),
539      {
540        'start_line' => $paras->[0][1]{'start_line'},
541        $starting_contentless ? ( 'contentless' => 1 ) : (),
542      },
543    );
544  }
545
546  my($para, $para_type);
547  while(@$paras) {
548    last if @$paras == 1 and
549      ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
550        or $paras->[0][0] eq '=item' )
551    ;
552    # Those're the three kinds of paragraphs that require lookahead.
553    #   Actually, an "=item Foo" inside an <over type=text> region
554    #   and any =item inside an <over type=block> region (rare)
555    #   don't require any lookahead, but all others (bullets
556    #   and numbers) do.
557
558# TODO: whinge about many kinds of directives in non-resolving =for regions?
559# TODO: many?  like what?  =head1 etc?
560
561    $para = shift @$paras;
562    $para_type = $para->[0];
563
564    DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
565      $self->_dump_curr_open(), ")\n";
566
567    if($para_type eq '=for') {
568      next if $self->_ponder_for($para,$curr_open,$paras);
569
570    } elsif($para_type eq '=begin') {
571      next if $self->_ponder_begin($para,$curr_open,$paras);
572
573    } elsif($para_type eq '=end') {
574      next if $self->_ponder_end($para,$curr_open,$paras);
575
576    } elsif($para_type eq '~end') { # The virtual end-document signal
577      next if $self->_ponder_doc_end($para,$curr_open,$paras);
578    }
579
580
581    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
582    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
583    if(grep $_->[1]{'~ignore'}, @$curr_open) {
584      DEBUG > 1 and
585       print "Skipping $para_type paragraph because in ignore mode.\n";
586      next;
587    }
588    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
589    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
590
591    if($para_type eq '=pod') {
592      $self->_ponder_pod($para,$curr_open,$paras);
593
594    } elsif($para_type eq '=over') {
595      next if $self->_ponder_over($para,$curr_open,$paras);
596
597    } elsif($para_type eq '=back') {
598      next if $self->_ponder_back($para,$curr_open,$paras);
599
600    } else {
601
602      # All non-magical codes!!!
603
604      # Here we start using $para_type for our own twisted purposes, to
605      #  mean how it should get treated, not as what the element name
606      #  should be.
607
608      DEBUG > 1 and print "Pondering non-magical $para_type\n";
609
610      my $i;
611
612      # Enforce some =headN discipline
613      if($para_type =~ m/^=head\d$/s
614         and ! $self->{'accept_heads_anywhere'}
615         and @$curr_open
616         and $curr_open->[-1][0] eq '=over'
617      ) {
618        DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
619        $self->whine(
620          $para->[1]{'start_line'},
621          "You forgot a '=back' before '$para_type'"
622        );
623        unshift @$paras, ['=back', {}, ''], $para;   # close the =over
624        next;
625      }
626
627
628      if($para_type eq '=item') {
629
630        my $over;
631        unless(@$curr_open and
632               $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
633          $self->whine(
634            $para->[1]{'start_line'},
635            "'=item' outside of any '=over'"
636          );
637          unshift @$paras,
638            ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
639            $para
640          ;
641          next;
642        }
643
644
645        my $over_type = $over->[1]{'~type'};
646
647        if(!$over_type) {
648          # Shouldn't happen1
649          die "Typeless over in stack, starting at line "
650           . $over->[1]{'start_line'};
651
652        } elsif($over_type eq 'block') {
653          unless($curr_open->[-1][1]{'~bitched_about'}) {
654            $curr_open->[-1][1]{'~bitched_about'} = 1;
655            $self->whine(
656              $curr_open->[-1][1]{'start_line'},
657              "You can't have =items (as at line "
658              . $para->[1]{'start_line'}
659              . ") unless the first thing after the =over is an =item"
660            );
661          }
662          # Just turn it into a paragraph and reconsider it
663          $para->[0] = '~Para';
664          unshift @$paras, $para;
665          next;
666
667        } elsif($over_type eq 'text') {
668          my $item_type = $self->_get_item_type($para);
669            # That kills the content of the item if it's a number or bullet.
670          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
671
672          if($item_type eq 'text') {
673            # Nothing special needs doing for 'text'
674          } elsif($item_type eq 'number' or $item_type eq 'bullet') {
675            $self->whine(
676              $para->[1]{'start_line'},
677              "Expected text after =item, not a $item_type"
678            );
679            # Undo our clobbering:
680            push @$para, $para->[1]{'~orig_content'};
681            delete $para->[1]{'number'};
682             # Only a PROPER item-number element is allowed
683             #  to have a number attribute.
684          } else {
685            die "Unhandled item type $item_type"; # should never happen
686          }
687
688          # =item-text thingies don't need any assimilation, it seems.
689
690        } elsif($over_type eq 'number') {
691          my $item_type = $self->_get_item_type($para);
692            # That kills the content of the item if it's a number or bullet.
693          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
694
695          my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
696
697          if($item_type eq 'bullet') {
698            # Hm, it's not numeric.  Correct for this.
699            $para->[1]{'number'} = $expected_value;
700            $self->whine(
701              $para->[1]{'start_line'},
702              "Expected '=item $expected_value'"
703            );
704            push @$para, $para->[1]{'~orig_content'};
705              # restore the bullet, blocking the assimilation of next para
706
707          } elsif($item_type eq 'text') {
708            # Hm, it's not numeric.  Correct for this.
709            $para->[1]{'number'} = $expected_value;
710            $self->whine(
711              $para->[1]{'start_line'},
712              "Expected '=item $expected_value'"
713            );
714            # Text content will still be there and will block next ~Para
715
716          } elsif($item_type ne 'number') {
717            die "Unknown item type $item_type"; # should never happen
718
719          } elsif($expected_value == $para->[1]{'number'}) {
720            DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
721
722          } else {
723            DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
724             " instead of the expected value of $expected_value\n";
725            $self->whine(
726              $para->[1]{'start_line'},
727              "You have '=item " . $para->[1]{'number'} .
728              "' instead of the expected '=item $expected_value'"
729            );
730            $para->[1]{'number'} = $expected_value;  # correcting!!
731          }
732
733          if(@$para == 2) {
734            # For the cases where we /didn't/ push to @$para
735            if($paras->[0][0] eq '~Para') {
736              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
737              push @$para, splice @{shift @$paras},2;
738            } else {
739              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
740              push @$para, '';  # Just so it's not contentless
741            }
742          }
743
744
745        } elsif($over_type eq 'bullet') {
746          my $item_type = $self->_get_item_type($para);
747            # That kills the content of the item if it's a number or bullet.
748          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
749
750          if($item_type eq 'bullet') {
751            # as expected!
752
753            if( $para->[1]{'~_freaky_para_hack'} ) {
754              DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
755              push @$para, delete $para->[1]{'~_freaky_para_hack'};
756            }
757
758          } elsif($item_type eq 'number') {
759            $self->whine(
760              $para->[1]{'start_line'},
761              "Expected '=item *'"
762            );
763            push @$para, $para->[1]{'~orig_content'};
764             # and block assimilation of the next paragraph
765            delete $para->[1]{'number'};
766             # Only a PROPER item-number element is allowed
767             #  to have a number attribute.
768          } elsif($item_type eq 'text') {
769            $self->whine(
770              $para->[1]{'start_line'},
771              "Expected '=item *'"
772            );
773             # But doesn't need processing.  But it'll block assimilation
774             #  of the next para.
775          } else {
776            die "Unhandled item type $item_type"; # should never happen
777          }
778
779          if(@$para == 2) {
780            # For the cases where we /didn't/ push to @$para
781            if($paras->[0][0] eq '~Para') {
782              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
783              push @$para, splice @{shift @$paras},2;
784            } else {
785              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
786              push @$para, '';  # Just so it's not contentless
787            }
788          }
789
790        } else {
791          die "Unhandled =over type \"$over_type\"?";
792          # Shouldn't happen!
793        }
794
795        $para_type = 'Plain';
796        $para->[0] .= '-' . $over_type;
797        # Whew.  Now fall thru and process it.
798
799
800      } elsif($para_type eq '=extend') {
801        # Well, might as well implement it here.
802        $self->_ponder_extend($para);
803        next;  # and skip
804      } elsif($para_type eq '=encoding') {
805        # Not actually acted on here, but we catch errors here.
806        $self->_handle_encoding_second_level($para);
807        next unless $self->keep_encoding_directive;
808        $para_type = 'Plain';
809      } elsif($para_type eq '~Verbatim') {
810        $para->[0] = 'Verbatim';
811        $para_type = '?Verbatim';
812      } elsif($para_type eq '~Para') {
813        $para->[0] = 'Para';
814        $para_type = '?Plain';
815      } elsif($para_type eq 'Data') {
816        $para->[0] = 'Data';
817        $para_type = '?Data';
818      } elsif( $para_type =~ s/^=//s
819        and defined( $para_type = $self->{'accept_directives'}{$para_type} )
820      ) {
821        DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
822      } else {
823        # An unknown directive!
824        DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
825         $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
826        ;
827        $self->whine(
828          $para->[1]{'start_line'},
829          "Unknown directive: $para->[0]"
830        );
831
832        # And maybe treat it as text instead of just letting it go?
833        next;
834      }
835
836      if($para_type =~ s/^\?//s) {
837        if(! @$curr_open) {  # usual case
838          DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
839        } else {
840          my @fors = grep $_->[0] eq '=for', @$curr_open;
841          DEBUG > 1 and print "Containing fors: ",
842            join(',', map $_->[1]{'target'}, @fors), "\n";
843
844          if(! @fors) {
845            DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
846
847          #} elsif(grep $_->[1]{'~resolve'}, @fors) {
848          #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
849          } elsif( $fors[-1][1]{'~resolve'} ) {
850            # Look to the immediately containing for
851
852            if($para_type eq 'Data') {
853              DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
854              $para->[0] = 'Para';
855              $para_type = 'Plain';
856            } else {
857              DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
858            }
859          } else {
860            DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
861            $para->[0] = $para_type = 'Data';
862          }
863        }
864      }
865
866      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
867      if($para_type eq 'Plain') {
868        $self->_ponder_Plain($para);
869      } elsif($para_type eq 'Verbatim') {
870        $self->_ponder_Verbatim($para);
871      } elsif($para_type eq 'Data') {
872        $self->_ponder_Data($para);
873      } else {
874        die "\$para type is $para_type -- how did that happen?";
875        # Shouldn't happen.
876      }
877
878      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
879      $para->[0] =~ s/^[~=]//s;
880
881      DEBUG and print "\n", pretty($para), "\n";
882
883      # traverse the treelet (which might well be just one string scalar)
884      $self->{'content_seen'} ||= 1;
885      $self->_traverse_treelet_bit(@$para);
886    }
887  }
888
889  return;
890}
891
892###########################################################################
893# The sub-ponderers...
894
895
896
897sub _ponder_for {
898  my ($self,$para,$curr_open,$paras) = @_;
899
900  # Fake it out as a begin/end
901  my $target;
902
903  if(grep $_->[1]{'~ignore'}, @$curr_open) {
904    DEBUG > 1 and print "Ignoring ignorable =for\n";
905    return 1;
906  }
907
908  for(my $i = 2; $i < @$para; ++$i) {
909    if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
910      $target = $1;
911      last;
912    }
913  }
914  unless(defined $target) {
915    $self->whine(
916      $para->[1]{'start_line'},
917      "=for without a target?"
918    );
919    return 1;
920  }
921  DEBUG > 1 and
922   print "Faking out a =for $target as a =begin $target / =end $target\n";
923
924  $para->[0] = 'Data';
925
926  unshift @$paras,
927    ['=begin',
928      {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
929      $target,
930    ],
931    $para,
932    ['=end',
933      {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
934      $target,
935    ],
936  ;
937
938  return 1;
939}
940
941sub _ponder_begin {
942  my ($self,$para,$curr_open,$paras) = @_;
943  my $content = join ' ', splice @$para, 2;
944  $content =~ s/^\s+//s;
945  $content =~ s/\s+$//s;
946  unless(length($content)) {
947    $self->whine(
948      $para->[1]{'start_line'},
949      "=begin without a target?"
950    );
951    DEBUG and print "Ignoring targetless =begin\n";
952    return 1;
953  }
954
955  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
956  $para->[1]{'title'} = $title if ($title);
957  $para->[1]{'target'} = $target;  # without any ':'
958  $content = $target; # strip off the title
959
960  $content =~ s/^:!/!:/s;
961  my $neg;  # whether this is a negation-match
962  $neg = 1        if $content =~ s/^!//s;
963  my $to_resolve;  # whether to process formatting codes
964  $to_resolve = 1 if $content =~ s/^://s;
965
966  my $dont_ignore; # whether this target matches us
967
968  foreach my $target_name (
969    split(',', $content, -1),
970    $neg ? () : '*'
971  ) {
972    DEBUG > 2 and
973     print " Considering whether =begin $content matches $target_name\n";
974    next unless $self->{'accept_targets'}{$target_name};
975
976    DEBUG > 2 and
977     print "  It DOES match the acceptable target $target_name!\n";
978    $to_resolve = 1
979      if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
980    $dont_ignore = 1;
981    $para->[1]{'target_matching'} = $target_name;
982    last; # stop looking at other target names
983  }
984
985  if($neg) {
986    if( $dont_ignore ) {
987      $dont_ignore = '';
988      delete $para->[1]{'target_matching'};
989      DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
990    } else {
991      $dont_ignore = 1;
992      $para->[1]{'target_matching'} = '!';
993      DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
994    }
995  }
996
997  $para->[0] = '=for';  # Just what we happen to call these, internally
998  $para->[1]{'~really'} ||= '=begin';
999  $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
1000  $para->[1]{'~resolve'}  = $to_resolve || 0;
1001
1002  DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
1003    "ignore contents of this region\n";
1004  DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
1005    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1006  DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
1007
1008  push @$curr_open, $para;
1009  if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1010    DEBUG > 1 and print "Ignoring ignorable =begin\n";
1011  } else {
1012    $self->{'content_seen'} ||= 1;
1013    $self->_handle_element_start((my $scratch='for'), $para->[1]);
1014  }
1015
1016  return 1;
1017}
1018
1019sub _ponder_end {
1020  my ($self,$para,$curr_open,$paras) = @_;
1021  my $content = join ' ', splice @$para, 2;
1022  $content =~ s/^\s+//s;
1023  $content =~ s/\s+$//s;
1024  DEBUG and print "Ogling '=end $content' directive\n";
1025
1026  unless(length($content)) {
1027    $self->whine(
1028      $para->[1]{'start_line'},
1029      "'=end' without a target?" . (
1030        ( @$curr_open and $curr_open->[-1][0] eq '=for' )
1031        ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1032        : ''
1033      )
1034    );
1035    DEBUG and print "Ignoring targetless =end\n";
1036    return 1;
1037  }
1038
1039  unless($content =~ m/^\S+$/) {  # i.e., unless it's one word
1040    $self->whine(
1041      $para->[1]{'start_line'},
1042      "'=end $content' is invalid.  (Stack: "
1043      . $self->_dump_curr_open() . ')'
1044    );
1045    DEBUG and print "Ignoring mistargetted =end $content\n";
1046    return 1;
1047  }
1048
1049  unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1050    $self->whine(
1051      $para->[1]{'start_line'},
1052      "=end $content without matching =begin.  (Stack: "
1053      . $self->_dump_curr_open() . ')'
1054    );
1055    DEBUG and print "Ignoring mistargetted =end $content\n";
1056    return 1;
1057  }
1058
1059  unless($content eq $curr_open->[-1][1]{'target'}) {
1060    $self->whine(
1061      $para->[1]{'start_line'},
1062      "=end $content doesn't match =begin "
1063      . $curr_open->[-1][1]{'target'}
1064      . ".  (Stack: "
1065      . $self->_dump_curr_open() . ')'
1066    );
1067    DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1068    return 1;
1069  }
1070
1071  # Else it's okay to close...
1072  if(grep $_->[1]{'~ignore'}, @$curr_open) {
1073    DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1074    # And that may be because of this to-be-closed =for region, or some
1075    #  other one, but it doesn't matter.
1076  } else {
1077    $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1078      # what's that for?
1079
1080    $self->{'content_seen'} ||= 1;
1081    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
1082  }
1083  DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1084  pop @$curr_open;
1085
1086  return 1;
1087}
1088
1089sub _ponder_doc_end {
1090  my ($self,$para,$curr_open,$paras) = @_;
1091  if(@$curr_open) { # Deal with things left open
1092    DEBUG and print "Stack is nonempty at end-document: (",
1093      $self->_dump_curr_open(), ")\n";
1094
1095    DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1096    unshift @$paras, $self->_closers_for_all_curr_open;
1097    # Make sure there is exactly one ~end in the parastack, at the end:
1098    @$paras = grep $_->[0] ne '~end', @$paras;
1099    push @$paras, $para, $para;
1100     # We need two -- once for the next cycle where we
1101     #  generate errata, and then another to be at the end
1102     #  when that loop back around to process the errata.
1103    return 1;
1104
1105  } else {
1106    DEBUG and print "Okay, stack is empty now.\n";
1107  }
1108
1109  # Try generating errata section, if applicable
1110  unless($self->{'~tried_gen_errata'}) {
1111    $self->{'~tried_gen_errata'} = 1;
1112    my @extras = $self->_gen_errata();
1113    if(@extras) {
1114      unshift @$paras, @extras;
1115      DEBUG and print "Generated errata... relooping...\n";
1116      return 1;  # I.e., loop around again to process these fake-o paragraphs
1117    }
1118  }
1119
1120  splice @$paras; # Well, that's that for this paragraph buffer.
1121  DEBUG and print "Throwing end-document event.\n";
1122
1123  $self->_handle_element_end( my $scratch = 'Document' );
1124  return 1; # Hasta la byebye
1125}
1126
1127sub _ponder_pod {
1128  my ($self,$para,$curr_open,$paras) = @_;
1129  $self->whine(
1130    $para->[1]{'start_line'},
1131    "=pod directives shouldn't be over one line long!  Ignoring all "
1132     . (@$para - 2) . " lines of content"
1133  ) if @$para > 3;
1134
1135  # Content ignored unless 'pod_handler' is set
1136  if (my $pod_handler = $self->{'pod_handler'}) {
1137      my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1138      $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1139      $pod_handler->($line, $line_num, $self);
1140  }
1141
1142  # The surrounding methods set content_seen, so let us remain consistent.
1143  # I do not know why it was not here before -- should it not be here?
1144  # $self->{'content_seen'} ||= 1;
1145
1146  return;
1147}
1148
1149sub _ponder_over {
1150  my ($self,$para,$curr_open,$paras) = @_;
1151  return 1 unless @$paras;
1152  my $list_type;
1153
1154  if($paras->[0][0] eq '=item') { # most common case
1155    $list_type = $self->_get_initial_item_type($paras->[0]);
1156
1157  } elsif($paras->[0][0] eq '=back') {
1158    # Ignore empty lists by default
1159    if ($self->{'parse_empty_lists'}) {
1160      $list_type = 'empty';
1161    } else {
1162      shift @$paras;
1163      return 1;
1164    }
1165  } elsif($paras->[0][0] eq '~end') {
1166    $self->whine(
1167      $para->[1]{'start_line'},
1168      "=over is the last thing in the document?!"
1169    );
1170    return 1; # But feh, ignore it.
1171  } else {
1172    $list_type = 'block';
1173  }
1174  $para->[1]{'~type'} = $list_type;
1175  push @$curr_open, $para;
1176   # yes, we reuse the paragraph as a stack item
1177
1178  my $content = join ' ', splice @$para, 2;
1179  my $overness;
1180  if($content =~ m/^\s*$/s) {
1181    $para->[1]{'indent'} = 4;
1182  } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
1183    no integer;
1184    $para->[1]{'indent'} = $1;
1185    if($1 == 0) {
1186      $self->whine(
1187        $para->[1]{'start_line'},
1188        "Can't have a 0 in =over $content"
1189      );
1190      $para->[1]{'indent'} = 4;
1191    }
1192  } else {
1193    $self->whine(
1194      $para->[1]{'start_line'},
1195      "=over should be: '=over' or '=over positive_number'"
1196    );
1197    $para->[1]{'indent'} = 4;
1198  }
1199  DEBUG > 1 and print "=over found of type $list_type\n";
1200
1201  $self->{'content_seen'} ||= 1;
1202  $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1203
1204  return;
1205}
1206
1207sub _ponder_back {
1208  my ($self,$para,$curr_open,$paras) = @_;
1209  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1210
1211  my $content = join ' ', splice @$para, 2;
1212  if($content =~ m/\S/) {
1213    $self->whine(
1214      $para->[1]{'start_line'},
1215      "=back doesn't take any parameters, but you said =back $content"
1216    );
1217  }
1218
1219  if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1220    DEBUG > 1 and print "=back happily closes matching =over\n";
1221    # Expected case: we're closing the most recently opened thing
1222    #my $over = pop @$curr_open;
1223    $self->{'content_seen'} ||= 1;
1224    $self->_handle_element_end( my $scratch =
1225      'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1226    );
1227  } else {
1228    DEBUG > 1 and print "=back found without a matching =over.  Stack: (",
1229        join(', ', map $_->[0], @$curr_open), ").\n";
1230    $self->whine(
1231      $para->[1]{'start_line'},
1232      '=back without =over'
1233    );
1234    return 1; # and ignore it
1235  }
1236}
1237
1238sub _ponder_item {
1239  my ($self,$para,$curr_open,$paras) = @_;
1240  my $over;
1241  unless(@$curr_open and
1242         $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1243    $self->whine(
1244      $para->[1]{'start_line'},
1245      "'=item' outside of any '=over'"
1246    );
1247    unshift @$paras,
1248      ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1249      $para
1250    ;
1251    return 1;
1252  }
1253
1254
1255  my $over_type = $over->[1]{'~type'};
1256
1257  if(!$over_type) {
1258    # Shouldn't happen1
1259    die "Typeless over in stack, starting at line "
1260     . $over->[1]{'start_line'};
1261
1262  } elsif($over_type eq 'block') {
1263    unless($curr_open->[-1][1]{'~bitched_about'}) {
1264      $curr_open->[-1][1]{'~bitched_about'} = 1;
1265      $self->whine(
1266        $curr_open->[-1][1]{'start_line'},
1267        "You can't have =items (as at line "
1268        . $para->[1]{'start_line'}
1269        . ") unless the first thing after the =over is an =item"
1270      );
1271    }
1272    # Just turn it into a paragraph and reconsider it
1273    $para->[0] = '~Para';
1274    unshift @$paras, $para;
1275    return 1;
1276
1277  } elsif($over_type eq 'text') {
1278    my $item_type = $self->_get_item_type($para);
1279      # That kills the content of the item if it's a number or bullet.
1280    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1281
1282    if($item_type eq 'text') {
1283      # Nothing special needs doing for 'text'
1284    } elsif($item_type eq 'number' or $item_type eq 'bullet') {
1285      $self->whine(
1286          $para->[1]{'start_line'},
1287          "Expected text after =item, not a $item_type"
1288      );
1289      # Undo our clobbering:
1290      push @$para, $para->[1]{'~orig_content'};
1291      delete $para->[1]{'number'};
1292       # Only a PROPER item-number element is allowed
1293       #  to have a number attribute.
1294    } else {
1295      die "Unhandled item type $item_type"; # should never happen
1296    }
1297
1298    # =item-text thingies don't need any assimilation, it seems.
1299
1300  } elsif($over_type eq 'number') {
1301    my $item_type = $self->_get_item_type($para);
1302      # That kills the content of the item if it's a number or bullet.
1303    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1304
1305    my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1306
1307    if($item_type eq 'bullet') {
1308      # Hm, it's not numeric.  Correct for this.
1309      $para->[1]{'number'} = $expected_value;
1310      $self->whine(
1311        $para->[1]{'start_line'},
1312        "Expected '=item $expected_value'"
1313      );
1314      push @$para, $para->[1]{'~orig_content'};
1315        # restore the bullet, blocking the assimilation of next para
1316
1317    } elsif($item_type eq 'text') {
1318      # Hm, it's not numeric.  Correct for this.
1319      $para->[1]{'number'} = $expected_value;
1320      $self->whine(
1321        $para->[1]{'start_line'},
1322        "Expected '=item $expected_value'"
1323      );
1324      # Text content will still be there and will block next ~Para
1325
1326    } elsif($item_type ne 'number') {
1327      die "Unknown item type $item_type"; # should never happen
1328
1329    } elsif($expected_value == $para->[1]{'number'}) {
1330      DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1331
1332    } else {
1333      DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1334       " instead of the expected value of $expected_value\n";
1335      $self->whine(
1336        $para->[1]{'start_line'},
1337        "You have '=item " . $para->[1]{'number'} .
1338        "' instead of the expected '=item $expected_value'"
1339      );
1340      $para->[1]{'number'} = $expected_value;  # correcting!!
1341    }
1342
1343    if(@$para == 2) {
1344      # For the cases where we /didn't/ push to @$para
1345      if($paras->[0][0] eq '~Para') {
1346        DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1347        push @$para, splice @{shift @$paras},2;
1348      } else {
1349        DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1350        push @$para, '';  # Just so it's not contentless
1351      }
1352    }
1353
1354
1355  } elsif($over_type eq 'bullet') {
1356    my $item_type = $self->_get_item_type($para);
1357      # That kills the content of the item if it's a number or bullet.
1358    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1359
1360    if($item_type eq 'bullet') {
1361      # as expected!
1362
1363      if( $para->[1]{'~_freaky_para_hack'} ) {
1364        DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1365        push @$para, delete $para->[1]{'~_freaky_para_hack'};
1366      }
1367
1368    } elsif($item_type eq 'number') {
1369      $self->whine(
1370        $para->[1]{'start_line'},
1371        "Expected '=item *'"
1372      );
1373      push @$para, $para->[1]{'~orig_content'};
1374       # and block assimilation of the next paragraph
1375      delete $para->[1]{'number'};
1376       # Only a PROPER item-number element is allowed
1377       #  to have a number attribute.
1378    } elsif($item_type eq 'text') {
1379      $self->whine(
1380        $para->[1]{'start_line'},
1381        "Expected '=item *'"
1382      );
1383       # But doesn't need processing.  But it'll block assimilation
1384       #  of the next para.
1385    } else {
1386      die "Unhandled item type $item_type"; # should never happen
1387    }
1388
1389    if(@$para == 2) {
1390      # For the cases where we /didn't/ push to @$para
1391      if($paras->[0][0] eq '~Para') {
1392        DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1393        push @$para, splice @{shift @$paras},2;
1394      } else {
1395        DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1396        push @$para, '';  # Just so it's not contentless
1397      }
1398    }
1399
1400  } else {
1401    die "Unhandled =over type \"$over_type\"?";
1402    # Shouldn't happen!
1403  }
1404  $para->[0] .= '-' . $over_type;
1405
1406  return;
1407}
1408
1409sub _ponder_Plain {
1410  my ($self,$para) = @_;
1411  DEBUG and print " giving plain treatment...\n";
1412  unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1413    or $para->[1]{'~cooked'}
1414  ) {
1415    push @$para,
1416    @{$self->_make_treelet(
1417      join("\n", splice(@$para, 2)),
1418      $para->[1]{'start_line'}
1419    )};
1420  }
1421  # Empty paragraphs don't need a treelet for any reason I can see.
1422  # And precooked paragraphs already have a treelet.
1423  return;
1424}
1425
1426sub _ponder_Verbatim {
1427  my ($self,$para) = @_;
1428  DEBUG and print " giving verbatim treatment...\n";
1429
1430  $para->[1]{'xml:space'} = 'preserve';
1431
1432  my $indent = $self->strip_verbatim_indent;
1433  if ($indent && ref $indent eq 'CODE') {
1434      my @shifted = (shift @{$para}, shift @{$para});
1435      $indent = $indent->($para);
1436      unshift @{$para}, @shifted;
1437  }
1438
1439  for(my $i = 2; $i < @$para; $i++) {
1440    foreach my $line ($para->[$i]) { # just for aliasing
1441      # Strip indentation.
1442      $line =~ s/^\Q$indent// if $indent
1443          && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1444      while( $line =~
1445        # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1446        # tabs are at every EIGHTH column.  For portability, it has to be
1447        # one setting everywhere, and 8th wins.
1448        s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1449      ) {}
1450
1451      # TODO: whinge about (or otherwise treat) unindented or overlong lines
1452
1453    }
1454  }
1455
1456  # Now the VerbatimFormatted hoodoo...
1457  if( $self->{'accept_codes'} and
1458      $self->{'accept_codes'}{'VerbatimFormatted'}
1459  ) {
1460    while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1461     # Kill any number of terminal newlines
1462    $self->_verbatim_format($para);
1463  } elsif ($self->{'codes_in_verbatim'}) {
1464    push @$para,
1465    @{$self->_make_treelet(
1466      join("\n", splice(@$para, 2)),
1467      $para->[1]{'start_line'}, $para->[1]{'xml:space'}
1468    )};
1469    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1470  } else {
1471    push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1472    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1473  }
1474  return;
1475}
1476
1477sub _ponder_Data {
1478  my ($self,$para) = @_;
1479  DEBUG and print " giving data treatment...\n";
1480  $para->[1]{'xml:space'} = 'preserve';
1481  push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1482  return;
1483}
1484
1485
1486
1487
1488###########################################################################
1489
1490sub _traverse_treelet_bit {  # for use only by the routine above
1491  my($self, $name) = splice @_,0,2;
1492
1493  my $scratch;
1494  $self->_handle_element_start(($scratch=$name), shift @_);
1495
1496  while (@_) {
1497    my $x = shift;
1498    if (ref($x)) {
1499      &_traverse_treelet_bit($self, @$x);
1500    } else {
1501      $x .= shift while @_ && !ref($_[0]);
1502      $self->_handle_text($x);
1503    }
1504  }
1505
1506  $self->_handle_element_end($scratch=$name);
1507  return;
1508}
1509
1510#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1511
1512sub _closers_for_all_curr_open {
1513  my $self = $_[0];
1514  my @closers;
1515  foreach my $still_open (@{  $self->{'curr_open'} || return  }) {
1516    my @copy = @$still_open;
1517    $copy[1] = {%{ $copy[1] }};
1518    #$copy[1]{'start_line'} = -1;
1519    if($copy[0] eq '=for') {
1520      $copy[0] = '=end';
1521    } elsif($copy[0] eq '=over') {
1522      $self->whine(
1523        $still_open->[1]{start_line} ,
1524        "=over without closing =back"
1525      );
1526
1527      $copy[0] = '=back';
1528    } else {
1529      die "I don't know how to auto-close an open $copy[0] region";
1530    }
1531
1532    unless( @copy > 2 ) {
1533      push @copy, $copy[1]{'target'};
1534      $copy[-1] = '' unless defined $copy[-1];
1535       # since =over's don't have targets
1536    }
1537
1538    $copy[1]{'fake-closer'} = 1;
1539
1540    DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1541    unshift @closers, \@copy;
1542  }
1543  return @closers;
1544}
1545
1546#--------------------------------------------------------------------------
1547
1548sub _verbatim_format {
1549  my($it, $p) = @_;
1550
1551  my $formatting;
1552
1553  for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1554    DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1555    $p->[$i] .= "\n";
1556     # Unlike with simple Verbatim blocks, we don't end up just doing
1557     # a join("\n", ...) on the contents, so we have to append a
1558     # newline to ever line, and then nix the last one later.
1559  }
1560
1561  if( DEBUG > 4 ) {
1562    print "<<\n";
1563    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1564      print "_verbatim_format $i: $p->[$i]";
1565    }
1566    print ">>\n";
1567  }
1568
1569  for(my $i = $#$p; $i > 2; $i--) {
1570    # work backwards over the lines, except the first (#2)
1571
1572    #next unless $p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s
1573    #        and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1574     # look at a formatty line preceding a nonformatty one
1575    DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1576    if($p->[$i]   =~ m{^#:([ \^\/\%]*)\n?$}s) {
1577      DEBUG > 5 and print "  It's a formatty line.  ",
1578       "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1579
1580      if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1581        DEBUG > 5 and print "  Previous line is formatty!  Skipping this one.\n";
1582        next;
1583      } else {
1584        DEBUG > 5 and print "  Previous line is non-formatty!  Yay!\n";
1585      }
1586    } else {
1587      DEBUG > 5 and print "  It's not a formatty line.  Ignoring\n";
1588      next;
1589    }
1590
1591    # A formatty line has to have #: in the first two columns, and uses
1592    # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1593    # Example:
1594    #   What do you want?  i like pie. [or whatever]
1595    # #:^^^^^^^^^^^^^^^^^              /////////////
1596
1597
1598    DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1599
1600    $formatting = '  ' . $1;
1601    $formatting =~ s/\s+$//s; # nix trailing whitespace
1602    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1603      splice @$p,$i,1; # remove this line
1604      $i--; # don't consider next line
1605      next;
1606    }
1607
1608    if( length($formatting) >= length($p->[$i-1]) ) {
1609      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1610    } else {
1611      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1612    }
1613    # Make $formatting and the previous line be exactly the same length,
1614    # with $formatting having a " " as the last character.
1615
1616    DEBUG > 4 and print "Formatting <$formatting>    on <", $p->[$i-1], ">\n";
1617
1618
1619    my @new_line;
1620    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1621      #print "Format matches $1\n";
1622
1623      if($2) {
1624        #print "SKIPPING <$2>\n";
1625        push @new_line,
1626          substr($p->[$i-1], pos($formatting)-length($1), length($1));
1627      } else {
1628        #print "SNARING $+\n";
1629        push @new_line, [
1630          (
1631            $3 ? 'VerbatimB'  :
1632            $4 ? 'VerbatimI'  :
1633            $5 ? 'VerbatimBI' : die("Should never get called")
1634          ), {},
1635          substr($p->[$i-1], pos($formatting)-length($1), length($1))
1636        ];
1637        #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1638      }
1639    }
1640    my @nixed =
1641      splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1642    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1643
1644    DEBUG > 6 and print "New version of the above line is these tokens (",
1645      scalar(@new_line), "):",
1646      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1647    $i--; # So the next line we scrutinize is the line before the one
1648          #  that we just went and formatted
1649  }
1650
1651  $p->[0] = 'VerbatimFormatted';
1652
1653  # Collapse adjacent text nodes, just for kicks.
1654  for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1655    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1656      DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1657      $p->[$i] .= splice @$p, $i+1, 1; # merge
1658      --$i;  # and back up
1659    }
1660  }
1661
1662  # Now look for the last text token, and remove the terminal newline
1663  for( my $i = $#$p; $i >= 2; $i-- ) {
1664    # work backwards over the tokens, even the first
1665    if( !ref($p->[$i]) ) {
1666      if($p->[$i] =~ s/\n$//s) {
1667        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1668      } else {
1669        DEBUG > 5 and print
1670         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1671      }
1672      last; # we only want the next one
1673    }
1674  }
1675
1676  return;
1677}
1678
1679
1680#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1681
1682
1683sub _treelet_from_formatting_codes {
1684  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
1685  #  Like [ '~Top', {'start_line' => $start_line},
1686  #            "I like ",
1687  #            [ 'B', {}, "pie" ],
1688  #            "!"
1689  #       ]
1690
1691  my($self, $para, $start_line, $preserve_space) = @_;
1692
1693  my $treelet = ['~Top', {'start_line' => $start_line},];
1694
1695  unless ($preserve_space || $self->{'preserve_whitespace'}) {
1696    $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1697    $para =~ s/ $//;
1698    $para =~ s/^ //;
1699  }
1700
1701  # Only apparent problem the above code is that N<<  >> turns into
1702  # N<< >>.  But then, word wrapping does that too!  So don't do that!
1703
1704  my @stack;
1705  my @lineage = ($treelet);
1706  my $raw = ''; # raw content of L<> fcode before splitting/processing
1707    # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1708    # into just 1 ' '. Is this the regex's doing or 'raw's?
1709  my $inL = 0;
1710
1711  DEBUG > 4 and print "Paragraph:\n$para\n\n";
1712
1713  # Here begins our frightening tokenizer RE.  The following regex matches
1714  # text in four main parts:
1715  #
1716  #  * Start-codes.  The first alternative matches C< or C<<, the latter
1717  #    followed by some whitespace.  $1 will hold the entire start code
1718  #    (including any space following a multiple-angle-bracket delimiter),
1719  #    and $2 will hold only the additional brackets past the first in a
1720  #    multiple-bracket delimiter.  length($2) + 1 will be the number of
1721  #    closing brackets we have to find.
1722  #
1723  #  * Closing brackets.  Match some amount of whitespace followed by
1724  #    multiple close brackets.  The logic to see if this closes anything
1725  #    is down below.  Note that in order to parse C<<  >> correctly, we
1726  #    have to use look-behind (?<=\s\s), since the match of the starting
1727  #    code will have consumed the whitespace.
1728  #
1729  #  * A single closing bracket, to close a simple code like C<>.
1730  #
1731  #  * Something that isn't a start or end code.  We have to be careful
1732  #    about accepting whitespace, since perlpodspec says that any whitespace
1733  #    before a multiple-bracket closing delimiter should be ignored.
1734  #
1735  while($para =~
1736    m/\G
1737      (?:
1738        # Match starting codes, including the whitespace following a
1739        # multiple-delimiter start code.  $1 gets the whole start code and
1740        # $2 gets all but one of the <s in the multiple-bracket case.
1741        ([A-Z]<(?:(<+)\s+)?)
1742        |
1743        # Match multiple-bracket end codes.  $3 gets the whitespace that
1744        # should be discarded before an end bracket but kept in other cases
1745        # and $4 gets the end brackets themselves.
1746        (\s+|(?<=\s\s))(>{2,})
1747        |
1748        (\s?>)          # $5: simple end-codes
1749        |
1750        (               # $6: stuff containing no start-codes or end-codes
1751          (?:
1752            [^A-Z\s>]
1753            |
1754            (?:
1755              [A-Z](?!<)
1756            )
1757            |
1758            # whitespace is ok, but we don't want to eat the whitespace before
1759            # a multiple-bracket end code.
1760            # NOTE: we may still have problems with e.g. S<<    >>
1761            (?:
1762              \s(?!\s*>{2,})
1763            )
1764          )+
1765        )
1766      )
1767    /xgo
1768  ) {
1769    DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1770    if(defined $1) {
1771      if(defined $2) {
1772        DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1773        push @stack, length($2) + 1;
1774          # length of the necessary complex end-code string
1775      } else {
1776        DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1777        push @stack, 0;  # signal that we're looking for simple
1778      }
1779      push @lineage, [ substr($1,0,1), {}, ];  # new node object
1780      push @{ $lineage[-2] }, $lineage[-1];
1781      if ('L' eq substr($1,0,1)) {
1782        $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1783        $inL = 1;
1784      } else {
1785        $raw .= $1 if $inL;
1786      }
1787
1788    } elsif(defined $4) {
1789      DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1790      # This is where it gets messy...
1791      if(! @stack) {
1792        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
1793        DEBUG > 4 and print " But it's really just stuff.\n";
1794        push @{ $lineage[-1] }, $3, $4;
1795        next;
1796      } elsif(!$stack[-1]) {
1797        # We saw " >>>>" but needed only ">".  Back pos up.
1798        DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1799        push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1800        pos($para) = pos($para) - length($4) + 1;
1801      } elsif($stack[-1] == length($4)) {
1802        # We found " >>>>", and it was exactly what we needed.  Commonest case.
1803        DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1804      } elsif($stack[-1] < length($4)) {
1805        # We saw " >>>>" but needed only " >>".  Back pos up.
1806        DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1807        pos($para) = pos($para) - length($4) + $stack[-1];
1808      } else {
1809        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
1810        DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1811        push @{ $lineage[-1] }, $3, $4;
1812        next;
1813      }
1814      #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1815
1816      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1817      # Keep the element from being childless
1818
1819      pop @stack;
1820      pop @lineage;
1821
1822      unless (@stack) { # not in an L if there are no open fcodes
1823        $inL = 0;
1824        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1825          $lineage[-1][-1][1]{'raw'} = $raw
1826        }
1827      }
1828      $raw .= $3.$4 if $inL;
1829
1830    } elsif(defined $5) {
1831      DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1832
1833      if(@stack and ! $stack[-1]) {
1834        # We're indeed expecting a simple end-code
1835        DEBUG > 4 and print " It's indeed an end-code.\n";
1836
1837        if(length($5) == 2) { # There was a space there: " >"
1838          push @{ $lineage[-1] }, ' ';
1839        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1840          push @{ $lineage[-1] }, ''; # keep it from being really childless
1841        }
1842
1843        pop @stack;
1844        pop @lineage;
1845      } else {
1846        DEBUG > 4 and print " It's just stuff.\n";
1847        push @{ $lineage[-1] }, $5;
1848      }
1849
1850      unless (@stack) { # not in an L if there are no open fcodes
1851        $inL = 0;
1852        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1853          $lineage[-1][-1][1]{'raw'} = $raw
1854        }
1855      }
1856      $raw .= $5 if $inL;
1857
1858    } elsif(defined $6) {
1859      DEBUG > 3 and print "Found stuff \"$6\"\n";
1860      push @{ $lineage[-1] }, $6;
1861      $raw .= $6 if $inL;
1862        # XXX does not capture multiplace whitespaces -- 'raw' ends up with
1863        #     at most 1 leading/trailing whitespace, why not all of it?
1864
1865    } else {
1866      # should never ever ever ever happen
1867      DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1868      die "SPORK 512512!";
1869    }
1870  }
1871
1872  if(@stack) { # Uhoh, some sequences weren't closed.
1873    my $x= "...";
1874    while(@stack) {
1875      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1876      # Hmmmmm!
1877
1878      my $code         = (pop @lineage)->[0];
1879      my $ender_length =  pop @stack;
1880      if($ender_length) {
1881        --$ender_length;
1882        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1883      } else {
1884        $x = $code . "<$x>";
1885      }
1886    }
1887    DEBUG > 1 and print "Unterminated $x sequence\n";
1888    $self->whine($start_line,
1889      "Unterminated $x sequence",
1890    );
1891  }
1892
1893  return $treelet;
1894}
1895
1896#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1897
1898sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
1899  return stringify_lol($_[1]);
1900}
1901
1902sub stringify_lol {  # function: stringify_lol($lol)
1903  my $string_form = '';
1904  _stringify_lol( $_[0] => \$string_form );
1905  return $string_form;
1906}
1907
1908sub _stringify_lol {  # the real recursor
1909  my($lol, $to) = @_;
1910  for(my $i = 2; $i < @$lol; ++$i) {
1911    if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1912      _stringify_lol( $lol->[$i], $to);  # recurse!
1913    } else {
1914      $$to .= $lol->[$i];
1915    }
1916  }
1917  return;
1918}
1919
1920#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1921
1922sub _dump_curr_open { # return a string representation of the stack
1923  my $curr_open = $_[0]{'curr_open'};
1924
1925  return '[empty]' unless @$curr_open;
1926  return join '; ',
1927    map {;
1928           ($_->[0] eq '=for')
1929             ? ( ($_->[1]{'~really'} || '=over')
1930               . ' ' . $_->[1]{'target'})
1931             : $_->[0]
1932        }
1933    @$curr_open
1934  ;
1935}
1936
1937###########################################################################
1938my %pretty_form = (
1939  "\a" => '\a', # ding!
1940  "\b" => '\b', # BS
1941  "\e" => '\e', # ESC
1942  "\f" => '\f', # FF
1943  "\t" => '\t', # tab
1944  "\cm" => '\cm',
1945  "\cj" => '\cj',
1946  "\n" => '\n', # probably overrides one of either \cm or \cj
1947  '"' => '\"',
1948  '\\' => '\\\\',
1949  '$' => '\\$',
1950  '@' => '\\@',
1951  '%' => '\\%',
1952  '#' => '\\#',
1953);
1954
1955sub pretty { # adopted from Class::Classless
1956  # Not the most brilliant routine, but passable.
1957  # Don't give it a cyclic data structure!
1958  my @stuff = @_; # copy
1959  my $x;
1960  my $out =
1961    # join ",\n" .
1962    join ", ",
1963    map {;
1964    if(!defined($_)) {
1965      "undef";
1966    } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1967      $x = "[ " . pretty(@$_) . " ]" ;
1968      $x;
1969    } elsif(ref($_) eq 'SCALAR') {
1970      $x = "\\" . pretty($$_) ;
1971      $x;
1972    } elsif(ref($_) eq 'HASH') {
1973      my $hr = $_;
1974      $x = "{" . join(", ",
1975        map(pretty($_) . '=>' . pretty($hr->{$_}),
1976            sort keys %$hr ) ) . "}" ;
1977      $x;
1978    } elsif(!length($_)) { q{''} # empty string
1979    } elsif(
1980      $_ eq '0' # very common case
1981      or(
1982         m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1983         and $_ ne '-0' # the strange case that that RE lets thru
1984      )
1985    ) { $_;
1986    } else {
1987      if( chr(65) eq 'A' ) {
1988        s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1989         #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1990         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1991      } else {
1992        # We're in some crazy non-ASCII world!
1993        s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1994         #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1995         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1996      }
1997      qq{"$_"};
1998    }
1999  } @stuff;
2000  # $out =~ s/\n */ /g if length($out) < 75;
2001  return $out;
2002}
2003
2004#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2005
2006# A rather unsubtle method of blowing away all the state information
2007# from a parser object so it can be reused. Provided as a utility for
2008# backward compatibility in Pod::Man, etc. but not recommended for
2009# general use.
2010
2011sub reinit {
2012  my $self = shift;
2013  foreach (qw(source_dead source_filename doc_has_started
2014start_of_pod_block content_seen last_was_blank paras curr_open
2015line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
2016Title)) {
2017
2018    delete $self->{$_};
2019  }
2020}
2021
2022#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
20231;
2024
2025