1package Pod::Simple::BlackBox;
2#
3# "What's in the box?"  "Pain."
4#
5###########################################################################
6#
7# This is where all the scary things happen: parsing lines into
8#  paragraphs; and then into directives, verbatims, and then also
9#  turning formatting sequences into treelets.
10#
11# Are you really sure you want to read this code?
12#
13#-----------------------------------------------------------------------------
14#
# The basic job of this module, Pod::Simple::BlackBox, is to do the dirty work
# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
# to call the proper callbacks on the treelets.
18#
19# Every node in a treelet is a ['name', {attrhash}, ...children...]
20
21use integer; # vroom!
22use strict;
23use warnings;
24use Carp ();
25our $VERSION = '3.45';
26#use constant DEBUG => 7;
27
sub my_qr ($$) {

    # Compile a pattern defensively, returning it only if it demonstrably
    # works on this perl.
    #
    # Parameters:
    #   $input_re     - source text of the pattern.  It is interpolated into
    #                   qr/.../, so it must not contain an unescaped "/".
    #   $should_match - a string that the compiled pattern is expected to
    #                   match.
    #
    # Returns the compiled qr// object if the pattern both compiles and
    # matches $should_match; otherwise returns the empty string.
    #
    # Older perls compile any syntactically valid property, even if it isn't
    # legal.  To cope with this, the compiled pattern is also run against
    # $should_match, which the caller furnishes.

    my ($input_re, $should_match) = @_;
    # XXX could have a third parameter $shouldnt_match for extra safety

    # The probing evals below must not clobber whatever the caller has in $@.
    local $@;

    # The perl version is compared as a string, as it is elsewhere in this
    # file.
    my $use_utf8 = ($] le 5.006002) ? 'use utf8;' : "";

    my $re = eval "no warnings; $use_utf8 qr/$input_re/";
    #print STDERR  __LINE__, ": $input_re: $@\n" if $@;
    return "" if $@;

    # $should_match gets spliced into a single-quoted literal in the eval
    # text, so escape the only two characters that are special there.
    # Without this, a quote or backslash in $should_match would make the
    # probe fail and a perfectly good pattern would be rejected.
    (my $quoted_match = $should_match) =~ s/([\\'])/\\$1/g;

    my $matches = eval "no warnings; $use_utf8 '$quoted_match' =~ /$re/";
    #print STDERR  __LINE__, ": $input_re: $@\n" if $@;
    return "" if $@;

    #print STDERR  __LINE__, ": SUCCESS: $re\n" if $matches;
    return $re if $matches;

    #print STDERR  __LINE__, ": $re: didn't match\n";
    return "";
}
54
BEGIN {
  # Borrow Pod::Simple's DEBUG at compile time, unless this package already
  # has a DEBUG of its own, so that the "DEBUG > n and ..." tests throughout
  # this file have something to resolve to.
  require Pod::Simple;
  defined &DEBUG or *DEBUG = \&Pod::Simple::DEBUG;
}
59
# Matches a character iff that character would mean something different
# depending on whether we pick CP1252 or UTF-8 when there is no =encoding
# line.  This is broken for early Perls on non-ASCII platforms.
my $non_ascii_re = my_qr('[[:^ascii:]]', "\xB6") || qr/[\x80-\xFF]/;

# Use patterns understandable by Perl 5.6, if possible.  my_qr() hands back
# the empty string for any pattern this perl can't cope with.

# Surrogates
my $cs_re;
{
    no warnings;
    $cs_re = my_qr('\p{IsCs}', "\x{D800}");
}

# Unassigned; U+09E4 is a <reserved> code point unlikely to get assigned
my $cn_re = my_qr('\p{IsCn}', "\x{09E4}");

# The IPA Extensions and Spacing Modifier Letters blocks, falling back to
# spelling out their code point range
my $rare_blocks_re
  = my_qr('[\p{InIPAExtensions}\p{InSpacingModifierLetters}]', "\x{250}")
 || my_qr('[\x{0250}-\x{02FF}]', "\x{250}");

# Undefined unless this perl compiles script runs
my $script_run_re = eval 'no warnings "experimental::script_run";
                          qr/(*script_run: ^ .* $ )/x';

my $latin_re = my_qr('[\p{IsLatin}\p{IsInherited}\p{IsCommon}]', "\x{100}");

# The fallback below was machine generated to be the ranges of the union of
# the above three properties, with things that were undefined by Unicode 4.1
# filling gaps.  That is the version in use when Perl advanced enough to
# successfully compile and execute the above pattern.
$latin_re ||= my_qr('[\x00-\x{02E9}\x{02EC}-\x{0374}\x{037E}\x{0385}\x{0387}\x{0485}\x{0486}\x{0589}\x{060C}\x{061B}\x{061F}\x{0640}\x{064B}-\x{0655}\x{0670}\x{06DD}\x{0951}-\x{0954}\x{0964}\x{0965}\x{0E3F}\x{10FB}\x{16EB}-\x{16ED}\x{1735}\x{1736}\x{1802}\x{1803}\x{1805}\x{1D00}-\x{1D25}\x{1D2C}-\x{1D5C}\x{1D62}-\x{1D65}\x{1D6B}-\x{1D77}\x{1D79}-\x{1DBE}\x{1DC0}-\x{1EF9}\x{2000}-\x{2125}\x{2127}-\x{27FF}\x{2900}-\x{2B13}\x{2E00}-\x{2E1D}\x{2FF0}-\x{3004}\x{3006}\x{3008}-\x{3020}\x{302A}-\x{302D}\x{3030}-\x{3037}\x{303C}-\x{303F}\x{3099}-\x{309C}\x{30A0}\x{30FB}\x{30FC}\x{3190}-\x{319F}\x{31C0}-\x{31CF}\x{3220}-\x{325F}\x{327F}-\x{32CF}\x{3358}-\x{33FF}\x{4DC0}-\x{4DFF}\x{A700}-\x{A716}\x{FB00}-\x{FB06}\x{FD3E}\x{FD3F}\x{FE00}-\x{FE6B}\x{FEFF}-\x{FF65}\x{FF70}\x{FF9E}\x{FF9F}\x{FFE0}-\x{FFFD}\x{10100}-\x{1013F}\x{1D000}-\x{1D1DD}\x{1D300}-\x{1D7FF}]', "\x{100}");

my $every_char_is_latin_re = my_qr("^(?:$latin_re)*\\z", "A");

# Latin script code points not in the first release of Unicode
my $later_latin_re = my_qr('[^\P{IsLatin}\p{IsAge=1.1}]', "\x{1F6}");

# If this perl doesn't have the Deprecated property, there's only one code
# point in it that we need be concerned with.
my $deprecated_re = my_qr('\p{IsDeprecated}', "\x{149}") || qr/\x{149}/;

# The bytes of a UTF-8 byte order mark.  Start with the hard-coded ASCII
# platform bytes, and on perls new enough have perl itself encode U+FEFF.
my $utf8_bom = "\xEF\xBB\xBF";   # No EBCDIC BOM detection for early Perls.
if ($] ge 5.007_003) {
  $utf8_bom = "\x{FEFF}";
  utf8::encode($utf8_bom);
}

# This is used so that the 'content_seen' method doesn't return true on a
# file that just happens to have a line that matches /^=[a-zA-Z]/.  Only if
# there is a valid =foo line will we return that content was seen.
my $seen_legal_directive = 0;
107
108#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
109
sub parse_line {    # alias
  # Singular spelling of parse_lines: the invocant and every argument are
  # handed straight through, and whatever parse_lines returns comes back.
  my $self = shift;
  return $self->parse_lines(@_);
}
111
112# - - -  Turn back now!  Run away!  - - -
113
sub parse_lines {             # Usage: $parser->parse_lines(@lines)
  # an undef means end-of-stream
  #
  # Feeds lines of source text, one at a time, through the line-level
  # parser.  Each line is classified as code, =cut, blank, or the start or
  # continuation of a pod paragraph; pod paragraphs accumulate in the
  # $self->{'paras'} buffer, which _ponder_paragraph_buffer is called on
  # whenever a paragraph is known to be complete.
  #
  # Parameters:
  #   @lines - the source lines.  "\n" and "\r" characters are removed from
  #            a copy of each line before it is examined.  An undef element
  #            marks end-of-stream: '~end' paragraphs are queued, the source
  #            is marked dead, and all later elements are ignored.
  #
  # Returns $self.
  #
  # Handlers, each called as ($line, $line_number, $parser) when set:
  #   code_handler       - for each line outside of pod
  #   cut_handler        - for a "=cut" line that ends a pod block
  #   whiteline_handler  - for a blank pod line that isn't completely empty
  #
  # Dies if a paragraph continuation line is seen while the paragraph buffer
  # is empty.
  my $self = shift;

  my $code_handler = $self->{'code_handler'};
  my $cut_handler  = $self->{'cut_handler'};
  my $wl_handler   = $self->{'whiteline_handler'};
  $self->{'line_count'} ||= 0;

  my $scratch;

  DEBUG > 4 and
   print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";

  DEBUG > 5 and
   print STDERR "#  About to parse lines: ",
     join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";

  my $paras = ($self->{'paras'} ||= []);
   # paragraph buffer.  Because we need to defer processing of =over
   # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
   # to process this.

  $self->{'pod_para_count'} ||= 0;

  # An attempt to match the pod portions of a line.  This is not fool proof,
  # but is good enough to serve as part of the heuristic for guessing the pod
  # encoding if not specified.
  my $codes = join '', grep { / ^ [A-Za-z] $/x } sort keys %{$self->{accept_codes}};
  my $pod_chars_re = qr/ ^ = [A-Za-z]+ | [\Q$codes\E] < /x;

  my $line;
  foreach my $source_line (@_) {
    if( $self->{'source_dead'} ) {
      DEBUG > 4 and print STDERR "# Source is dead.\n";
      last;
    }

    unless( defined $source_line ) {
      DEBUG > 4 and print STDERR "# Undef-line seen.\n";

      # Queue the same '~end' paragraph three times
      push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
      push @$paras, $paras->[-1], $paras->[-1];
       # So that it definitely fills the buffer.
      $self->{'source_dead'} = 1;
      $self->_ponder_paragraph_buffer;
      next;
    }


    # The post-increment makes this test false only for the very first line
    # of the source, which is the only place a BOM is looked for.
    if( $self->{'line_count'}++ ) {
      ($line = $source_line) =~ tr/\n\r//d;
       # If we don't have two vars, we'll end up with that there
       # tr/// modding the (potentially read-only) original source line!

    } else {
      DEBUG > 2 and print STDERR "First line: [$source_line]\n";

      if( ($line = $source_line) =~ s/^$utf8_bom//s ) {
        DEBUG and print STDERR "UTF-8 BOM seen.  Faking a '=encoding utf8'.\n";
        $self->_handle_encoding_line( "=encoding utf8" );
        # Clear the marker that _handle_encoding_line just set, so that a
        # real =encoding line later on isn't flagged as a second directive.
        delete $self->{'_processed_encoding'};
        $line =~ tr/\n\r//d;

      } elsif( $line =~ s/^\xFE\xFF//s ) {
        DEBUG and print STDERR "Big-endian UTF-16 BOM seen.  Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Throw away the rest of the input and leave just an end-of-stream
        # marker in its place.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } elsif( $line =~ s/^\xFF\xFE//s ) {
        DEBUG and print STDERR "Little-endian UTF-16 BOM seen.  Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Throw away the rest of the input and leave just an end-of-stream
        # marker in its place.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } else {
        DEBUG > 2 and print STDERR "First line is BOM-less.\n";
        ($line = $source_line) =~ tr/\n\r//d;
      }
    }

    # Guess the encoding if we are handling bytes, no encoding has been set
    # yet, and this is a pod line containing a non-ASCII character.
    if(!$self->{'parse_characters'} && !$self->{'encoding'}
      && ($self->{'in_pod'} || $line =~ /^=/s)
      && $line =~ /$non_ascii_re/
    ) {

      my $encoding;

      # No =encoding line, and we are at the first pod line in the input that
      # contains a non-ascii byte, that is, one whose meaning varies depending
      # on whether the file is encoded in UTF-8 or CP1252, which are the two
      # possibilities permitted by the pod spec.  (ASCII is assumed if the
      # file only contains ASCII bytes.)  In order to process this line, we
      # need to figure out what encoding we will use for the file.
      #
      # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points
      # 160-255, but it is used here, as it often colloquially is, to refer to
      # the complete set of code points 0-255, including ASCII (0-127), the C1
      # controls (128-159), and strict Latin 1 (160-255).
      #
      # CP1252 is effectively a superset of Latin 1, because it differs only
      # from colloquial 8859-1 in the C1 controls, which are very unlikely to
      # actually be present in 8859-1 files, so can be used for other purposes
      # without conflict.  CP 1252 uses most of them for graphic characters.
      #
      # Note that all ASCII-range bytes represent their corresponding code
      # points in both CP1252 and UTF-8.  In ASCII platform UTF-8, all other
      # code points require multiple (non-ASCII) bytes to represent.  (A
      # separate paragraph for EBCDIC is below.)  The multi-byte
      # representation is quite structured.  If we find an isolated byte that
      # would require multiple bytes to represent in UTF-8, we know that the
      # encoding is not UTF-8.  If we find a sequence of bytes that violates
      # the UTF-8 structure, we also can presume the encoding isn't UTF-8, and
      # hence must be 1252.
      #
      # But there are ambiguous cases where we could guess wrong.  If so, the
      # user will end up having to supply an =encoding line.  We use all
      # readily available information to improve our chances of guessing
      # right.  The odds of something not being UTF-8, but still passing a
      # UTF-8 validity test go down very rapidly with increasing length of the
      # sequence.  Therefore we look at all non-ascii sequences on the line.
      # If any of the sequences can't be UTF-8, we quit there and choose
      # CP1252.  If all could be UTF-8, we see if any of the code points
      # represented are unlikely to be in pod.  If so, we guess CP1252.  If
      # not, we check if the line is all in the same script; if not guess
      # CP1252; otherwise UTF-8.  For perls that don't have convenient script
      # run testing, see if there is both Latin and non-Latin.  If so, CP1252,
      # otherwise UTF-8.
      #
      # On EBCDIC platforms, the situation is somewhat different.  In
      # UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
      # but so do the bytes that are for the C1 controls.  Recall that these
      # correspond to the unused portion of 8859-1 that 1252 mostly takes
      # over.  That means that there are fewer code points that are
      # represented by multi-bytes.  But, note that these controls are
      # very unlikely to be in pod text.  So if we encounter one of them, it
      # means that it is quite likely CP1252 and not UTF-8.  The net result is
      # the same code below is used for both platforms.
      #
      # XXX probably if the line has E<foo> that evaluates to illegal CP1252,
      # then it is UTF-8.  But we haven't processed E<> yet.

      # NOTE(review): this jump happens before $copy is declared below, yet
      # the DEBUG message at set_1252 interpolates $copy -- confirm that is
      # harmless at high DEBUG levels.
      goto set_1252 if $] lt 5.006_000;    # No UTF-8 on very early perls

      my $copy;

      no warnings 'utf8';

      if ($] ge 5.007_003) {
        $copy = $line;

        # On perls that have this function, we can use it to easily see if the
        # sequence is valid UTF-8 or not; if valid it turns on the UTF-8 flag
        # needed below for script run detection
        goto set_1252 if ! utf8::decode($copy);
      }
      elsif (ord("A") != 65) {  # Early EBCDIC, assume UTF-8.  What's a windows
                                # code page doing here anyway?
        goto set_utf8;
      }
      else { # ASCII, no decode(): do it ourselves using the fundamental
             # characteristics of UTF-8
        use if $] le 5.006002, 'utf8';

        my $char_ord;
        my $needed;         # How many continuation bytes to gobble up

        # Initialize the translated line with a dummy character that will be
        # deleted after everything else is done.  This dummy makes sure that
        # $copy will be in UTF-8.  Doing it now avoids the bugs in early perls
        # with upgrading in the middle
        $copy = chr(0x100);

        # Parse through the line
        for (my $i = 0; $i < length $line; $i++) {
          my $byte = substr($line, $i, 1);

          # ASCII bytes are trivially dealt with
          if ($byte !~ $non_ascii_re) {
            $copy .= $byte;
            next;
          }

          my $b_ord = ord $byte;

          # Now figure out what this code point would be if the input is
          # actually in UTF-8.  If, in the process, we discover that it isn't
          # well-formed UTF-8, we guess CP1252.
          #
          # Start the process.  If it is UTF-8, we are at the first, start
          # byte, of a multi-byte sequence.  We look at this byte to figure
          # out how many continuation bytes are needed, and to initialize the
          # code point accumulator with the data from this byte.
          #
          # Normally the minimum continuation byte is 0x80, but in certain
          # instances the minimum is a higher number.  So the code below
          # overrides this for those instances.
          my $min_cont = 0x80;

          if ($b_ord < 0xC2) { #  A start byte < C2 is malformed
            goto set_1252;
          }
          elsif ($b_ord <= 0xDF) {
            $needed = 1;
            $char_ord = $b_ord & 0x1F;
          }
          elsif ($b_ord <= 0xEF) {
            $min_cont = 0xA0 if $b_ord == 0xE0;
            $needed = 2;
            $char_ord = $b_ord & (0x1F >> 1);
          }
          elsif ($b_ord <= 0xF4) {
            $min_cont = 0x90 if $b_ord == 0xF0;
            $needed = 3;
            $char_ord = $b_ord & (0x1F >> 2);
          }
          else { # F4 is the highest start byte for legal Unicode; higher is
                 # unlikely to be in pod.
            goto set_1252;
          }

          # ? not enough continuation bytes available
          goto set_1252 if $i + $needed >= length $line;

          # Accumulate the ordinal of the character from the remaining
          # (continuation) bytes.
          while ($needed-- > 0) {
            my $cont = substr($line, ++$i, 1);
            $b_ord = ord $cont;
            goto set_1252 if $b_ord < $min_cont || $b_ord > 0xBF;

            # In all cases, any next continuation bytes all have the same
            # minimum legal value
            $min_cont = 0x80;

            # Accumulate this byte's contribution to the code point
            $char_ord <<= 6;
            $char_ord |= ($b_ord & 0x3F);
          }

          # Here, the sequence that formed this code point was valid UTF-8,
          # so add the completed character to the output
          $copy .= chr $char_ord;
        } # End of loop through line

        # Delete the dummy first character
        $copy = substr($copy, 1);
      }

      # Here, $copy is legal UTF-8.

      # If it can't be legal CP1252, no need to look further.  (These bytes
      # aren't valid in CP1252.)  This test could have been placed higher in
      # the code, but it seemed wrong to set the encoding to UTF-8 without
      # making sure that the very first instance is well-formed.  But what if
      # it isn't legal CP1252 either?  We have to choose one or the other, and
      # It seems safer to favor the single-byte encoding over the multi-byte.
      goto set_utf8 if ord("A") == 65 && $line =~ /[\x81\x8D\x8F\x90\x9D]/;

      # The C1 controls are not likely to appear in pod
      goto set_1252 if ord("A") == 65 && $copy =~ /[\x80-\x9F]/;

      # Nor are surrogates nor unassigned, nor deprecated.
      DEBUG > 8 and print STDERR __LINE__, ": $copy: surrogate\n" if $copy =~ $cs_re;
      goto set_1252 if $cs_re && $copy =~ $cs_re;
      DEBUG > 8 and print STDERR __LINE__, ": $copy: unassigned\n" if $cn_re && $copy =~ $cn_re;
      goto set_1252 if $cn_re && $copy =~ $cn_re;
      DEBUG > 8 and print STDERR __LINE__, ": $copy: deprecated\n" if $copy =~ $deprecated_re;
      goto set_1252 if $copy =~ $deprecated_re;

      # Nor are rare code points.  But this is hard to determine.  khw
      # believes that IPA characters and the modifier letters are unlikely to
      # be in pod (and certainly very unlikely to be in the first line in
      # the pod containing non-ASCII)
      DEBUG > 8 and print STDERR __LINE__, ": $copy: rare\n" if $copy =~ $rare_blocks_re;
      goto set_1252 if $rare_blocks_re && $copy =~ $rare_blocks_re;

      # The first Unicode version included essentially every Latin character
      # in modern usage.  So, a Latin character not in the first release will
      # unlikely be in pod.
      DEBUG > 8 and print STDERR __LINE__, ": $copy: later_latin\n" if $later_latin_re && $copy =~ $later_latin_re;
      goto set_1252 if $later_latin_re && $copy =~ $later_latin_re;

      # On perls that handle script runs, if the UTF-8 interpretation yields
      # a single script, we guess UTF-8, otherwise just having a mixture of
      # scripts is suspicious, so guess CP1252.  We first strip off, as best
      # we can, the ASCII characters that look like they are pod directives,
      # as these would always show as mixed with non-Latin text.
      $copy =~ s/$pod_chars_re//g;

      if ($script_run_re) {
        goto set_utf8 if $copy =~ $script_run_re;
        DEBUG > 8 and print STDERR __LINE__, ":  not script run\n";
        goto set_1252;
      }

      # Even without script runs, but on recent enough perls and Unicodes, we
      # can check if there is a mixture of both Latin and non-Latin.  Again,
      # having a mixture of scripts is suspicious, so assume CP1252

      # If it's all non-Latin, there is no CP1252, as that is Latin
      # characters and punct, etc.
      DEBUG > 8 and print STDERR __LINE__, ": $copy: not latin\n" if $copy !~ $latin_re;
      goto set_utf8 if $copy !~ $latin_re;

      DEBUG > 8 and print STDERR __LINE__, ": $copy: all latin\n" if $copy =~ $every_char_is_latin_re;
      goto set_utf8 if $copy =~ $every_char_is_latin_re;

      DEBUG > 8 and print STDERR __LINE__, ": $copy: mixed\n";

      # A mixture falls through into set_1252.
     set_1252:
      DEBUG > 9 and print STDERR __LINE__, ": $copy: is 1252\n";
      $encoding = 'CP1252';
      goto done_set;

     set_utf8:
      DEBUG > 9 and print STDERR __LINE__, ": $copy: is UTF-8\n";
      $encoding = 'UTF-8';

     done_set:
      # Act as though the file had declared the guessed encoding, then clear
      # the marker that _handle_encoding_line set, as for the BOM case above.
      $self->_handle_encoding_line( "=encoding $encoding" );
      delete $self->{'_processed_encoding'};
      # NOTE(review): when this line is pod, the transcoder is invoked on
      # $line again further down -- confirm that the transcoder tolerates
      # input it has already processed.
      $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

      # The first whitespace-delimited word containing a non-ASCII character,
      # for quoting in the warning
      my ($word) = $line =~ /(\S*$non_ascii_re\S*)/;

      $self->whine(
        $self->{'line_count'},
        "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
      );
    }

    DEBUG > 5 and print STDERR "# Parsing line: [$line]\n";

    if(!$self->{'in_pod'}) {
      if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) {
        if($1 eq 'cut') {
          $self->scream(
            $self->{'line_count'},
            "=cut found outside a pod block.  Skipping to next block."
          );

          ## Before there were errata sections in the world, it was
          ## least-pessimal to abort processing the file.  But now we can
          ## just barrel on thru (but still not start a pod block).
          #splice @_;
          #push @_, undef;

          next;
        } else {
          # Any other directive starts a pod block.
          $self->{'in_pod'} = $self->{'start_of_pod_block'}
                            = $self->{'last_was_blank'}     = 1;
          # And fall thru to the pod-mode block further down
        }
      } else {
        DEBUG > 5 and print STDERR "# It's a code-line.\n";
        $code_handler->(map $_, $line, $self->{'line_count'}, $self)
         if $code_handler;
        # Note: this may cause code to be processed out of order relative
        #  to pods, but in order relative to cuts.

        # Note also that we haven't yet applied the transcoding to $line
        #  by time we call $code_handler!

        if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
          # That RE is from perlsyn, section "Plain Old Comments (Not!)",
          #$fname = $2 if defined $2;
          #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n";
          DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
          # One less than the requested number, because line_count gets
          # incremented when the next line is read.
          $self->{'line_count'} = $1 - 1;
        }

        next;
      }
    }

    # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    # Else we're in pod mode:

    # Apply any necessary transcoding:
    $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

    # HERE WE CATCH =encoding EARLY!
    if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
      next if $self->parse_characters;   # Ignore this line
      $line = $self->_handle_encoding_line( $line );
    }

    if($line =~ m/^=cut/s) {
      # here ends the pod block, and therefore the previous pod para
      DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n";
      $self->{'in_pod'} = 0;
      # ++$self->{'pod_para_count'};
      $self->_ponder_paragraph_buffer();
       # by now it's safe to consider the previous paragraph as done.
      DEBUG > 6 and print STDERR "Processing any cut handler, line ${$self}{'line_count'}\n";
      $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
       if $cut_handler;

      # TODO: add to docs: Note: this may cause cuts to be processed out
      #  of order relative to pods, but in order relative to code.

    } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
      if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
        $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
          if $wl_handler;
      }

      # Blank lines are kept only when they are inside a verbatim paragraph.
      if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
        DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
        push @{$paras->[-1]}, $line;
      }  # otherwise it's not interesting

      if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
        DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
      }

      $self->{'last_was_blank'} = 1;

    } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...

      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(\s+|$)(.*)/s) {
        # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
        my $new = [$1, {'start_line' => $self->{'line_count'}}, $3];
        $new->[1]{'~orig_spacer'} = $2 if $2 && $2 ne " ";
         # Note that in "=head1 foo", the WS is lost.
         # Example: ['=head1', {'start_line' => 123}, ' foo']

        ++$self->{'pod_para_count'};

        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.

        push @$paras, $new; # the new incipient paragraph
        DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";

      } elsif($line =~ m/^\s/s) {
        # A line starting with whitespace is verbatim text.

        if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
          DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n";
          push @{$paras->[-1]}, $line;
        } else {
          ++$self->{'pod_para_count'};
          $self->_ponder_paragraph_buffer();
           # by now it's safe to consider the previous paragraph as done.
          DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n";
          push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
        }
      } else {
        ++$self->{'pod_para_count'};
        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.
        push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
        DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;

    } else {
      # It's a non-blank line /continuing/ the current para
      if(@$paras) {
        DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n";
        push @{$paras->[-1]}, $line;
      } else {
        # Unexpected case!
        die "Continuing a paragraph but \@\$paras is empty?";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
    }

  } # ends the big foreach loop over the source lines

  DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
  return $self;
}
601
602#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
603
sub _handle_encoding_line {
  # Acts on a line of the form "=encoding NAME"; the point is to set
  # $self->{'_transcoder'} as indicated.
  #
  # Returns nothing if parse_characters is true; otherwise returns $line,
  # untouched.  A line that isn't a well-formed =encoding line is returned
  # without anything else being done.  Otherwise the request is appended to
  # $self->{'encoding_command_reqs'} and its outcome (false for success,
  # else a message) to $self->{'encoding_command_statuses'}, and the
  # requested name is left in $self->{'_processed_encoding'}.
  #
  # Dies if a transcoder is somehow already set, or if the freshly made
  # transcoder fails its trial run.
  my($self, $line) = @_;

  return if $self->parse_characters;

  unless( $line =~ m/^=encoding\s+(\S+)\s*$/s ) {
    return $line;
  }
  my $enc_name = $1;
  DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $enc_name\"\n";

  push @{ $self->{'encoding_command_reqs'} }, "=encoding $enc_name";

  my $enc_error;

  # Cf.   perldoc Encode   and   perldoc Encode::Supported

  require Pod::Simple::Transcode;

  if( my $current = $self->{'encoding'} ) {
    # An encoding is already in force.  Asking for the same one again,
    # ignoring case, hyphens, and underscores, is merely redundant; asking
    # for a different one is an error.
    my($norm_current, $norm_new) = map {
      (my $folded = lc $_) =~ tr/-_//d;
      $folded;
    } $current, $enc_name;

    if($norm_current eq $norm_new) {
      DEBUG > 1 and print STDERR "The '=encoding $enc_name' line is ",
       "redundant.  ($norm_current eq $norm_new).  Ignoring.\n";
      $enc_error = '';
       # But that doesn't necessarily mean that the earlier one went okay
    } else {
      $enc_error = "Encoding is already set to " . $current;
      DEBUG > 1 and print STDERR $enc_error;
    }

  } else {
    # OK, let's turn on the encoding.  The name is recorded even if it
    # turns out below to be one we can't support.
    DEBUG > 1 and print STDERR " Setting encoding to $enc_name\n";
    $self->{'encoding'} = $enc_name;

    if( $enc_name eq 'HACKRAW' ) {
      DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n";

    } elsif( Pod::Simple::Transcode::->encoding_is_available($enc_name) ) {

      if( $self->{'_transcoder'} ) {   # should never happen
        die "WHAT? _transcoder is already set?!";
      }
      $self->{'_transcoder'}
        = Pod::Simple::Transcode::->make_transcoder($enc_name);

      # Give the new transcoder a trial run on some throwaway strings
      eval {
        my @trial = ('', "abc", "123");
        $self->{'_transcoder'}->(@trial);
      };
      if( $@ ) {
        die "Really unexpected error setting up encoding $enc_name: $@\nAborting";
      }
      $self->{'detected_encoding'} = $enc_name;

    } else {
      my @supported = Pod::Simple::Transcode::->all_encodings;

      # Note unsupported, and complain
      DEBUG and print STDERR " Encoding [$enc_name] is unsupported.",
        "\nSupporteds: @supported\n";

      # Look for a near match: a supported name that differs only in case,
      # hyphens, or underscores
      (my $wanted = lc $enc_name) =~ tr[-_][]d;
      my $suggestion = '';
      foreach my $candidate (@supported) {
        (my $folded = lc $candidate) =~ tr[-_][]d;
        if( $folded eq $wanted ) {
          $suggestion = "  (Maybe \"$enc_name\" should be \"$candidate\"?)";
          last;
        }
      }

      my $encmodver = Pod::Simple::Transcode::->encmodver;
      $enc_error = join '',
        "This document probably does not appear as it should, because its ",
        "\"=encoding $enc_name\" line calls for an unsupported encoding.",
        $suggestion, "  [$encmodver\'s supported encodings are: @supported]";

      $self->scream( $self->{'line_count'}, $enc_error );
    }
  }

  push @{ $self->{'encoding_command_statuses'} }, $enc_error;

  # A leftover marker means an earlier =encoding is still pending, so this
  # one is a double declaration.
  $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives')
    if defined $self->{'_processed_encoding'};
  $self->{'_processed_encoding'} = $enc_name;

  return $line;
}
703
704# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
705
sub _handle_encoding_second_level {
  # Called with an =encoding paragraph.  By time this is called, the
  # encoding (if well formed) will already have been acted on, which is
  # signalled by $self->{'_processed_encoding'} being defined.  All that is
  # left to do here is report problems, via whine().  Returns nothing.
  my($self, $para) = @_;

  # Everything after the paragraph's name and attribute hash is its text
  my $content = join ' ', @{$para}[ 2 .. $#{$para} ];
  for ($content) {
    s/^\s+//s;
    s/\s+$//s;
  }

  DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";

  unless( defined $self->{'_processed_encoding'} ) {
    # The line-level pass didn't recognize it, so it's a syntax error
    $self->whine( $para->[1]{'start_line'},
      "Invalid =encoding syntax: $content"
    );
    return;
  }

  #if($content ne $self->{'_processed_encoding'}) {
  #  Could it happen?
  #}
  delete $self->{'_processed_encoding'};

  # It's already been handled.  Check for errors.
  my $statuses = $self->{'encoding_command_statuses'};
  if( !$statuses ) {
    DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n";
  }
  elsif( $statuses->[-1] ) {
    my $complaint = sprintf( "Couldn't do %s: %s",
      $self->{'encoding_command_reqs'}[-1],
      $statuses->[-1],
    );
    $self->whine( $para->[1]{'start_line'}, $complaint );
  }
  else {
    DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
  }

  return;
}
744
745#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
746
{
my $m = -321;   # magic line number

sub _gen_errata {
  # Return 0 or more fake-o paragraphs explaining the accumulated
  #  errors on this document: a "POD ERRORS" heading and an introductory
  #  paragraph, then an =over ... =back list with one =item per line number
  #  in $self->{'errata'} (in ascending numeric order), each followed by
  #  that line's messages as paragraphs.  Every generated paragraph has the
  #  magic start line.
  my $self = $_[0];

  my $errata = $self->{'errata'};
  return() unless $errata and keys %$errata;

  my @out = (
    ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
    ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
     "Hey! ",
     ['B', {},
      'The above document had some coding errors, which are explained below:'
     ]
    ],
    ['=over',  {'start_line' => $m, 'errata' => 1}, ''],
  );

  foreach my $line (sort {$a <=> $b} keys %$errata) {
    push @out, ['=item', {'start_line' => $m}, "Around line $line:"];
    push @out,
      map { ['~Para', {'start_line' => $m, '~cooked' => 1}, $_] }
        @{$errata->{$line}};
  }

  # TODO: report of unknown entities? unrenderable characters?

  push @out, ['=back',  {'start_line' => $m, 'errata' => 1}, ''];

  DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n";

  return @out;
}

}
795
796#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
797
798##############################################################################
799##
800##  stop reading now stop reading now stop reading now stop reading now stop
801##
802##                         HERE IT BECOMES REALLY SCARY
803##
804##  stop reading now stop reading now stop reading now stop reading now stop
805##
806##############################################################################
807
sub _ponder_paragraph_buffer {

  # Work through the paragraph tokens queued in $self->{'paras'}: fire
  # element start/end events for regions, and hand every finished
  # paragraph treelet to _traverse_treelet_bit.  Regions that are currently
  # open (=over lists and =begin/=for regions) are kept on the
  # $self->{'curr_open'} stack.  Returns nothing.
  #
  # Para-token types as found in the buffer.
  #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
  #   =over, =back, =item
  #   and the null =pod (to be complained about if over one line)
  #
  # "~data" paragraphs are something we generate at this level, depending on
  # a currently open =over region
  #
  # Events fired:  Begin and end for:
  #                   directivename (like head1 .. head4), item, extend,
  #                   for (from =begin...=end, =for),
  #                   over-bullet, over-number, over-text, over-block,
  #                   item-bullet, item-number, item-text,
  #                   Document,
  #                   Data, Para, Verbatim
  #                   B, C, longdirname (TODO -- wha?), etc. for all directives
  #

  my $self = $_[0];
  my $paras;
  return unless @{$paras = $self->{'paras'}};
  my $curr_open = ($self->{'curr_open'} ||= []);

  my $scratch;   # throwaway variable for passing an element name

  DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n";

  # We have something in our buffer.  So apparently the document has started.
  unless($self->{'doc_has_started'}) {
    $self->{'doc_has_started'} = 1;

    my $starting_contentless;
    $starting_contentless =
     (
       !@$curr_open
       and @$paras and ! grep $_->[0] ne '~end', @$paras
        # i.e., if the paras is all ~ends
     )
    ;
    DEBUG and print STDERR "# Starting ",
      $starting_contentless ? 'contentless' : 'contentful',
      " document\n"
    ;

    $self->_handle_element_start(
      ($scratch = 'Document'),
      {
        'start_line' => $paras->[0][1]{'start_line'},
        $starting_contentless ? ( 'contentless' => 1 ) : (),
      },
    );
  }

  my($para, $para_type);
  while(@$paras) {

    # If a directive, assume it's legal; subtract below if found not to be
    $seen_legal_directive++ if $paras->[0][0] =~ /^=/;

    last if      @$paras == 1
            and (    $paras->[0][0] eq '=over'
                 or  $paras->[0][0] eq '=item'
                 or ($paras->[0][0] eq '~Verbatim' and $self->{'in_pod'}));
    # Those're the three kinds of paragraphs that require lookahead.
    #   Actually, an "=item Foo" inside an <over type=text> region
    #   and any =item inside an <over type=block> region (rare)
    #   don't require any lookahead, but all others (bullets
    #   and numbers) do.
    # The verbatim is different from the other two, because those might be
    # like:
    #
    #   =item
    #   ...
    #   =cut
    #   ...
    #   =item
    #
    # The =cut here finishes the paragraph but doesn't terminate the =over
    # they should be in. (khw apologizes that he didn't comment at the time
    # why the 'in_pod' works, and no longer remembers why, and doesn't think
    # it is currently worth the effort to re-figure it out.)

# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many?  like what?  =head1 etc?

    $para = shift @$paras;
    $para_type = $para->[0];

    DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
      $self->_dump_curr_open(), ")\n";

    # Region bookkeeping comes first: these are looked at even while we
    # are inside an ignored region.  A true return from a sub-ponderer
    # means this paragraph is finished with.
    if($para_type eq '=for') {
      next if $self->_ponder_for($para,$curr_open,$paras);

    } elsif($para_type eq '=begin') {
      next if $self->_ponder_begin($para,$curr_open,$paras);

    } elsif($para_type eq '=end') {
      next if $self->_ponder_end($para,$curr_open,$paras);

    } elsif($para_type eq '~end') { # The virtual end-document signal
      next if $self->_ponder_doc_end($para,$curr_open,$paras);
    }


    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    if(grep $_->[1]{'~ignore'}, @$curr_open) {
      DEBUG > 1 and
       print STDERR "Skipping $para_type paragraph because in ignore mode.\n";
      next;
    }
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    if($para_type eq '=pod') {
      $self->_ponder_pod($para,$curr_open,$paras);

    } elsif($para_type eq '=over') {
      next if $self->_ponder_over($para,$curr_open,$paras);

    } elsif($para_type eq '=back') {
      next if $self->_ponder_back($para,$curr_open,$paras);

    } else {

      # All non-magical codes!!!

      # Here we start using $para_type for our own twisted purposes, to
      #  mean how it should get treated, not as what the element name
      #  should be.

      DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n";

      # Enforce some =headN discipline
      if($para_type =~ m/^=head\d$/s
         and ! $self->{'accept_heads_anywhere'}
         and @$curr_open
         and $curr_open->[-1][0] eq '=over'
      ) {
        # $para_type already carries its leading '='.
        DEBUG > 2 and print STDERR "'$para_type' inside an '=over'!\n";
        $self->whine(
          $para->[1]{'start_line'},
          "You forgot a '=back' before '$para_type'"
        );
        unshift @$paras, ['=back', {}, ''], $para;   # close the =over
        next;
      }


      if($para_type eq '=item') {

        # $over is the innermost open =over.  It is not necessarily the top
        # of the stack: a =begin region may have been opened inside the
        # list.  All per-list state is therefore kept on $over itself.
        my $over;
        unless(@$curr_open and
               $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
          $self->whine(
            $para->[1]{'start_line'},
            "'=item' outside of any '=over'"
          );
          # Supply the missing =over and reconsider the item after it.
          unshift @$paras,
            ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
            $para
          ;
          next;
        }


        my $over_type = $over->[1]{'~type'};

        if(!$over_type) {
          # Shouldn't happen!
          die "Typeless over in stack, starting at line "
           . $over->[1]{'start_line'};

        } elsif($over_type eq 'block') {
          # Complain only once per =over, and at that =over's own line.
          unless($over->[1]{'~bitched_about'}) {
            $over->[1]{'~bitched_about'} = 1;
            $self->whine(
              $over->[1]{'start_line'},
              "You can't have =items (as at line "
              . $para->[1]{'start_line'}
              . ") unless the first thing after the =over is an =item"
            );
          }
          # Just turn it into a paragraph and reconsider it
          $para->[0] = '~Para';
          unshift @$paras, $para;
          next;

        } elsif($over_type eq 'text') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print STDERR " Item is of type ", $item_type, " under $over_type\n";

          if($item_type eq 'text') {
            # Nothing special needs doing for 'text'
          } elsif($item_type eq 'number' or $item_type eq 'bullet') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected text after =item, not a $item_type"
            );
            # Undo our clobbering:
            push @$para, $para->[1]{'~orig_content'};
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          # =item-text thingies don't need any assimilation, it seems.

        } elsif($over_type eq 'number') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print STDERR " Item is of type ", $item_type, " under $over_type\n";

          # The running count belongs to the list, so it lives on $over.
          my $expected_value = ++ $over->[1]{'~counter'};

          if($item_type eq 'bullet') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            push @$para, $para->[1]{'~orig_content'};
              # restore the bullet, blocking the assimilation of next para

          } elsif($item_type eq 'text') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            # Text content will still be there and will block next ~Para

          } elsif($item_type ne 'number') {
            die "Unknown item type $item_type"; # should never happen

          } elsif($expected_value == $para->[1]{'number'}) {
            DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";

          } else {
            DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
             " instead of the expected value of $expected_value\n";
            $self->whine(
              $para->[1]{'start_line'},
              "You have '=item " . $para->[1]{'number'} .
              "' instead of the expected '=item $expected_value'"
            );
            $para->[1]{'number'} = $expected_value;  # correcting!!
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            # (The lookahead check at the top of the loop means an =item
            #  is only pondered when another paragraph follows it.)
            if($paras->[0][0] eq '~Para') {
              DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }


        } elsif($over_type eq 'bullet') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print STDERR " Item is of type ", $item_type, " under $over_type\n";

          if($item_type eq 'bullet') {
            # as expected!

            if( $para->[1]{'~_freaky_para_hack'} ) {
              DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
              push @$para, $para->[1]{'~_freaky_para_hack'};
            }

          } elsif($item_type eq 'number') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
            push @$para, $para->[1]{'~orig_content'};
             # and block assimilation of the next paragraph
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } elsif($item_type eq 'text') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
             # But doesn't need processing.  But it'll block assimilation
             #  of the next para.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            if($paras->[0][0] eq '~Para') {
              DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }

        } else {
          die "Unhandled =over type \"$over_type\"?";
          # Shouldn't happen!
        }

        $para_type = 'Plain';
        $para->[0] .= '-' . $over_type;   # e.g. "=item-number"
        # Whew.  Now fall thru and process it.


      } elsif($para_type eq '=extend') {
        # Well, might as well implement it here.
        $self->_ponder_extend($para);
        next;  # and skip
      } elsif($para_type eq '=encoding') {
        # Not actually acted on here, but we catch errors here.
        $self->_handle_encoding_second_level($para);
        next unless $self->keep_encoding_directive;
        $para_type = 'Plain';
      } elsif($para_type eq '~Verbatim') {
        $para->[0] = 'Verbatim';
        $para_type = '?Verbatim';
      } elsif($para_type eq '~Para') {
        $para->[0] = 'Para';
        $para_type = '?Plain';
      } elsif($para_type eq 'Data') {
        $para->[0] = 'Data';
        $para_type = '?Data';
      } elsif( $para_type =~ s/^=//s
        and defined( $para_type = $self->{'accept_directives'}{$para_type} )
      ) {
        # From here on $para_type is whatever treatment was registered
        # for this directive in $self->{'accept_directives'}.
        DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n";
      } else {
        # An unknown directive!
        $seen_legal_directive--;
        DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n",
         $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
        ;
        $self->whine(
          $para->[1]{'start_line'},
          "Unknown directive: $para->[0]"
        );

        # And maybe treat it as text instead of just letting it go?
        next;
      }

      # A leading '?' marks a treatment that is still provisional: it
      # depends on whether we are inside a =for region, and if so on
      # whether the innermost one resolves formatting codes.
      if($para_type =~ s/^\?//s) {
        if(! @$curr_open) {  # usual case
          DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n";
        } else {
          my @fors = grep $_->[0] eq '=for', @$curr_open;
          DEBUG > 1 and print STDERR "Containing fors: ",
            join(',', map $_->[1]{'target'}, @fors), "\n";

          if(! @fors) {
            DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";

          #} elsif(grep $_->[1]{'~resolve'}, @fors) {
          #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
          } elsif( $fors[-1][1]{'~resolve'} ) {
            # Look to the immediately containing for

            if($para_type eq 'Data') {
              DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
              $para->[0] = 'Para';
              $para_type = 'Plain';
            } else {
              DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
            }
          } else {
            DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
            $para->[0] = $para_type = 'Data';
          }
        }
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      if($para_type eq 'Plain') {
        $self->_ponder_Plain($para);
      } elsif($para_type eq 'Verbatim') {
        $self->_ponder_Verbatim($para);
      } elsif($para_type eq 'Data') {
        $self->_ponder_Data($para);
      } else {
        die "\$para type is $para_type -- how did that happen?";
        # Shouldn't happen.
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      # Drop the leading '~' or '=' to get the public element name.
      $para->[0] =~ s/^[~=]//s;

      DEBUG and print STDERR "\n", pretty($para), "\n";

      # traverse the treelet (which might well be just one string scalar)
      $self->{'content_seen'} ||= 1 if   $seen_legal_directive
                                    && ! $self->{'~tried_gen_errata'};
      $self->_traverse_treelet_bit(@$para);
    }
  }

  return;
}
1227
1228###########################################################################
1229# The sub-ponderers...
1230
1231
1232
sub _ponder_for {
  # Handle a "=for TARGET text..." paragraph by faking it out as a
  # begin/end: the paragraph becomes a Data paragraph sandwiched between a
  # synthetic "=begin TARGET" and "=end TARGET", all put back at the front
  # of @$paras to be pondered in turn.
  #
  # Always returns 1, i.e., the caller has nothing more to do with $para.
  my ($self,$para,$curr_open,$paras) = @_;

  if(grep { $_->[1]{'~ignore'} } @$curr_open) {
    DEBUG > 1 and print STDERR "Ignoring ignorable =for\n";
    return 1;
  }

  # The target is the first word found in the content; it (and the
  # whitespace around it) is removed from the content element it was in.
  my $target;
  foreach my $idx (2 .. $#$para) {
    next unless $para->[$idx] =~ s/^\s*(\S+)\s*//s;
    $target = $1;
    last;
  }

  if(!defined $target) {
    $self->whine(
      $para->[1]{'start_line'},
      "=for without a target?"
    );
    return 1;
  }

  DEBUG > 1 and
   print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";

  $para->[0] = 'Data';

  # Each synthetic directive gets its own attribute hash; '~really'
  # records that the region was written as a =for.
  my %fake_attrs = (
    'start_line' => $para->[1]{'start_line'},
    '~really'    => '=for',
  );
  my $opener = ['=begin', { %fake_attrs }, $target];
  my $closer = ['=end',   { %fake_attrs }, $target];

  unshift @$paras, $opener, $para, $closer;

  return 1;
}
1276
sub _ponder_begin {
  # Open a "=begin TARGET [title]" region.
  #
  # The paragraph is renamed to '=for' (what these regions are called
  # internally), annotated with 'target', 'title', 'target_matching',
  # '~really', '~ignore' and '~resolve', and pushed onto @$curr_open.
  # A "for" element-start event is fired unless this region, or one
  # already open around it, is being ignored.
  #
  # Target syntax handled here: a leading '!' negates the match, a
  # leading ':' asks for formatting codes to be resolved, and several
  # target names may be given separated by commas.
  #
  # Always returns 1, i.e., the caller has nothing more to do with $para.
  my ($self,$para,$curr_open,$paras) = @_;
  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  unless(length($content)) {
    $self->whine(
      $para->[1]{'start_line'},
      "=begin without a target?"
    );
    DEBUG and print STDERR "Ignoring targetless =begin\n";
    return 1;
  }

  # $content is non-empty and trimmed, so it starts with the target word.
  # The /s is needed so that a title containing a newline still matches;
  # without it the match fails on such content and $target is undefined.
  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/s;
  $para->[1]{'title'} = $title if ($title);
  $para->[1]{'target'} = $target;  # without any ':'
  $content = $target; # strip off the title

  $content =~ s/^:!/!:/s;   # accept the two prefixes in either order
  my $neg;  # whether this is a negation-match
  $neg = 1        if $content =~ s/^!//s;
  my $to_resolve;  # whether to process formatting codes
  $to_resolve = 1 if $content =~ s/^://s;

  my $dont_ignore; # whether this target matches us

  # A non-negated target list is also checked against the '*' entry.
  foreach my $target_name (
    split(',', $content, -1),
    $neg ? () : '*'
  ) {
    DEBUG > 2 and
     print STDERR " Considering whether =begin $content matches $target_name\n";
    next unless $self->{'accept_targets'}{$target_name};

    DEBUG > 2 and
     print STDERR "  It DOES match the acceptable target $target_name!\n";
    $to_resolve = 1
      if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
    $dont_ignore = 1;
    $para->[1]{'target_matching'} = $target_name;
    last; # stop looking at other target names
  }

  if($neg) {
    # Negation flips the outcome of the matching above.
    if( $dont_ignore ) {
      $dont_ignore = '';
      delete $para->[1]{'target_matching'};
      DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n";
    } else {
      $dont_ignore = 1;
      $para->[1]{'target_matching'} = '!';
      DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n";
    }
  }

  $para->[0] = '=for';  # Just what we happen to call these, internally
  $para->[1]{'~really'} ||= '=begin';   # keeps '=for' if _ponder_for set it
  $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
  $para->[1]{'~resolve'}  = $to_resolve || 0;

  DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '',
    "ignore contents of this region\n";
  DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ",
    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
  DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n";

  # The region goes on the stack even when ignored, so that the matching
  # =end can find it.
  push @$curr_open, $para;
  if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n";
  } else {
    $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'};
    $self->_handle_element_start((my $scratch='for'), $para->[1]);
  }

  return 1;
}
1354
sub _ponder_end {
  # Close a =begin region in response to "=end TARGET".
  #
  # The directive is complained about and otherwise ignored when it has
  # no target, has more than one word, arrives while the innermost open
  # region is not a =for, or names a target other than that region's.
  # Otherwise the region is popped off @$curr_open, and a "for"
  # element-end event is fired unless we are inside an ignored region.
  #
  # Always returns 1, i.e., the caller has nothing more to do with $para.
  my ($self,$para,$curr_open,$paras) = @_;

  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  DEBUG and print STDERR "Ogling '=end $content' directive\n";

  my $line   = $para->[1]{'start_line'};
  my $in_for = (@$curr_open and $curr_open->[-1][0] eq '=for');

  if(!length($content)) {
    # Suggest the right target when we know which region is open.
    my $hint = $in_for
      ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
      : '';
    $self->whine( $line, "'=end' without a target?" . $hint );
    DEBUG and print STDERR "Ignoring targetless =end\n";
    return 1;
  }

  if($content !~ m/^\S+$/) {  # i.e., if it's not just one word
    $self->whine(
      $line,
      "'=end $content' is invalid.  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if(!$in_for) {
    $self->whine(
      $line,
      "=end $content without matching =begin.  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if($content ne $curr_open->[-1][1]{'target'}) {
    $self->whine(
      $line,
      "=end $content doesn't match =begin "
      . $curr_open->[-1][1]{'target'}
      . ".  (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
    return 1;
  }

  # Else it's okay to close...
  my $ignoring = grep { $_->[1]{'~ignore'} } @$curr_open;
  if($ignoring) {
    DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n";
    # And that may be because of this to-be-closed =for region, or some
    #  other one, but it doesn't matter.
  }
  else {
    $curr_open->[-1][1]{'start_line'} = $line;
      # what's that for?

    $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'};
    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
  }

  DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
  pop @$curr_open;

  return 1;
}
1424
sub _ponder_doc_end {
  # Handle the virtual end-of-document paragraph ('~end').  This takes up
  # to three passes through the caller's loop:
  #   1. if regions are still open, queue closers for them and requeue
  #      the ~end;
  #   2. once, queue whatever errata paragraphs _gen_errata produces;
  #   3. empty the buffer and fire the Document element-end event.
  #
  # Always returns 1, i.e., the caller has nothing more to do with $para.
  my ($self,$para,$curr_open,$paras) = @_;

  if(@$curr_open) { # Deal with things left open
    DEBUG and print STDERR "Stack is nonempty at end-document: (",
      $self->_dump_curr_open(), ")\n";
    DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";

    unshift @$paras, $self->_closers_for_all_curr_open;

    # Drop any ~end already queued, then put ours at the very end, twice.
    # We need two -- once for the next cycle where we
    #  generate errata, and then another to be at the end
    #  when that loop back around to process the errata.
    @$paras = grep { $_->[0] ne '~end' } @$paras;
    push @$paras, $para, $para;
    return 1;
  }

  DEBUG and print STDERR "Okay, stack is empty now.\n";

  # Try generating errata section, if applicable
  if(!$self->{'~tried_gen_errata'}) {
    $self->{'~tried_gen_errata'} = 1;
    if(my @extras = $self->_gen_errata()) {
      unshift @$paras, @extras;
      DEBUG and print STDERR "Generated errata... relooping...\n";
      return 1;  # I.e., loop around again to process these fake-o paragraphs
    }
  }

  @$paras = (); # Well, that's that for this paragraph buffer.
  DEBUG and print STDERR "Throwing end-document event.\n";

  $self->_handle_element_end( my $scratch = 'Document' );
  return 1; # Hasta la byebye
}
1462
sub _ponder_pod {
  # Handle a "=pod" paragraph: complain if it carries more than one
  # content element, and pass its first content element on to the
  # 'pod_handler' callback if one is set.  Returns nothing.
  my ($self,$para,$curr_open,$paras) = @_;

  if(@$para > 3) {
    $self->whine(
      $para->[1]{'start_line'},
      "=pod directives shouldn't be over one line long!  Ignoring all "
       . (@$para - 2) . " lines of content"
    );
  }

  # Content ignored unless 'pod_handler' is set
  my $pod_handler = $self->{'pod_handler'};
  if ($pod_handler) {
      # Work on copies, so the paragraph itself is left untouched.
      my $line_num = $para->[1]{'start_line'};
      my $text     = $para->[2];
      my $line     = $text eq '' ? "=pod" : "=pod $text";
        # imitate cut_handler output
      $pod_handler->($line, $line_num, $self);
  }

  # The surrounding methods set content_seen, so let us remain consistent.
  # I do not know why it was not here before -- should it not be here?
  # $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'};

  return;
}
1484
sub _ponder_over {
  # Open an =over region.  The list type is decided by peeking at the
  # paragraph that follows:
  #   =item  -- whatever _get_initial_item_type says about that item
  #   =back  -- 'empty' if 'parse_empty_lists' is set; otherwise both
  #             directives are dropped
  #   ~end   -- complain, and ignore the =over
  #   other  -- 'block'
  # The paragraph gets '~type', '~orig_content' and 'indent' attributes,
  # goes onto @$curr_open, and an "over-<type>" start event is fired.
  #
  # Returns 1 when the =over was dropped (or there was nothing to peek
  # at), and nothing when the region was opened.
  my ($self,$para,$curr_open,$paras) = @_;
  return 1 unless @$paras;

  my $next_type = $paras->[0][0];
  my $list_type;

  if($next_type eq '=item') { # most common case
    $list_type = $self->_get_initial_item_type($paras->[0]);
  }
  elsif($next_type eq '=back') {
    # Ignore empty lists by default
    unless ($self->{'parse_empty_lists'}) {
      shift @$paras;   # swallow the =back along with this =over
      return 1;
    }
    $list_type = 'empty';
  }
  elsif($next_type eq '~end') {
    $self->whine(
      $para->[1]{'start_line'},
      "=over is the last thing in the document?!"
    );
    return 1; # But feh, ignore it.
  }
  else {
    $list_type = 'block';
  }

  my $attrs = $para->[1];
  $attrs->{'~type'} = $list_type;
  push @$curr_open, $para;
   # yes, we reuse the paragraph as a stack item

  # The content, if any, is the indent amount.  Anything unusable is
  # complained about and replaced with the default of 4.
  my $content = join ' ', splice @$para, 2;
  $attrs->{'~orig_content'} = $content;

  if($content =~ m/^\s*$/s) {
    $attrs->{'indent'} = 4;
  }
  elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
    no integer;   # so the comparison below is not done in integer arithmetic
    my $amount = $1;
    if($amount == 0) {
      $self->whine(
        $attrs->{'start_line'},
        "Can't have a 0 in =over $content"
      );
      $amount = 4;
    }
    $attrs->{'indent'} = $amount;
  }
  else {
    $self->whine(
      $attrs->{'start_line'},
      "=over should be: '=over' or '=over positive_number'"
    );
    $attrs->{'indent'} = 4;
  }
  DEBUG > 1 and print STDERR "=over found of type $list_type\n";

  $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'};
  $self->_handle_element_start((my $scratch = 'over-' . $list_type), $attrs);

  return;
}
1543
sub _ponder_back {
  # Close the innermost =over region in response to a =back.
  #
  # A =back with parameters is complained about but still honoured.  A
  # =back arriving while the innermost open region is not an =over is
  # complained about and ignored (returning 1).  Otherwise the =over is
  # popped off @$curr_open and an "over-<type>" end event is fired; the
  # return value is then whatever _handle_element_end returned.
  my ($self,$para,$curr_open,$paras) = @_;
  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??

  my $content = join ' ', splice @$para, 2;
  $self->whine(
    $para->[1]{'start_line'},
    "=back doesn't take any parameters, but you said =back $content"
  ) if $content =~ m/\S/;

  unless(@$curr_open and $curr_open->[-1][0] eq '=over') {
    DEBUG > 1 and print STDERR "=back found without a matching =over.  Stack: (",
        join(', ', map { $_->[0] } @$curr_open), ").\n";
    $self->whine(
      $para->[1]{'start_line'},
      '=back without =over'
    );
    return 1; # and ignore it
  }

  # Expected case: we're closing the most recently opened thing
  DEBUG > 1 and print STDERR "=back happily closes matching =over\n";
  $self->{'content_seen'} ||= 1 unless $self->{'~tried_gen_errata'};

  my $closed_over  = pop @$curr_open;
  my $element_name = 'over-' . $closed_over->[1]{'~type'};
  return $self->_handle_element_end( $element_name, $para->[1] );
}
1574
sub _ponder_item {
  # Process an '=item' paragraph against the stack of open regions.
  #
  #   $self      - the parser object
  #   $para      - the item paragraph, [name, {attrs}, content...];
  #                modified in place
  #   $curr_open - arrayref: stack of currently open regions
  #   $paras     - arrayref: queue of paragraphs still waiting to be
  #                processed; $paras->[0] is the next one
  #
  # Returns 1 after pushing paragraphs back onto the front of @$paras
  # (a synthesized '=over' ahead of a stray '=item', or the item itself
  # demoted to a '~Para'); presumably that tells the caller to start over
  # with the queue -- confirm against the caller.  Otherwise returns
  # nothing, after appending "-$over_type" to the paragraph's name and
  # making sure the paragraph has at least one content element (for
  # number and bullet lists).
  #
  # Dies if the enclosing =over has no type, or on an unknown over type
  # or item type.
  my ($self,$para,$curr_open,$paras) = @_;

  # The list this item belongs to is the innermost '=over' entry; other
  # kinds of open region sitting on top of it in the stack are skipped.
  my $over;
  unless(@$curr_open and
         $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
    $self->whine(
      $para->[1]{'start_line'},
      "'=item' outside of any '=over'"
    );
    unshift @$paras,
      ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
      $para
    ;
    return 1;
  }

  # All per-list state (~type, ~counter, ~bitched_about, start_line) is
  # read from and written to $over, never to whatever happens to be on
  # top of the stack, so that it stays attached to the list that the
  # type was taken from.
  my $over_type = $over->[1]{'~type'};

  # Set for list types whose items may absorb the paragraph that follows
  my $wants_following_para;

  if(!$over_type) {
    # Shouldn't happen!
    die "Typeless over in stack, starting at line "
     . $over->[1]{'start_line'};

  } elsif($over_type eq 'block') {
    # Complain only once per =over
    unless($over->[1]{'~bitched_about'}) {
      $over->[1]{'~bitched_about'} = 1;
      $self->whine(
        $over->[1]{'start_line'},
        "You can't have =items (as at line "
        . $para->[1]{'start_line'}
        . ") unless the first thing after the =over is an =item"
      );
    }
    # Just turn it into a paragraph and reconsider it
    $para->[0] = '~Para';
    unshift @$paras, $para;
    return 1;

  } elsif($over_type eq 'text') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'text') {
      # Nothing special needs doing for 'text'
    } elsif($item_type eq 'number' or $item_type eq 'bullet') {
      $self->whine(
          $para->[1]{'start_line'},
          "Expected text after =item, not a $item_type"
      );
      # Undo our clobbering:
      push @$para, $para->[1]{'~orig_content'};
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }

    # =item-text thingies don't need any assimilation, it seems.

  } elsif($over_type eq 'number') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";

    my $expected_value = ++ $over->[1]{'~counter'};

    if($item_type eq 'bullet') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      push @$para, $para->[1]{'~orig_content'};
        # restore the bullet, blocking the assimilation of next para

    } elsif($item_type eq 'text') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      # Text content will still be there and will block next ~Para

    } elsif($item_type ne 'number') {
      die "Unknown item type $item_type"; # should never happen

    } elsif($expected_value == $para->[1]{'number'}) {
      DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";

    } else {
      DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
       " instead of the expected value of $expected_value\n";
      $self->whine(
        $para->[1]{'start_line'},
        "You have '=item " . $para->[1]{'number'} .
        "' instead of the expected '=item $expected_value'"
      );
      $para->[1]{'number'} = $expected_value;  # correcting!!
    }

    $wants_following_para = 1;

  } elsif($over_type eq 'bullet') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'bullet') {
      # as expected!

      if( $para->[1]{'~_freaky_para_hack'} ) {
        DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
        push @$para, $para->[1]{'~_freaky_para_hack'};
      }

    } elsif($item_type eq 'number') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
      push @$para, $para->[1]{'~orig_content'};
       # and block assimilation of the next paragraph
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } elsif($item_type eq 'text') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
       # But doesn't need processing.  But it'll block assimilation
       #  of the next para.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }

    $wants_following_para = 1;

  } else {
    die "Unhandled =over type \"$over_type\"?";
    # Shouldn't happen!
  }

  if($wants_following_para and @$para == 2) {
    # For the cases where we /didn't/ push to @$para.
    # The emptiness check keeps $paras->[0][0] from autovivifying a
    # bogus empty paragraph into the queue.
    if(@$paras and $paras->[0][0] eq '~Para') {
      DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
      push @$para, splice @{shift @$paras},2;
    } else {
      DEBUG and print STDERR "Can't assimilate following ",
        (@$paras ? $paras->[0][0] : '[nothing]'), "\n";
      push @$para, '';  # Just so it's not contentless
    }
  }

  $para->[0] .= '-' . $over_type;

  return;
}
1745
sub _ponder_Plain {
  # Replace the raw text lines of a plain paragraph with the treelet
  # content produced by $self->_make_treelet.  $para is
  # [name, {attrs}, line...] and is modified in place.  Returns nothing.
  my ($self,$para) = @_;
  DEBUG and print STDERR " giving plain treatment...\n";

  # Empty paragraphs don't need a treelet for any reason I can see.
  return if @$para == 2;
  return if @$para == 3 and $para->[2] eq '';

  # And precooked paragraphs already have a treelet.
  return if $para->[1]{'~cooked'};

  # Pull the lines out first; what comes back is pushed in their place.
  my $text    = join "\n", splice(@$para, 2);
  my $treelet = $self->_make_treelet($text, $para->[1]{'start_line'});
  push @$para, @$treelet;

  return;
}
1762
sub _ponder_Verbatim {
  # Prepare a verbatim paragraph, [name, {attrs}, line...], in place:
  # mark it as whitespace-preserving, strip the configured indentation
  # and expand tabs (both skipped when the output is for JustPod), and
  # then either hand it to _verbatim_format, parse it for formatting
  # codes, or join its lines into one string.  Returns nothing.
  my ($self,$para) = @_;
  DEBUG and print STDERR " giving verbatim treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  if (! $self->{'_output_is_for_JustPod'}) {
    # Fix illegal settings for expand_verbatim_tabs()
    # This is because this module doesn't do input error checking, but khw
    # doesn't want to add yet another instance of that.
    my $tab_width = $self->expand_verbatim_tabs;
    if (! defined $tab_width or $tab_width =~ /\D/) {
      $tab_width = $self->expand_verbatim_tabs(8);
    }

    # The indent setting may be a code ref; if so it is called with the
    # paragraph temporarily reduced to just its lines, and what it
    # returns is used as the indent.
    my $indent = $self->strip_verbatim_indent;
    if ($indent && ref $indent eq 'CODE') {
      my @header = splice @{$para}, 0, 2;
      $indent = $indent->($para);
      unshift @{$para}, @header;
    }

    # Iterating over the slice aliases each line, so edits land in @$para
    foreach my $line (@{$para}[2 .. $#$para]) {
      # Strip indentation.
      $line =~ s/^\Q$indent// if $indent;
      next unless $tab_width;

          # This is commented out because of github issue #85, and the
          # current maintainers don't know why it was there in the first
          # place.
          #&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});

      # Sort of adapted from Text::Tabs.  Each pass expands the first run
      # of tabs on the line; repeat until none are left.
      1 while $line =~
          s/^([^\t]*)(\t+)/$1.(" " x ((length($2)
                                       * $tab_width)
                                       -(length($1) % $tab_width)))/e;

      # TODO: whinge about (or otherwise treat) unindented or overlong lines
    }
  }

  # Now the VerbatimFormatted hoodoo...
  my $accepted = $self->{'accept_codes'};
  if ($accepted and $accepted->{'VerbatimFormatted'}) {
    # Kill any number of terminal newlines
    pop @$para while @$para > 3 and $para->[-1] !~ m/\S/;
    $self->_verbatim_format($para);
  } elsif ($self->{'codes_in_verbatim'}) {
    my $text = join "\n", splice(@$para, 2);
    my $treelet = $self->_make_treelet(
      $text,
      $para->[1]{'start_line'}, $para->[1]{'xml:space'}
    );
    push @$para, @$treelet;
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  } else {
    if (@$para > 3) {
      my $text = join "\n", splice(@$para, 2);
      push @$para, $text;
    }
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  }
  return;
}
1828
sub _ponder_Data {
  # Mark a data paragraph, [name, {attrs}, line...], as
  # whitespace-preserving and, when it has more than one line, join the
  # lines into a single newline-separated string.  Modifies $para in
  # place; returns nothing.
  my ($self,$para) = @_;
  DEBUG and print STDERR " giving data treatment...\n";

  my $attrs = $para->[1];
  $attrs->{'xml:space'} = 'preserve';

  if (@$para > 3) {
    my @lines = splice(@$para, 2);
    push @$para, join("\n", @lines);
  }

  return;
}
1836
1837
1838
1839
1840###########################################################################
1841
sub _traverse_treelet_bit {  # for use only by the routine above
  # Walk one treelet node, given flattened as ($self, name, {attrs},
  # children...), firing the parser's start/text/end handlers in document
  # order.  Runs of adjacent text children are delivered as a single
  # _handle_text call.  Returns nothing.
  my $self  = shift;
  my $name  = shift;
  my $attrs = shift;
  my @kids  = @_;

  # The handlers get a throwaway copy of the name, so anything they do
  # to their argument can't reach $name.
  my $scratch = $name;
  $self->_handle_element_start($scratch, $attrs);

  my $next = 0;
  while ($next < @kids) {
    my $kid = $kids[$next++];

    if (ref($kid)) {
      _traverse_treelet_bit($self, @$kid);
      next;
    }

    # Glue on any text children that immediately follow
    $kid .= $kids[$next++] while $next < @kids && !ref($kids[$next]);
    $self->_handle_text($kid);
  }

  $scratch = $name;
  $self->_handle_element_end($scratch);
  return;
}
1861
1862#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1863
sub _closers_for_all_curr_open {
  # Build fake closing paragraphs for every region still listed in
  # $self->{'curr_open'}: '=end' for each '=for' entry, '=back' for each
  # '=over' entry (with a complaint via whine).  The closers are returned
  # innermost first, i.e. in the reverse of stack order.  The entries on
  # the stack themselves are not modified.  Returns nothing if there is
  # no open-region stack; dies on any other kind of entry.
  my ($self) = @_;
  my $open_regions = $self->{'curr_open'} || return;

  my @closers;
  for my $still_open (@$open_regions) {
    # Shallow copies of the entry and of its attribute hash
    my @copy = @$still_open;
    $copy[1] = { %{ $copy[1] } };
    #$copy[1]{'start_line'} = -1;

    if($copy[0] eq '=over') {
      $self->whine(
        $still_open->[1]{start_line} ,
        "=over without closing =back"
      );
      $copy[0] = '=back';
    } elsif($copy[0] eq '=for') {
      $copy[0] = '=end';
    } else {
      die "I don't know how to auto-close an open $copy[0] region";
    }

    if( @copy <= 2 ) {
      # No content yet: use the target, or '' since =over's don't have
      # targets
      my $target = $copy[1]{'target'};
      push @copy, (defined $target ? $target : '');
    }

    $copy[1]{'fake-closer'} = 1;

    DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n";
    push @closers, \@copy;
  }

  @closers = reverse @closers;
  return @closers;
}
1897
1898#--------------------------------------------------------------------------
1899
sub _verbatim_format {
  # Turn a verbatim paragraph that may contain "#:" formatting lines into
  # a 'VerbatimFormatted' treelet, in place.
  #
  #   $it - the invocant; not used here
  #   $p  - the paragraph, [name, {attrs}, line, line, ...]
  #
  # A formatty line has to have #: in the first two columns, and marks up
  # the line above it column by column: "^" means bold (VerbatimB), "/"
  # means italic (VerbatimI), and "%" means bold italic (VerbatimBI).
  # Example:
  #   What do you want?  i like pie. [or whatever]
  # #:^^^^^^^^^^^^^^^^^              /////////////
  #
  # Afterwards $p->[0] is 'VerbatimFormatted', each formatty line has been
  # consumed, marked-up stretches are [name, {}, text] child nodes,
  # adjacent text tokens are merged into one, and the last text token has
  # no terminal newline.  Returns nothing.
  my($it, $p) = @_;

  my $formatting;

  for(my $i = 2; $i < @$p; $i++) { # work forwards over the lines
    DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n";
    $p->[$i] .= "\n";
     # Unlike with simple Verbatim blocks, we don't end up just doing
     # a join("\n", ...) on the contents, so we have to append a
     # newline to every line, and then nix the last one later.
  }

  if( DEBUG > 4 ) {
    print STDERR "<<\n";
    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
      print STDERR "_verbatim_format $i: $p->[$i]";
    }
    print STDERR ">>\n";
  }

  for(my $i = $#$p; $i > 2; $i--) {
    # work backwards over the lines, except the first (#2)

    # We want a formatty line preceded by a nonformatty one
    DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n";
    unless($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
      DEBUG > 5 and print STDERR "  It's not a formatty line.  Ignoring\n";
      next;
    }
    my $marks = $1; # saved right away, before any other match is tried

    DEBUG > 5 and print STDERR "  It's a formatty line.  ",
     "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";

    if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
      DEBUG > 5 and print STDERR "  Previous line is formatty!  Skipping this one.\n";
      next;
    }
    DEBUG > 5 and print STDERR "  Previous line is non-formatty!  Yay!\n";

    DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";

    $formatting = '  ' . $marks; # two blanks stand in for the "#:" columns
    $formatting =~ s/\s+$//s; # nix trailing whitespace
    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
      splice @$p,$i,1; # remove this line
      $i--; # don't consider next line
      next;
    }

    if( length($formatting) >= length($p->[$i-1]) ) {
      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
    } else {
      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
    }
    # Make $formatting and the previous line be exactly the same length,
    # with $formatting having a " " as the last character.

    DEBUG > 4 and print STDERR "Formatting <$formatting>    on <", $p->[$i-1], ">\n";


    # Cut the previous line into tokens, one per run of identical
    # formatting characters.
    my @new_line;
    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
      #print STDERR "Format matches $1\n";

      if($2) {
        #print STDERR "SKIPPING <$2>\n";
        push @new_line,
          substr($p->[$i-1], pos($formatting)-length($1), length($1));
      } else {
        #print STDERR "SNARING $+\n";
        push @new_line, [
          (
            $3 ? 'VerbatimB'  :
            $4 ? 'VerbatimI'  :
            $5 ? 'VerbatimBI' : die("Should never get called")
          ), {},
          substr($p->[$i-1], pos($formatting)-length($1), length($1))
        ];
        #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
      }
    }
    my @nixed =
      splice @$p, $i-1, 2, @new_line; # replace myself and the previous line
    DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";

    DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
      scalar(@new_line), "):",
      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
    $i--; # So the next line we scrutinize is the line before the one
          #  that we just went and formatted
  }

  $p->[0] = 'VerbatimFormatted';

  # Collapse adjacent text nodes, just for kicks.
  # (The loop condition has to be "<": with ">" the loop body could never
  # run on a paragraph that has any content.)
  for( my $i = 2; $i < $#$p; $i++ ) { # work forwards over the tokens except for the last
    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
      DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
      $p->[$i] .= splice @$p, $i+1, 1; # merge
      --$i;  # and back up
    }
  }

  # Now look for the last text token, and remove the terminal newline
  for( my $i = $#$p; $i >= 2; $i-- ) {
    # work backwards over the tokens, even the first
    if( !ref($p->[$i]) ) {
      if($p->[$i] =~ s/\n$//s) {
        DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
      } else {
        DEBUG > 5 and print STDERR
         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
      }
      last; # we only want the next one
    }
  }

  return;
}
2030
2031
2032#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2033
2034
sub _treelet_from_formatting_codes {
  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
  #  Like [ '~Top', {'start_line' => $start_line},
  #            "I like ",
  #            [ 'B', {}, "pie" ],
  #            "!"
  #       ]
  # This illustrates the general format of a treelet.  It is an array:
  #     [0]       is a scalar indicating its type.  In the example above, the
  #               types are '~Top' and 'B'
  #     [1]       is a hash of various flags about it, possibly empty
  #     [2] - [N] are an ordered list of the subcomponents of the treelet.
  #               Scalars are literal text, refs are sub-treelets, to
  #               arbitrary levels.  Stringifying a treelet will recursively
  #               stringify the sub-treelets, concatenating everything
  #               together to form the exact text of the treelet.
  #
  # Arguments:
  #     $self           the parser object
  #     $para           the paragraph text, as one string
  #     $start_line     stored in the '~Top' node, and passed along with
  #                     any complaint made via whine() or scream()
  #     $preserve_space if true (or if $self->{'preserve_whitespace'} is
  #                     true), whitespace is not collapsed and trimmed
  #
  # Complaints: a nested L<> is reported via scream() and its node is
  # named 'X' instead; codes still open at the end of the paragraph are
  # reported via whine().  Returns the '~Top' treelet in every case.

  my($self, $para, $start_line, $preserve_space) = @_;

  my $treelet = ['~Top', {'start_line' => $start_line},];

  unless ($preserve_space || $self->{'preserve_whitespace'}) {
    $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
    $para =~ s/ $//;
    $para =~ s/^ //;
  }

  # The only apparent problem with the above code is that N<<  >> turns
  # into N<< >>.  But then, word wrapping does that too!  So don't do that!


  # As a Start-code is encountered, an entry is pushed onto @stack: the
  # number of opening bracket '<' characters for a multiple-bracket code,
  # or 0 for a single-bracket one.  When closing brackets are found in the
  # text, at least that many (one, for a 0 entry) will be required to mean
  # the Start-code is terminated.  When those are found, @stack is popped.
  my @stack;

  # @lineage is the chain of nodes from the '~Top' node down to the
  # innermost code that is still open; new content goes into $lineage[-1].
  my @lineage = ($treelet);
  my $raw = ''; # raw content of L<> fcode before splitting/processing
    # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
    # into just 1 ' '. Is this the regex's doing or 'raw's?  Answer is it's
    # the 'collapse and trim all whitespace first' lines just above.
  my $inL = 0;  # 0, or the depth of @stack at which the open L<> sits

  DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";

  # Here begins our frightening tokenizer RE.  The following regex matches
  # text in four main parts:
  #
  #  * Start-codes.  The first alternative matches C< or C<<, the latter
  #    followed by some whitespace.  $1 will hold the entire start code
  #    (including any space following a multiple-angle-bracket delimiter),
  #    and $2 will hold only the additional brackets past the first in a
  #    multiple-bracket delimiter.  length($2) + 1 will be the number of
  #    closing brackets we have to find.
  #
  #  * Closing brackets.  Match some amount of whitespace followed by
  #    multiple close brackets.  The logic to see if this closes anything
  #    is down below.  Note that in order to parse C<<  >> correctly, we
  #    have to use look-behind (?<=\s\s), since the match of the starting
  #    code will have consumed the whitespace.
  #
  #  * A single closing bracket, to close a simple code like C<>.
  #
  #  * Something that isn't a start or end code.  We have to be careful
  #    about accepting whitespace, since perlpodspec says that any whitespace
  #    before a multiple-bracket closing delimiter should be ignored.
  #
  while($para =~
    m/\G
      (?:
        # Match starting codes, including the whitespace following a
        # multiple-delimiter start code.  $1 gets the whole start code and
        # $2 gets all but one of the <s in the multiple-bracket case.
        ([A-Z]<(?:(<+)\s+)?)
        |
        # Match multiple-bracket end codes.  $3 gets the whitespace that
        # should be discarded before an end bracket but kept in other cases
        # and $4 gets the end brackets themselves.  ($3 can be empty if the
        # construct is empty, like C<<  >>, and all the white-space has been
        # gobbled up already, considered to be space after the opening
        # bracket.  In this case we use look-behind to verify that there are
        # at least 2 spaces in a row before the ">".)
        (\s+|(?<=\s\s))(>{2,})
        |
        (\s?>)          # $5: simple end-codes
        |
        (               # $6: stuff containing no start-codes or end-codes
          (?:
            [^A-Z\s>]
            |
            (?:
              [A-Z](?!<)
            )
            |
            # whitespace is ok, but we don't want to eat the whitespace before
            # a multiple-bracket end code.
            # NOTE: we may still have problems with e.g. S<<    >>
            (?:
              \s(?!\s*>{2,})
            )
          )+
        )
      )
    /xgo
  ) {
    DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
    if(defined $1) {
      # A start-code: open a new node under the current innermost one
      my $bracket_count;    # How many '<<<' in a row this has.  Needed for
                            # Pod::Simple::JustPod
      if(defined $2) {
        DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
        $bracket_count = length($2) + 1;
        push @stack, $bracket_count; # length of the necessary complex
                                     # end-code string
      } else {
        DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
        push @stack, 0;  # signal that we're looking for a simple end-code
        $bracket_count = 1;
      }
      my $code = substr($1,0,1);
      if ('L' eq $code) {
        if ($inL) {
            $raw .= $1;
            $self->scream( $start_line,
                           'Nested L<> are illegal.  Pretending inner one is '
                         . 'X<...> so can continue looking for other errors.');
            $code = "X";
        }
        else {
            $raw = ""; # reset raw content accumulator
            $inL = @stack;
        }
      } else {
        $raw .= $1 if $inL;
      }
      push @lineage, [ $code, {}, ];  # new node object

      # Tell Pod::Simple::JustPod how many brackets there were, but to save
      # space, not in the most usual case of there was just 1.  It can be
      # inferred by the absence of this element.  Similarly, if there is more
      # than one bracket, extract the white space between the final bracket
      # and the real beginning of the interior.  Save that if it isn't just a
      # single space
      if ($self->{'_output_is_for_JustPod'} && $bracket_count > 1) {
        $lineage[-1][1]{'~bracket_count'} = $bracket_count;
        my $lspacer = substr($1, 1 + $bracket_count);
        $lineage[-1][1]{'~lspacer'} = $lspacer if $lspacer ne " ";
      }
      push @{ $lineage[-2] }, $lineage[-1];
    } elsif(defined $4) {
      DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
      # This is where it gets messy...
      if(! @stack) {
        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
        DEBUG > 4 and print STDERR " But it's really just stuff.\n";
        push @{ $lineage[-1] }, $3, $4;
        next;
      } elsif(!$stack[-1]) {
        # We saw " >>>>" but needed only ">".  Back pos up, so that the
        # brackets past the first get matched again on the next pass.
        DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n";
        push @{ $lineage[-1] }, $3; # That was a for-real space, too.
        pos($para) = pos($para) - length($4) + 1;
      } elsif($stack[-1] == length($4)) {
        # We found " >>>>", and it was exactly what we needed.  Commonest case.
        DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n";
      } elsif($stack[-1] < length($4)) {
        # We saw " >>>>" but needed only " >>".  Back pos up, so that the
        # surplus brackets get matched again on the next pass.
        DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n";
        pos($para) = pos($para) - length($4) + $stack[-1];
      } else {
        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
        DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n";
        push @{ $lineage[-1] }, $3, $4;
        next;
      }
      #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";

      # From here on, the innermost open code really is being closed.

      # For JustPod, record the whitespace that preceded the closing
      # brackets whenever it isn't the usual single space.
      if ($3 ne " " && $self->{'_output_is_for_JustPod'}) {
        if ($3 ne "") {
          $lineage[-1][1]{'~rspacer'} = $3;
        }
        elsif ($lineage[-1][1]{'~lspacer'} eq "  ") {

          # Here we had something like C<<  >> which was a false positive
          delete $lineage[-1][1]{'~lspacer'};
        }
        else {
          # Nothing was matched before the brackets, so the last character
          # of the recorded left spacer is moved over to be the right
          # spacer.
          $lineage[-1][1]{'~rspacer'}
                                = substr($lineage[-1][1]{'~lspacer'}, -1, 1);
          chop $lineage[-1][1]{'~lspacer'};
        }
      }

      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Keep the element from being childless

      # If the code being closed is the open L<>, attach what was
      # accumulated in $raw to it.
      if ($inL == @stack) {
        $lineage[-1][1]{'raw'} = $raw;
        $inL = 0;
      }

      pop @stack;
      pop @lineage;

      $raw .= $3.$4 if $inL;

    } elsif(defined $5) {
      DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";

      if(@stack and ! $stack[-1]) {
        # We're indeed expecting a simple end-code
        DEBUG > 4 and print STDERR " It's indeed an end-code.\n";

        if(length($5) == 2) { # There was a space there: " >"
          push @{ $lineage[-1] }, ' ';
        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
          push @{ $lineage[-1] }, ''; # keep it from being really childless
        }

        # As above: closing the open L<> attaches $raw to it
        if ($inL == @stack) {
          $lineage[-1][1]{'raw'} = $raw;
          $inL = 0;
        }

        pop @stack;
        pop @lineage;
      } else {
        DEBUG > 4 and print STDERR " It's just stuff.\n";
        push @{ $lineage[-1] }, $5;
      }

      $raw .= $5 if $inL;

    } elsif(defined $6) {
      DEBUG > 3 and print STDERR "Found stuff \"$6\"\n";
      push @{ $lineage[-1] }, $6;
      $raw .= $6 if $inL;
        # XXX does not capture runs of several whitespace characters --
        #     'raw' ends up with at most 1 leading/trailing whitespace
        #     character, why not all of it?
        #     Answer, because we deliberately trimmed it above

    } else {
      # should never ever ever ever happen
      DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n";
      die "SPORK 512512!";
    }
  }

  if(@stack) { # Uhoh, some sequences weren't closed.
    # Close them all, innermost first, building up in $x a picture of the
    # unterminated nesting for the complaint.
    my $x= "...";
    while(@stack) {
      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Hmmmmm!

      my $code         = (pop @lineage)->[0];
      my $ender_length =  pop @stack;
      if($ender_length) {
        --$ender_length;
        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
      } else {
        $x = $code . "<$x>";
      }
    }
    DEBUG > 1 and print STDERR "Unterminated $x sequence\n";
    $self->whine($start_line,
      "Unterminated $x sequence",
    );
  }

  return $treelet;
}
2308
2309#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2310
sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
  # Returns all the text in the given treelet as one string.  The
  # invocant is ignored; the work is done by stringify_lol().
  my (undef, $lol) = @_;
  return stringify_lol($lol);
}
2314
sub stringify_lol {  # function: stringify_lol($lol)
  # Returns the concatenation of every text element found in the treelet
  # $lol, at any depth.  _stringify_lol() does the walking and appends to
  # the buffer it is handed a reference to.
  my ($lol) = @_;
  my $string_form = '';
  _stringify_lol($lol, \$string_form);
  return $string_form;
}
2320
sub _stringify_lol {  # the real recursor
  # Append the text of treelet $lol to the string that $to refers to.
  # Elements 0 and 1 of a treelet (name and attributes) are skipped;
  # array-based children are descended into, and everything else is
  # appended as is.  Returns nothing.
  my($lol, $to) = @_;
  my $count = @$lol;
  foreach my $idx (2 .. $count - 1) {
    my $child = $lol->[$idx];
    if( ref($child || '') and UNIVERSAL::isa($child, 'ARRAY') ) {
      _stringify_lol( $child, $to );  # recurse!
      next;
    }
    $$to .= $child;
  }
  return;
}
2332
2333#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2334
sub _dump_curr_open { # return a string representation of the stack
  # Describes each entry of $self->{'curr_open'}, joined with '; '.  An
  # '=for' entry is shown as its '~really' attribute (or '=over' when that
  # is false) followed by a space and its target; any other entry is shown
  # as its name.  An empty stack gives '[empty]'.
  my $curr_open = $_[0]{'curr_open'};

  return '[empty]' unless @$curr_open;

  my @labels;
  foreach my $region (@$curr_open) {
    if ($region->[0] eq '=for') {
      push @labels,
        ($region->[1]{'~really'} || '=over') . ' ' . $region->[1]{'target'};
    } else {
      push @labels, $region->[0];
    }
  }
  return join '; ', @labels;
}
2349
2350###########################################################################
# How pretty() writes the characters that it doesn't show as they are.
# Anything it escapes that has no entry here comes out as \x{...}.
my %pretty_form = (
  "\a" => '\a', # ding!
  "\b" => '\b', # BS
  "\e" => '\e', # ESC
  "\f" => '\f', # FF
  "\t" => '\t', # tab
  "\cm" => '\cm',
  "\cj" => '\cj',
  "\n" => '\n', # probably overrides one of either \cm or \cj
  '"' => '\"',
  '\\' => '\\\\',
  '$' => '\\$',
  '@' => '\\@',
  '%' => '\\%',
  '#' => '\\#',
);

sub pretty { # adopted from Class::Classless
  # Not the most brilliant routine, but passable.
  # Don't give it a cyclic data structure!
  #
  # Returns one string showing all the arguments, separated by ", ":
  #   undef                 as  undef
  #   an unblessed array ref (or a Pod::Simple::LinkSection object)
  #                         as  [ ...its elements... ]
  #   an unblessed scalar ref as a backslash followed by what it refers to
  #   an unblessed hash ref as  {key=>value, ...}  with the keys sorted
  #   the empty string      as  ''
  #   a plain decimal number as itself
  #   anything else         as a double-quoted string with every character
  #                         escaped that isn't in the explicit list below
  #
  # The arguments themselves are never modified.
  my @stuff = @_; # copy, since the escaping is done in place
  my @shown;

  foreach my $item (@stuff) {
    if(!defined($item)) {
      push @shown, "undef";
      next;
    }

    my $type = ref($item);
    if($type eq 'ARRAY' or $type eq 'Pod::Simple::LinkSection') {
      push @shown, "[ " . pretty(@$item) . " ]";
    } elsif($type eq 'SCALAR') {
      push @shown, "\\" . pretty($$item);
    } elsif($type eq 'HASH') {
      push @shown, "{" . join(", ",
        map(pretty($_) . '=>' . pretty($item->{$_}),
            sort keys %$item ) ) . "}";
    } elsif(!length($item)) {
      push @shown, q{''}; # empty string
    } elsif(
      $item eq '0' # very common case
      or(
         # \z and not $, so that a number with a newline after it gets
         # quoted and escaped like any other string
         $item =~ m/^-?(?:[123456789]\d*|0)(?:\.\d+)?\z/s
         and $item ne '-0' # the strange case that RE lets thru
      )
    ) {
      push @shown, $item;
    } else {
        # Yes, explicitly name every character desired. There are shortcuts
        # one could make, but I (Karl Williamson) was afraid that some Perl
        # releases would have bugs in some of them. For example [A-Z] works
        # even on EBCDIC platforms to match exactly the 26 uppercase English
        # letters, but I don't know if it has always worked without bugs. It
        # seemed safest just to list the characters.
        #
        # The double quote is deliberately not in the list: the result is
        # wrapped in double quotes, so one inside the string has to be
        # escaped (to \", from %pretty_form).
        # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
      $item =~
        s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
         #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
      push @shown, qq{"$item"};
    }
  }

  my $out = join ", ", @shown;
  # $out =~ s/\n */ /g if length($out) < 75;
  return $out;
}
2416
2417#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2418
2419# A rather unsubtle method of blowing away all the state information
2420# from a parser object so it can be reused. Provided as a utility for
2421# backward compatibility in Pod::Man, etc. but not recommended for
2422# general use.
2423
sub reinit {
  # Delete the per-parse state entries listed below from the parser
  # object, leaving every other entry in it alone.
  my $self = shift;

  my @state_keys = qw(
    source_dead source_filename doc_has_started
    start_of_pod_block content_seen last_was_blank paras curr_open
    line_count pod_para_count in_pod ~tried_gen_errata all_errata
    errata errors_seen
    Title
  );

  foreach my $key (@state_keys) {
    delete $self->{$key};
  }
}
2434
2435#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
24361;
2437
2438