1#############################################################################
2# Pod/Checker.pm -- check pod documents for syntax errors
3#
4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Checker;
11use strict;
12
13use vars qw($VERSION @ISA @EXPORT %VALID_COMMANDS %VALID_SEQUENCES);
14$VERSION = '1.60';  ## Current version of this package
15require  5.005;    ## requires this Perl version or later
16
17use Pod::ParseUtils; ## for hyperlinks and lists
18
19=head1 NAME
20
21Pod::Checker, podchecker() - check pod documents for syntax errors
22
23=head1 SYNOPSIS
24
25  use Pod::Checker;
26
27  $num_errors = podchecker($filepath, $outputpath, %options);
28
29  my $checker = new Pod::Checker %options;
30  $checker->parse_from_file($filepath, \*STDERR);
31
32=head1 OPTIONS/ARGUMENTS
33
34C<$filepath> is the input POD to read and C<$outputpath> is
35where to write POD syntax error messages. Either argument may be a scalar
36indicating a file-path, or else a reference to an open filehandle.
37If unspecified, the input-file it defaults to C<\*STDIN>, and
38the output-file defaults to C<\*STDERR>.
39
40=head2 podchecker()
41
42This function can take a hash of options:
43
44=over 4
45
46=item B<-warnings> =E<gt> I<val>
47
48Turn warnings on/off. I<val> is usually 1 for on, but higher values
49trigger additional warnings. See L<"Warnings">.
50
51=back
52
53=head1 DESCRIPTION
54
55B<podchecker> will perform syntax checking of Perl5 POD format documentation.
56
57Curious/ambitious users are welcome to propose additional features they wish
58to see in B<Pod::Checker> and B<podchecker> and verify that the checks are
59consistent with L<perlpod>.
60
61The following checks are currently performed:
62
63=over 4
64
65=item *
66
67Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences,
68and unterminated interior sequences.
69
70=item *
71
72Check for proper balancing of C<=begin> and C<=end>. The contents of such
73a block are generally ignored, i.e. no syntax checks are performed.
74
75=item *
76
77Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
78
79=item *
80
81Check for same nested interior-sequences (e.g.
82C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
83
84=item *
85
86Check for malformed or non-existing entities C<EE<lt>...E<gt>>.
87
88=item *
89
90Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod>
91for details.
92
93=item *
94
95Check for unresolved document-internal links. This check may also reveal
96misspelled links that seem to be internal links but should be links
97to something else.
98
99=back
100
101=head1 DIAGNOSTICS
102
103=head2 Errors
104
105=over 4
106
107=item * empty =headn
108
109A heading (C<=head1> or C<=head2>) without any text? That ain't no
110heading!
111
112=item * =over on line I<N> without closing =back
113
114The C<=over> command does not have a corresponding C<=back> before the
115next heading (C<=head1> or C<=head2>) or the end of the file.
116
117=item * =item without previous =over
118
119=item * =back without previous =over
120
121An C<=item> or C<=back> command has been found outside a
122C<=over>/C<=back> block.
123
124=item * No argument for =begin
125
126A C<=begin> command was found that is not followed by the formatter
127specification.
128
129=item * =end without =begin
130
131A standalone C<=end> command was found.
132
133=item * Nested =begin's
134
135There were at least two consecutive C<=begin> commands without
136the corresponding C<=end>. Only one C<=begin> may be active at
137a time.
138
139=item * =for without formatter specification
140
141There is no specification of the formatter after the C<=for> command.
142
143=item * Apparent command =foo not preceded by blank line
144
145A command which has ended up in the middle of a paragraph or other command,
146such as
147
148  =item one
149  =item two <-- bad
150
151=item * unresolved internal link I<NAME>
152
153The given link to I<NAME> does not have a matching node in the current
154POD. This also happened when a single word node name is not enclosed in
155C<"">.
156
157=item * Unknown command "I<CMD>"
158
159An invalid POD command has been found. Valid are C<=head1>, C<=head2>,
160C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>,
161C<=for>, C<=pod>, C<=cut>
162
163=item * Unknown interior-sequence "I<SEQ>"
164
165An invalid markup command has been encountered. Valid are:
166C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>,
167C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>,
168C<ZE<lt>E<gt>>
169
170=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt>
171
172Two nested identical markup commands have been found. Generally this
173does not make sense.
174
175=item * garbled entity I<STRING>
176
177The I<STRING> found cannot be interpreted as a character entity.
178
179=item * Entity number out of range
180
181An entity specified by number (dec, hex, oct) is out of range (1-255).
182
183=item * malformed link LE<lt>E<gt>
184
185The link found cannot be parsed because it does not conform to the
186syntax described in L<perlpod>.
187
188=item * nonempty ZE<lt>E<gt>
189
190The C<ZE<lt>E<gt>> sequence is supposed to be empty.
191
192=item * empty XE<lt>E<gt>
193
194The index entry specified contains nothing but whitespace.
195
196=item * Spurious text after =pod / =cut
197
198The commands C<=pod> and C<=cut> do not take any arguments.
199
200=item * Spurious =cut command
201
202A C<=cut> command was found without a preceding POD paragraph.
203
204=item * Spurious =pod command
205
206A C<=pod> command was found after a preceding POD paragraph.
207
208=item * Spurious character(s) after =back
209
210The C<=back> command does not take any arguments.
211
212=back
213
214=head2 Warnings
215
216These may not necessarily cause trouble, but indicate mediocre style.
217
218=over 4
219
220=item * multiple occurrence of link target I<name>
221
222The POD file has some C<=item> and/or C<=head> commands that have
223the same text. Potential hyperlinks to such a text cannot be unique then.
224This warning is printed only with warning level greater than one.
225
226=item * line containing nothing but whitespace in paragraph
227
228There is some whitespace on a seemingly empty line. POD is very sensitive
229to such things, so this is flagged. B<vi> users switch on the B<list>
230option to avoid this problem.
231
232=begin _disabled_
233
234=item * file does not start with =head
235
236The file starts with a different POD directive than head.
237This is most probably something you do not want.
238
239=end _disabled_
240
241=item * previous =item has no contents
242
243There is a list C<=item> right above the flagged line that has no
244text contents. You probably want to delete empty items.
245
246=item * preceding non-item paragraph(s)
247
248A list introduced by C<=over> starts with a text or verbatim paragraph,
249but continues with C<=item>s. Move the non-item paragraph out of the
250C<=over>/C<=back> block.
251
252=item * =item type mismatch (I<one> vs. I<two>)
253
254A list started with e.g. a bullet-like C<=item> and continued with a
255numbered one. This is obviously inconsistent. For most translators the
256type of the I<first> C<=item> determines the type of the list.
257
258=item * I<N> unescaped C<E<lt>E<gt>> in paragraph
259
260Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>>
261can potentially cause errors as they could be misinterpreted as
262markup commands. This is only printed when the -warnings level is
263greater than 1.
264
265=item * Unknown entity
266
267A character entity was found that does not belong to the standard
268ISO set or the POD specials C<verbar> and C<sol>.
269
270=item * No items in =over
271
272The list opened with C<=over> does not contain any items.
273
274=item * No argument for =item
275
276C<=item> without any parameters is deprecated. It should either be followed
277by C<*> to indicate an unordered list, by a number (optionally followed
278by a dot) to indicate an ordered (numbered) list or simple text for a
279definition list.
280
281=item * empty section in previous paragraph
282
283The previous section (introduced by a C<=head> command) does not contain
284any text. This usually indicates that something is missing. Note: A
285C<=head1> followed immediately by C<=head2> does not trigger this warning.
286
287=item * Verbatim paragraph in NAME section
288
289The NAME section (C<=head1 NAME>) should consist of a single paragraph
290with the script/module name, followed by a dash `-' and a very short
291description of what the thing is good for.
292
293=item * =headI<n> without preceding higher level
294
295For example if there is a C<=head2> in the POD file prior to a
296C<=head1>.
297
298=back
299
300=head2 Hyperlinks
301
302There are some warnings with respect to malformed hyperlinks:
303
304=over 4
305
306=item * ignoring leading/trailing whitespace in link
307
308There is whitespace at the beginning or the end of the contents of
309LE<lt>...E<gt>.
310
311=item * (section) in '$page' deprecated
312
313There is a section detected in the page name of LE<lt>...E<gt>, e.g.
314C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only.
315Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able
316to expand this to appropriate code. For links to (builtin) functions,
317please say C<LE<lt>perlfunc/mkdirE<gt>>, without ().
318
319=item * alternative text/node '%s' contains non-escaped | or /
320
321The characters C<|> and C</> are special in the LE<lt>...E<gt> context.
322Although the hyperlink parser does its best to determine which "/" is
323text and which is a delimiter in case of doubt, one ought to escape
324these literal characters like this:
325
326  /     E<sol>
327  |     E<verbar>
328
329=back
330
331=head1 RETURN VALUE
332
333B<podchecker> returns the number of POD syntax errors found or -1 if
334there were no POD commands at all found in the file.
335
336=head1 EXAMPLES
337
338See L</SYNOPSIS>
339
340=head1 INTERFACE
341
342While checking, this module collects document properties, e.g. the nodes
343for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>).
344POD translators can use this feature to syntax-check and get the nodes in
345a first pass before actually starting to convert. This is expensive in terms
346of execution time, but allows for very robust conversions.
347
348Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror>
349method to print errors and warnings. The summary output (e.g.
350"Pod syntax OK") has been dropped from the module and has been included in
351B<podchecker> (the script). This allows users of B<Pod::Checker> to
352control completely the output behavior. Users of B<podchecker> (the script)
353get the well-known behavior.
354
355=cut
356
357#############################################################################
358
359#use diagnostics;
360use Carp qw(croak);
361use Exporter;
362use Pod::Parser;
363
364@ISA = qw(Pod::Parser);
365@EXPORT = qw(&podchecker);
366
367my %VALID_COMMANDS = (
368    'pod'    =>  1,
369    'cut'    =>  1,
370    'head1'  =>  1,
371    'head2'  =>  1,
372    'head3'  =>  1,
373    'head4'  =>  1,
374    'over'   =>  1,
375    'back'   =>  1,
376    'item'   =>  1,
377    'for'    =>  1,
378    'begin'  =>  1,
379    'end'    =>  1,
380    'encoding' =>  1,
381);
382
383my %VALID_SEQUENCES = (
384    'I'  =>  1,
385    'B'  =>  1,
386    'S'  =>  1,
387    'C'  =>  1,
388    'L'  =>  1,
389    'F'  =>  1,
390    'X'  =>  1,
391    'Z'  =>  1,
392    'E'  =>  1,
393);
394
395# stolen from HTML::Entities
396my %ENTITIES = (
397 # Some normal chars that have special meaning in SGML context
398 amp    => '&',  # ampersand
399'gt'    => '>',  # greater than
400'lt'    => '<',  # less than
401 quot   => '"',  # double quote
402
403 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
404 AElig  => '�',  # capital AE diphthong (ligature)
405 Aacute => '�',  # capital A, acute accent
406 Acirc  => '�',  # capital A, circumflex accent
407 Agrave => '�',  # capital A, grave accent
408 Aring  => '�',  # capital A, ring
409 Atilde => '�',  # capital A, tilde
410 Auml   => '�',  # capital A, dieresis or umlaut mark
411 Ccedil => '�',  # capital C, cedilla
412 ETH    => '�',  # capital Eth, Icelandic
413 Eacute => '�',  # capital E, acute accent
414 Ecirc  => '�',  # capital E, circumflex accent
415 Egrave => '�',  # capital E, grave accent
416 Euml   => '�',  # capital E, dieresis or umlaut mark
417 Iacute => '�',  # capital I, acute accent
418 Icirc  => '�',  # capital I, circumflex accent
419 Igrave => '�',  # capital I, grave accent
420 Iuml   => '�',  # capital I, dieresis or umlaut mark
421 Ntilde => '�',  # capital N, tilde
422 Oacute => '�',  # capital O, acute accent
423 Ocirc  => '�',  # capital O, circumflex accent
424 Ograve => '�',  # capital O, grave accent
425 Oslash => '�',  # capital O, slash
426 Otilde => '�',  # capital O, tilde
427 Ouml   => '�',  # capital O, dieresis or umlaut mark
428 THORN  => '�',  # capital THORN, Icelandic
429 Uacute => '�',  # capital U, acute accent
430 Ucirc  => '�',  # capital U, circumflex accent
431 Ugrave => '�',  # capital U, grave accent
432 Uuml   => '�',  # capital U, dieresis or umlaut mark
433 Yacute => '�',  # capital Y, acute accent
434 aacute => '�',  # small a, acute accent
435 acirc  => '�',  # small a, circumflex accent
436 aelig  => '�',  # small ae diphthong (ligature)
437 agrave => '�',  # small a, grave accent
438 aring  => '�',  # small a, ring
439 atilde => '�',  # small a, tilde
440 auml   => '�',  # small a, dieresis or umlaut mark
441 ccedil => '�',  # small c, cedilla
442 eacute => '�',  # small e, acute accent
443 ecirc  => '�',  # small e, circumflex accent
444 egrave => '�',  # small e, grave accent
445 eth    => '�',  # small eth, Icelandic
446 euml   => '�',  # small e, dieresis or umlaut mark
447 iacute => '�',  # small i, acute accent
448 icirc  => '�',  # small i, circumflex accent
449 igrave => '�',  # small i, grave accent
450 iuml   => '�',  # small i, dieresis or umlaut mark
451 ntilde => '�',  # small n, tilde
452 oacute => '�',  # small o, acute accent
453 ocirc  => '�',  # small o, circumflex accent
454 ograve => '�',  # small o, grave accent
455 oslash => '�',  # small o, slash
456 otilde => '�',  # small o, tilde
457 ouml   => '�',  # small o, dieresis or umlaut mark
458 szlig  => '�',  # small sharp s, German (sz ligature)
459 thorn  => '�',  # small thorn, Icelandic
460 uacute => '�',  # small u, acute accent
461 ucirc  => '�',  # small u, circumflex accent
462 ugrave => '�',  # small u, grave accent
463 uuml   => '�',  # small u, dieresis or umlaut mark
464 yacute => '�',  # small y, acute accent
465 yuml   => '�',  # small y, dieresis or umlaut mark
466
467 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
468 copy   => '�',  # copyright sign
469 reg    => '�',  # registered sign
470 nbsp   => "\240", # non breaking space
471
472 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
473 iexcl  => '�',
474 cent   => '�',
475 pound  => '�',
476 curren => '�',
477 yen    => '�',
478 brvbar => '�',
479 sect   => '�',
480 uml    => '�',
481 ordf   => '�',
482 laquo  => '�',
483'not'   => '�',    # not is a keyword in perl
484 shy    => '�',
485 macr   => '�',
486 deg    => '�',
487 plusmn => '�',
488 sup1   => '�',
489 sup2   => '�',
490 sup3   => '�',
491 acute  => '�',
492 micro  => '�',
493 para   => '�',
494 middot => '�',
495 cedil  => '�',
496 ordm   => '�',
497 raquo  => '�',
498 frac14 => '�',
499 frac12 => '�',
500 frac34 => '�',
501 iquest => '�',
502'times' => '�',    # times is a keyword in perl
503 divide => '�',
504
505# some POD special entities
506 verbar => '|',
507 sol => '/'
508);
509
510##---------------------------------------------------------------------------
511
512##---------------------------------
513## Function definitions begin here
514##---------------------------------
515
516sub podchecker {
517    my ($infile, $outfile, %options) = @_;
518    local $_;
519
520    ## Set defaults
521    $infile  ||= \*STDIN;
522    $outfile ||= \*STDERR;
523
524    ## Now create a pod checker
525    my $checker = new Pod::Checker(%options);
526
527    ## Now check the pod document for errors
528    $checker->parse_from_file($infile, $outfile);
529
530    ## Return the number of errors found
531    return $checker->num_errors();
532}
533
534##---------------------------------------------------------------------------
535
536##-------------------------------
537## Method definitions begin here
538##-------------------------------
539
540##################################
541
542=over 4
543
544=item C<Pod::Checker-E<gt>new( %options )>
545
546Return a reference to a new Pod::Checker object that inherits from
547Pod::Parser and is used for calling the required methods later. The
548following options are recognized:
549
550C<-warnings =E<gt> num>
551  Print warnings if C<num> is true. The higher the value of C<num>,
552the more warnings are printed. Currently there are only levels 1 and 2.
553
554C<-quiet =E<gt> num>
555  If C<num> is true, do not print any errors/warnings. This is useful
556when Pod::Checker is used to munge POD code into plain text from within
557POD formatters.
558
559=cut
560
561## sub new {
562##     my $this = shift;
563##     my $class = ref($this) || $this;
564##     my %params = @_;
565##     my $self = {%params};
566##     bless $self, $class;
567##     $self->initialize();
568##     return $self;
569## }
570
571sub initialize {
572    my $self = shift;
573    ## Initialize number of errors, and setup an error function to
574    ## increment this number and then print to the designated output.
575    $self->{_NUM_ERRORS} = 0;
576    $self->{_NUM_WARNINGS} = 0;
577    $self->{-quiet} ||= 0;
578    # set the error handling subroutine
579    $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror');
580    $self->{_commands} = 0; # total number of POD commands encountered
581    $self->{_list_stack} = []; # stack for nested lists
582    $self->{_have_begin} = ''; # stores =begin
583    $self->{_links} = []; # stack for internal hyperlinks
584    $self->{_nodes} = []; # stack for =head/=item nodes
585    $self->{_index} = []; # text in X<>
586    # print warnings?
587    $self->{-warnings} = 1 unless(defined $self->{-warnings});
588    $self->{_current_head1} = ''; # the current =head1 block
589    $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings});
590}
591
592##################################
593
594=item C<$checker-E<gt>poderror( @args )>
595
596=item C<$checker-E<gt>poderror( {%opts}, @args )>
597
598Internal method for printing errors and warnings. If no options are
599given, simply prints "@_". The following options are recognized and used
600to form the output:
601
602  -msg
603
604A message to print prior to C<@args>.
605
606  -line
607
608The line number the error occurred in.
609
610  -file
611
612The file (name) the error occurred in.
613
614  -severity
615
616The error level, should be 'WARNING' or 'ERROR'.
617
618=cut
619
620# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
621sub poderror {
622    my $self = shift;
623    my %opts = (ref $_[0]) ? %{shift()} : ();
624
625    ## Retrieve options
626    chomp( my $msg  = ($opts{-msg} || '')."@_" );
627    my $line = (exists $opts{-line}) ? " at line $opts{-line}" : '';
628    my $file = (exists $opts{-file}) ? " in file $opts{-file}" : '';
629    unless (exists $opts{-severity}) {
630       ## See if can find severity in message prefix
631       $opts{-severity} = $1  if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// );
632    }
633    my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : '';
634
635    ## Increment error count and print message "
636    ++($self->{_NUM_ERRORS})
637        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
638    ++($self->{_NUM_WARNINGS})
639        if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING'));
640    unless($self->{-quiet}) {
641      my $out_fh = $self->output_handle() || \*STDERR;
642      print $out_fh ($severity, $msg, $line, $file, "\n")
643        if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING');
644    }
645}
646
647##################################
648
649=item C<$checker-E<gt>num_errors()>
650
651Set (if argument specified) and retrieve the number of errors found.
652
653=cut
654
655sub num_errors {
656   return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
657}
658
659##################################
660
661=item C<$checker-E<gt>num_warnings()>
662
663Set (if argument specified) and retrieve the number of warnings found.
664
665=cut
666
667sub num_warnings {
668   return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS};
669}
670
671##################################
672
673=item C<$checker-E<gt>name()>
674
675Set (if argument specified) and retrieve the canonical name of POD as
676found in the C<=head1 NAME> section.
677
678=cut
679
680sub name {
681    return (@_ > 1 && $_[1]) ?
682        ($_[0]->{-name} = $_[1]) : $_[0]->{-name};
683}
684
685##################################
686
687=item C<$checker-E<gt>node()>
688
689Add (if argument specified) and retrieve the nodes (as defined by C<=headX>
690and C<=item>) of the current POD. The nodes are returned in the order of
691their occurrence. They consist of plain text, each piece of whitespace is
692collapsed to a single blank.
693
694=cut
695
696sub node {
697    my ($self,$text) = @_;
698    if(defined $text) {
699        $text =~ s/\s+$//s; # strip trailing whitespace
700        $text =~ s/\s+/ /gs; # collapse whitespace
701        # add node, order important!
702        push(@{$self->{_nodes}}, $text);
703        # keep also a uniqueness counter
704        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
705        return $text;
706    }
707    @{$self->{_nodes}};
708}
709
710##################################
711
712=item C<$checker-E<gt>idx()>
713
714Add (if argument specified) and retrieve the index entries (as defined by
715C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece
716of whitespace is collapsed to a single blank.
717
718=cut
719
720# set/return index entries of current POD
721sub idx {
722    my ($self,$text) = @_;
723    if(defined $text) {
724        $text =~ s/\s+$//s; # strip trailing whitespace
725        $text =~ s/\s+/ /gs; # collapse whitespace
726        # add node, order important!
727        push(@{$self->{_index}}, $text);
728        # keep also a uniqueness counter
729        $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s);
730        return $text;
731    }
732    @{$self->{_index}};
733}
734
735##################################
736
737=item C<$checker-E<gt>hyperlink()>
738
739Add (if argument specified) and retrieve the hyperlinks (as defined by
740C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line
741number and C<Pod::Hyperlink> object.
742
743=back
744
745=cut
746
747# set/return hyperlinks of the current POD
748sub hyperlink {
749    my $self = shift;
750    if($_[0]) {
751        push(@{$self->{_links}}, $_[0]);
752        return $_[0];
753    }
754    @{$self->{_links}};
755}
756
757## overrides for Pod::Parser
758
759sub end_pod {
760    ## Do some final checks and
761    ## print the number of errors found
762    my $self   = shift;
763    my $infile = $self->input_file();
764
765    if(@{$self->{_list_stack}}) {
766        my $list;
767        while(($list = $self->_close_list('EOF',$infile)) &&
768          $list->indent() ne 'auto') {
769            $self->poderror({ -line => 'EOF', -file => $infile,
770                -severity => 'ERROR', -msg => '=over on line ' .
771                $list->start() . ' without closing =back' });
772        }
773    }
774
775    # check validity of document internal hyperlinks
776    # first build the node names from the paragraph text
777    my %nodes;
778    foreach($self->node()) {
779        $nodes{$_} = 1;
780        if(/^(\S+)\s+\S/) {
781            # we have more than one word. Use the first as a node, too.
782            # This is used heavily in perlfunc.pod
783            $nodes{$1} ||= 2; # derived node
784        }
785    }
786    foreach($self->idx()) {
787        $nodes{$_} = 3; # index node
788    }
789    foreach($self->hyperlink()) {
790        my ($line,$link) = @$_;
791        # _TODO_ what if there is a link to the page itself by the name,
792        # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION">
793        if($link->node() && !$link->page() && $link->type() ne 'hyperlink') {
794            my $node = $self->_check_ptree($self->parse_text($link->node(),
795                $line), $line, $infile, 'L');
796            if($node && !$nodes{$node}) {
797                $self->poderror({ -line => $line || '', -file => $infile,
798                    -severity => 'ERROR',
799                    -msg => "unresolved internal link '$node'"});
800            }
801        }
802    }
803
804    # check the internal nodes for uniqueness. This pertains to
805    # =headX, =item and X<...>
806    if($self->{-warnings} && $self->{-warnings}>1) {
807      foreach(grep($self->{_unique_nodes}->{$_} > 1,
808        keys %{$self->{_unique_nodes}})) {
809          $self->poderror({ -line => '-', -file => $infile,
810            -severity => 'WARNING',
811            -msg => "multiple occurrence of link target '$_'"});
812      }
813    }
814
815    # no POD found here
816    $self->num_errors(-1) if($self->{_commands} == 0);
817}
818
819# check a POD command directive
820sub command {
821    my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
822    my ($file, $line) = $pod_para->file_line;
823    ## Check the command syntax
824    my $arg; # this will hold the command argument
825    if (! $VALID_COMMANDS{$cmd}) {
826       $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
827                         -msg => "Unknown command '$cmd'" });
828    }
829    else { # found a valid command
830        $self->{_commands}++; # delete this line if below is enabled again
831
832	$self->_commands_in_paragraphs($paragraph, $pod_para);
833
834        ##### following check disabled due to strong request
835        #if(!$self->{_commands}++ && $cmd !~ /^head/) {
836        #    $self->poderror({ -line => $line, -file => $file,
837        #         -severity => 'WARNING',
838        #         -msg => "file does not start with =head" });
839        #}
840
841        # check syntax of particular command
842        if($cmd eq 'over') {
843            # check for argument
844            $arg = $self->interpolate_and_check($paragraph, $line,$file);
845            my $indent = 4; # default
846            if($arg && $arg =~ /^\s*(\d+)\s*$/) {
847                $indent = $1;
848            }
849            # start a new list
850            $self->_open_list($indent,$line,$file);
851        }
852        elsif($cmd eq 'item') {
853            # are we in a list?
854            unless(@{$self->{_list_stack}}) {
855                $self->poderror({ -line => $line, -file => $file,
856                     -severity => 'ERROR',
857                     -msg => '=item without previous =over' });
858                # auto-open in case we encounter many more
859                $self->_open_list('auto',$line,$file);
860            }
861            my $list = $self->{_list_stack}->[0];
862            # check whether the previous item had some contents
863            if(defined $self->{_list_item_contents} &&
864              $self->{_list_item_contents} == 0) {
865                $self->poderror({ -line => $line, -file => $file,
866                     -severity => 'WARNING',
867                     -msg => 'previous =item has no contents' });
868            }
869            if($list->{_has_par}) {
870                $self->poderror({ -line => $line, -file => $file,
871                     -severity => 'WARNING',
872                     -msg => 'preceding non-item paragraph(s)' });
873                delete $list->{_has_par};
874            }
875            # check for argument
876            $arg = $self->interpolate_and_check($paragraph, $line, $file);
877            if($arg && $arg =~ /(\S+)/) {
878                $arg =~ s/[\s\n]+$//;
879                my $type;
880                if($arg =~ /^[*]\s*(\S*.*)/) {
881                  $type = 'bullet';
882                  $self->{_list_item_contents} = $1 ? 1 : 0;
883                  $arg = $1;
884                }
885                elsif($arg =~ /^\d+\.?\s+(\S*)/) {
886                  $type = 'number';
887                  $self->{_list_item_contents} = $1 ? 1 : 0;
888                  $arg = $1;
889                }
890                else {
891                  $type = 'definition';
892                  $self->{_list_item_contents} = 1;
893                }
894                my $first = $list->type();
895                if($first && $first ne $type) {
896                    $self->poderror({ -line => $line, -file => $file,
897                       -severity => 'WARNING',
898                       -msg => "=item type mismatch ('$first' vs. '$type')"});
899                }
900                else { # first item
901                    $list->type($type);
902                }
903            }
904            else {
905                $self->poderror({ -line => $line, -file => $file,
906                     -severity => 'WARNING',
907                     -msg => 'No argument for =item' });
908                $arg = ' '; # empty
909                $self->{_list_item_contents} = 0;
910            }
911            # add this item
912            $list->item($arg);
913            # remember this node
914            $self->node($arg);
915        }
916        elsif($cmd eq 'back') {
917            # check if we have an open list
918            unless(@{$self->{_list_stack}}) {
919                $self->poderror({ -line => $line, -file => $file,
920                         -severity => 'ERROR',
921                         -msg => '=back without previous =over' });
922            }
923            else {
924                # check for spurious characters
925                $arg = $self->interpolate_and_check($paragraph, $line,$file);
926                if($arg && $arg =~ /\S/) {
927                    $self->poderror({ -line => $line, -file => $file,
928                         -severity => 'ERROR',
929                         -msg => 'Spurious character(s) after =back' });
930                }
931                # close list
932                my $list = $self->_close_list($line,$file);
933                # check for empty lists
934                if(!$list->item() && $self->{-warnings}) {
935                    $self->poderror({ -line => $line, -file => $file,
936                         -severity => 'WARNING',
937                         -msg => 'No items in =over (at line ' .
938                         $list->start() . ') / =back list'});
939                }
940            }
941        }
942        elsif($cmd =~ /^head(\d+)/) {
943            my $hnum = $1;
944            $self->{"_have_head_$hnum"}++; # count head types
945            if($hnum > 1 && !$self->{'_have_head_'.($hnum -1)}) {
946              $self->poderror({ -line => $line, -file => $file,
947                   -severity => 'WARNING',
948                   -msg => "=head$hnum without preceding higher level"});
949            }
950            # check whether the previous =head section had some contents
951            if(defined $self->{_commands_in_head} &&
952              $self->{_commands_in_head} == 0 &&
953              defined $self->{_last_head} &&
954              $self->{_last_head} >= $hnum) {
955                $self->poderror({ -line => $line, -file => $file,
956                     -severity => 'WARNING',
957                     -msg => 'empty section in previous paragraph'});
958            }
959            $self->{_commands_in_head} = -1;
960            $self->{_last_head} = $hnum;
961            # check if there is an open list
962            if(@{$self->{_list_stack}}) {
963                my $list;
964                while(($list = $self->_close_list($line,$file)) &&
965                  $list->indent() ne 'auto') {
966                    $self->poderror({ -line => $line, -file => $file,
967                         -severity => 'ERROR',
968                         -msg => '=over on line '. $list->start() .
969                         " without closing =back (at $cmd)" });
970                }
971            }
972            # remember this node
973            $arg = $self->interpolate_and_check($paragraph, $line,$file);
974            $arg =~ s/[\s\n]+$//s;
975            $self->node($arg);
976            unless(length($arg)) {
977                $self->poderror({ -line => $line, -file => $file,
978                     -severity => 'ERROR',
979                     -msg => "empty =$cmd"});
980            }
981            if($cmd eq 'head1') {
982                $self->{_current_head1} = $arg;
983            } else {
984                $self->{_current_head1} = '';
985            }
986        }
987        elsif($cmd eq 'begin') {
988            if($self->{_have_begin}) {
989                # already have a begin
990                $self->poderror({ -line => $line, -file => $file,
991                     -severity => 'ERROR',
992                     -msg => q{Nested =begin's (first at line } .
993                     $self->{_have_begin} . ')'});
994            }
995            else {
996                # check for argument
997                $arg = $self->interpolate_and_check($paragraph, $line,$file);
998                unless($arg && $arg =~ /(\S+)/) {
999                    $self->poderror({ -line => $line, -file => $file,
1000                         -severity => 'ERROR',
1001                         -msg => 'No argument for =begin'});
1002                }
1003                # remember the =begin
1004                $self->{_have_begin} = "$line:$1";
1005            }
1006        }
1007        elsif($cmd eq 'end') {
1008            if($self->{_have_begin}) {
1009                # close the existing =begin
1010                $self->{_have_begin} = '';
1011                # check for spurious characters
1012                $arg = $self->interpolate_and_check($paragraph, $line,$file);
1013                # the closing argument is optional
1014                #if($arg && $arg =~ /\S/) {
1015                #    $self->poderror({ -line => $line, -file => $file,
1016                #         -severity => 'WARNING',
1017                #         -msg => "Spurious character(s) after =end" });
1018                #}
1019            }
1020            else {
1021                # don't have a matching =begin
1022                $self->poderror({ -line => $line, -file => $file,
1023                     -severity => 'ERROR',
1024                     -msg => '=end without =begin' });
1025            }
1026        }
1027        elsif($cmd eq 'for') {
1028            unless($paragraph =~ /\s*(\S+)\s*/) {
1029                $self->poderror({ -line => $line, -file => $file,
1030                     -severity => 'ERROR',
1031                     -msg => '=for without formatter specification' });
1032            }
1033            $arg = ''; # do not expand paragraph below
1034        }
1035        elsif($cmd =~ /^(pod|cut)$/) {
1036            # check for argument
1037            $arg = $self->interpolate_and_check($paragraph, $line,$file);
1038            if($arg && $arg =~ /(\S+)/) {
1039                $self->poderror({ -line => $line, -file => $file,
1040                      -severity => 'ERROR',
1041                      -msg => "Spurious text after =$cmd"});
1042            }
1043	    if($cmd eq 'cut' && (!$self->{_PREVIOUS} || $self->{_PREVIOUS} eq 'cut')) {
1044                $self->poderror({ -line => $line, -file => $file,
1045                      -severity => 'ERROR',
1046                      -msg => "Spurious =cut command"});
1047	    }
1048	    if($cmd eq 'pod' && $self->{_PREVIOUS} && $self->{_PREVIOUS} ne 'cut') {
1049                $self->poderror({ -line => $line, -file => $file,
1050                      -severity => 'ERROR',
1051                      -msg => "Spurious =pod command"});
1052	    }
1053        }
1054    $self->{_commands_in_head}++;
1055    ## Check the interior sequences in the command-text
1056    $self->interpolate_and_check($paragraph, $line,$file)
1057        unless(defined $arg);
1058    }
1059}
1060
1061sub _open_list
1062{
1063    my ($self,$indent,$line,$file) = @_;
1064    my $list = Pod::List->new(
1065           -indent => $indent,
1066           -start => $line,
1067           -file => $file);
1068    unshift(@{$self->{_list_stack}}, $list);
1069    undef $self->{_list_item_contents};
1070    $list;
1071}
1072
1073sub _close_list
1074{
1075    my ($self,$line,$file) = @_;
1076    my $list = shift(@{$self->{_list_stack}});
1077    if(defined $self->{_list_item_contents} &&
1078      $self->{_list_item_contents} == 0) {
1079        $self->poderror({ -line => $line, -file => $file,
1080            -severity => 'WARNING',
1081            -msg => 'previous =item has no contents' });
1082    }
1083    undef $self->{_list_item_contents};
1084    $list;
1085}
1086
1087# process a block of some text
1088sub interpolate_and_check {
1089    my ($self, $paragraph, $line, $file) = @_;
1090    ## Check the interior sequences in the command-text
1091    # and return the text
1092    $self->_check_ptree(
1093        $self->parse_text($paragraph,$line), $line, $file, '');
1094}
1095
1096sub _check_ptree {
1097    my ($self,$ptree,$line,$file,$nestlist) = @_;
1098    local($_);
1099    my $text = '';
1100    # process each node in the parse tree
1101    foreach(@$ptree) {
1102        # regular text chunk
1103        unless(ref) {
1104            # count the unescaped angle brackets
1105            # complain only when warning level is greater than 1
1106            if($self->{-warnings} && $self->{-warnings}>1) {
1107              my $count;
1108              if($count = tr/<>/<>/) {
1109                $self->poderror({ -line => $line, -file => $file,
1110                     -severity => 'WARNING',
1111                     -msg => "$count unescaped <> in paragraph" });
1112                }
1113            }
1114            $text .= $_;
1115            next;
1116        }
1117        # have an interior sequence
1118        my $cmd = $_->cmd_name();
1119        my $contents = $_->parse_tree();
1120        ($file,$line) = $_->file_line();
1121        # check for valid tag
1122        if (! $VALID_SEQUENCES{$cmd}) {
1123            $self->poderror({ -line => $line, -file => $file,
1124                 -severity => 'ERROR',
1125                 -msg => qq(Unknown interior-sequence '$cmd')});
1126            # expand it anyway
1127            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1128            next;
1129        }
1130        if(index($nestlist, $cmd) != -1) {
1131            $self->poderror({ -line => $line, -file => $file,
1132                 -severity => 'WARNING',
1133                 -msg => "nested commands $cmd<...$cmd<...>...>"});
1134            # _TODO_ should we add the contents anyway?
1135            # expand it anyway, see below
1136        }
1137        if($cmd eq 'E') {
1138            # preserve entities
1139            if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
1140                $self->poderror({ -line => $line, -file => $file,
1141                    -severity => 'ERROR',
1142                    -msg => 'garbled entity ' . $_->raw_text()});
1143                next;
1144            }
1145            my $ent = $$contents[0];
1146            my $val;
1147            if($ent =~ /^0x[0-9a-f]+$/i) {
1148                # hexadec entity
1149                $val = hex($ent);
1150            }
1151            elsif($ent =~ /^0\d+$/) {
1152                # octal
1153                $val = oct($ent);
1154            }
1155            elsif($ent =~ /^\d+$/) {
1156                # numeric entity
1157                $val = $ent;
1158            }
1159            if(defined $val) {
1160                if($val>0 && $val<256) {
1161                    $text .= chr($val);
1162                }
1163                else {
1164                    $self->poderror({ -line => $line, -file => $file,
1165                        -severity => 'ERROR',
1166                        -msg => 'Entity number out of range ' . $_->raw_text()});
1167                }
1168            }
1169            elsif($ENTITIES{$ent}) {
1170                # known ISO entity
1171                $text .= $ENTITIES{$ent};
1172            }
1173            else {
1174                $self->poderror({ -line => $line, -file => $file,
1175                    -severity => 'WARNING',
1176                    -msg => 'Unknown entity ' . $_->raw_text()});
1177                $text .= "E<$ent>";
1178            }
1179        }
1180        elsif($cmd eq 'L') {
1181            # try to parse the hyperlink
1182            my $link = Pod::Hyperlink->new($contents->raw_text());
1183            unless(defined $link) {
1184                $self->poderror({ -line => $line, -file => $file,
1185                    -severity => 'ERROR',
1186                    -msg => 'malformed link ' . $_->raw_text() ." : $@"});
1187                next;
1188            }
1189            $link->line($line); # remember line
1190            if($self->{-warnings}) {
1191                foreach my $w ($link->warning()) {
1192                    $self->poderror({ -line => $line, -file => $file,
1193                        -severity => 'WARNING',
1194                        -msg => $w });
1195                }
1196            }
1197            # check the link text
1198            $text .= $self->_check_ptree($self->parse_text($link->text(),
1199                $line), $line, $file, "$nestlist$cmd");
1200            # remember link
1201            $self->hyperlink([$line,$link]);
1202        }
1203        elsif($cmd =~ /[BCFIS]/) {
1204            # add the guts
1205            $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1206        }
1207        elsif($cmd eq 'Z') {
1208            if(length($contents->raw_text())) {
1209                $self->poderror({ -line => $line, -file => $file,
1210                    -severity => 'ERROR',
1211                    -msg => 'Nonempty Z<>'});
1212            }
1213        }
1214        elsif($cmd eq 'X') {
1215            my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
1216            if($idx =~ /^\s*$/s) {
1217                $self->poderror({ -line => $line, -file => $file,
1218                    -severity => 'ERROR',
1219                    -msg => 'Empty X<>'});
1220            }
1221            else {
1222                # remember this node
1223                $self->idx($idx);
1224            }
1225        }
1226        else {
1227            # not reached
1228            croak 'internal error';
1229        }
1230    }
1231    $text;
1232}
1233
1234# process a block of verbatim text
1235sub verbatim {
1236    ## Nothing particular to check
1237    my ($self, $paragraph, $line_num, $pod_para) = @_;
1238
1239    $self->_preproc_par($paragraph);
1240    $self->_commands_in_paragraphs($paragraph, $pod_para);
1241
1242    if($self->{_current_head1} eq 'NAME') {
1243        my ($file, $line) = $pod_para->file_line;
1244        $self->poderror({ -line => $line, -file => $file,
1245            -severity => 'WARNING',
1246            -msg => 'Verbatim paragraph in NAME section' });
1247    }
1248}
1249
1250# process a block of regular text
1251sub textblock {
1252    my ($self, $paragraph, $line_num, $pod_para) = @_;
1253    my ($file, $line) = $pod_para->file_line;
1254
1255    $self->_preproc_par($paragraph);
1256    $self->_commands_in_paragraphs($paragraph, $pod_para);
1257
1258    # skip this paragraph if in a =begin block
1259    unless($self->{_have_begin}) {
1260        my $block = $self->interpolate_and_check($paragraph, $line,$file);
1261        if($self->{_current_head1} eq 'NAME') {
1262            if($block =~ /^\s*(\S+?)\s*[,-]/) {
1263                # this is the canonical name
1264                $self->{-name} = $1 unless(defined $self->{-name});
1265            }
1266        }
1267    }
1268}
1269
1270sub _preproc_par
1271{
1272    my $self = shift;
1273    $_[0] =~ s/[\s\n]+$//;
1274    if($_[0]) {
1275        $self->{_commands_in_head}++;
1276        $self->{_list_item_contents}++ if(defined $self->{_list_item_contents});
1277        if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) {
1278            $self->{_list_stack}->[0]->{_has_par} = 1;
1279        }
1280    }
1281}
1282
1283# look for =foo commands at the start of a line within a paragraph, as for
1284# instance the following which prints as "* one =item two".
1285#
1286#     =item one
1287#     =item two
1288#
1289# Examples of =foo written in docs are expected to be indented in a verbatim
1290# or marked up C<=foo> so won't be caught.  A double-angle C<< =foo >> could
1291# have the =foo at the start of a line, but that should be unlikely and is
1292# easily enough dealt with by not putting a newline after the C<<.
1293#
1294sub _commands_in_paragraphs {
1295  my ($self, $str, $pod_para) = @_;
1296  while ($str =~ /[^\n]\n=([a-z][a-z0-9]+)/sg) {
1297    my $cmd = $1;
1298    my $pos = pos($str);
1299    if ($VALID_COMMANDS{$cmd}) {
1300      my ($file, $line) = $pod_para->file_line;
1301      my $part = substr($str, 0, $pos);
1302      $line += ($part =~ tr/\n//);  # count of newlines
1303
1304      $self->poderror
1305        ({ -line => $line, -file => $file,
1306           -severity => 'ERROR',
1307           -msg => "Apparent command =$cmd not preceded by blank line"});
1308    }
1309  }
1310}
1311
13121;
1313
1314__END__
1315
1316=head1 AUTHOR
1317
1318Please report bugs using L<http://rt.cpan.org>.
1319
1320Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
1321Marek Rouchal E<lt>marekr@cpan.orgE<gt>
1322
1323Based on code for B<Pod::Text::pod2text()> written by
1324Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
1325
1326B<Pod::Checker> is part of the Pod-Checker distribution, and is based on
1327L<Pod::Parser>.
1328
1329=cut
1330
1331