1############################################################################# 2# Pod/Checker.pm -- check pod documents for syntax errors 3# 4# Copyright (C) 1994-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Checker; 11use strict; 12 13use vars qw($VERSION @ISA @EXPORT %VALID_COMMANDS %VALID_SEQUENCES); 14$VERSION = '1.60'; ## Current version of this package 15require 5.005; ## requires this Perl version or later 16 17use Pod::ParseUtils; ## for hyperlinks and lists 18 19=head1 NAME 20 21Pod::Checker, podchecker() - check pod documents for syntax errors 22 23=head1 SYNOPSIS 24 25 use Pod::Checker; 26 27 $num_errors = podchecker($filepath, $outputpath, %options); 28 29 my $checker = new Pod::Checker %options; 30 $checker->parse_from_file($filepath, \*STDERR); 31 32=head1 OPTIONS/ARGUMENTS 33 34C<$filepath> is the input POD to read and C<$outputpath> is 35where to write POD syntax error messages. Either argument may be a scalar 36indicating a file-path, or else a reference to an open filehandle. 37If unspecified, the input-file it defaults to C<\*STDIN>, and 38the output-file defaults to C<\*STDERR>. 39 40=head2 podchecker() 41 42This function can take a hash of options: 43 44=over 4 45 46=item B<-warnings> =E<gt> I<val> 47 48Turn warnings on/off. I<val> is usually 1 for on, but higher values 49trigger additional warnings. See L<"Warnings">. 50 51=back 52 53=head1 DESCRIPTION 54 55B<podchecker> will perform syntax checking of Perl5 POD format documentation. 56 57Curious/ambitious users are welcome to propose additional features they wish 58to see in B<Pod::Checker> and B<podchecker> and verify that the checks are 59consistent with L<perlpod>. 60 61The following checks are currently performed: 62 63=over 4 64 65=item * 66 67Unknown '=xxxx' commands, unknown 'XE<lt>...E<gt>' interior-sequences, 68and unterminated interior sequences. 69 70=item * 71 72Check for proper balancing of C<=begin> and C<=end>. The contents of such 73a block are generally ignored, i.e. no syntax checks are performed. 74 75=item * 76 77Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>. 78 79=item * 80 81Check for same nested interior-sequences (e.g. 82C<LE<lt>...LE<lt>...E<gt>...E<gt>>). 83 84=item * 85 86Check for malformed or non-existing entities C<EE<lt>...E<gt>>. 87 88=item * 89 90Check for correct syntax of hyperlinks C<LE<lt>...E<gt>>. See L<perlpod> 91for details. 92 93=item * 94 95Check for unresolved document-internal links. This check may also reveal 96misspelled links that seem to be internal links but should be links 97to something else. 98 99=back 100 101=head1 DIAGNOSTICS 102 103=head2 Errors 104 105=over 4 106 107=item * empty =headn 108 109A heading (C<=head1> or C<=head2>) without any text? That ain't no 110heading! 111 112=item * =over on line I<N> without closing =back 113 114The C<=over> command does not have a corresponding C<=back> before the 115next heading (C<=head1> or C<=head2>) or the end of the file. 116 117=item * =item without previous =over 118 119=item * =back without previous =over 120 121An C<=item> or C<=back> command has been found outside a 122C<=over>/C<=back> block. 123 124=item * No argument for =begin 125 126A C<=begin> command was found that is not followed by the formatter 127specification. 128 129=item * =end without =begin 130 131A standalone C<=end> command was found. 132 133=item * Nested =begin's 134 135There were at least two consecutive C<=begin> commands without 136the corresponding C<=end>. Only one C<=begin> may be active at 137a time. 138 139=item * =for without formatter specification 140 141There is no specification of the formatter after the C<=for> command. 142 143=item * Apparent command =foo not preceded by blank line 144 145A command which has ended up in the middle of a paragraph or other command, 146such as 147 148 =item one 149 =item two <-- bad 150 151=item * unresolved internal link I<NAME> 152 153The given link to I<NAME> does not have a matching node in the current 154POD. This also happened when a single word node name is not enclosed in 155C<"">. 156 157=item * Unknown command "I<CMD>" 158 159An invalid POD command has been found. Valid are C<=head1>, C<=head2>, 160C<=head3>, C<=head4>, C<=over>, C<=item>, C<=back>, C<=begin>, C<=end>, 161C<=for>, C<=pod>, C<=cut> 162 163=item * Unknown interior-sequence "I<SEQ>" 164 165An invalid markup command has been encountered. Valid are: 166C<BE<lt>E<gt>>, C<CE<lt>E<gt>>, C<EE<lt>E<gt>>, C<FE<lt>E<gt>>, 167C<IE<lt>E<gt>>, C<LE<lt>E<gt>>, C<SE<lt>E<gt>>, C<XE<lt>E<gt>>, 168C<ZE<lt>E<gt>> 169 170=item * nested commands I<CMD>E<lt>...I<CMD>E<lt>...E<gt>...E<gt> 171 172Two nested identical markup commands have been found. Generally this 173does not make sense. 174 175=item * garbled entity I<STRING> 176 177The I<STRING> found cannot be interpreted as a character entity. 178 179=item * Entity number out of range 180 181An entity specified by number (dec, hex, oct) is out of range (1-255). 182 183=item * malformed link LE<lt>E<gt> 184 185The link found cannot be parsed because it does not conform to the 186syntax described in L<perlpod>. 187 188=item * nonempty ZE<lt>E<gt> 189 190The C<ZE<lt>E<gt>> sequence is supposed to be empty. 191 192=item * empty XE<lt>E<gt> 193 194The index entry specified contains nothing but whitespace. 195 196=item * Spurious text after =pod / =cut 197 198The commands C<=pod> and C<=cut> do not take any arguments. 199 200=item * Spurious =cut command 201 202A C<=cut> command was found without a preceding POD paragraph. 203 204=item * Spurious =pod command 205 206A C<=pod> command was found after a preceding POD paragraph. 207 208=item * Spurious character(s) after =back 209 210The C<=back> command does not take any arguments. 211 212=back 213 214=head2 Warnings 215 216These may not necessarily cause trouble, but indicate mediocre style. 217 218=over 4 219 220=item * multiple occurrence of link target I<name> 221 222The POD file has some C<=item> and/or C<=head> commands that have 223the same text. Potential hyperlinks to such a text cannot be unique then. 224This warning is printed only with warning level greater than one. 225 226=item * line containing nothing but whitespace in paragraph 227 228There is some whitespace on a seemingly empty line. POD is very sensitive 229to such things, so this is flagged. B<vi> users switch on the B<list> 230option to avoid this problem. 231 232=begin _disabled_ 233 234=item * file does not start with =head 235 236The file starts with a different POD directive than head. 237This is most probably something you do not want. 238 239=end _disabled_ 240 241=item * previous =item has no contents 242 243There is a list C<=item> right above the flagged line that has no 244text contents. You probably want to delete empty items. 245 246=item * preceding non-item paragraph(s) 247 248A list introduced by C<=over> starts with a text or verbatim paragraph, 249but continues with C<=item>s. Move the non-item paragraph out of the 250C<=over>/C<=back> block. 251 252=item * =item type mismatch (I<one> vs. I<two>) 253 254A list started with e.g. a bullet-like C<=item> and continued with a 255numbered one. This is obviously inconsistent. For most translators the 256type of the I<first> C<=item> determines the type of the list. 257 258=item * I<N> unescaped C<E<lt>E<gt>> in paragraph 259 260Angle brackets not written as C<E<lt>ltE<gt>> and C<E<lt>gtE<gt>> 261can potentially cause errors as they could be misinterpreted as 262markup commands. This is only printed when the -warnings level is 263greater than 1. 264 265=item * Unknown entity 266 267A character entity was found that does not belong to the standard 268ISO set or the POD specials C<verbar> and C<sol>. 269 270=item * No items in =over 271 272The list opened with C<=over> does not contain any items. 273 274=item * No argument for =item 275 276C<=item> without any parameters is deprecated. It should either be followed 277by C<*> to indicate an unordered list, by a number (optionally followed 278by a dot) to indicate an ordered (numbered) list or simple text for a 279definition list. 280 281=item * empty section in previous paragraph 282 283The previous section (introduced by a C<=head> command) does not contain 284any text. This usually indicates that something is missing. Note: A 285C<=head1> followed immediately by C<=head2> does not trigger this warning. 286 287=item * Verbatim paragraph in NAME section 288 289The NAME section (C<=head1 NAME>) should consist of a single paragraph 290with the script/module name, followed by a dash `-' and a very short 291description of what the thing is good for. 292 293=item * =headI<n> without preceding higher level 294 295For example if there is a C<=head2> in the POD file prior to a 296C<=head1>. 297 298=back 299 300=head2 Hyperlinks 301 302There are some warnings with respect to malformed hyperlinks: 303 304=over 4 305 306=item * ignoring leading/trailing whitespace in link 307 308There is whitespace at the beginning or the end of the contents of 309LE<lt>...E<gt>. 310 311=item * (section) in '$page' deprecated 312 313There is a section detected in the page name of LE<lt>...E<gt>, e.g. 314C<LE<lt>passwd(2)E<gt>>. POD hyperlinks may point to POD documents only. 315Please write C<CE<lt>passwd(2)E<gt>> instead. Some formatters are able 316to expand this to appropriate code. For links to (builtin) functions, 317please say C<LE<lt>perlfunc/mkdirE<gt>>, without (). 318 319=item * alternative text/node '%s' contains non-escaped | or / 320 321The characters C<|> and C</> are special in the LE<lt>...E<gt> context. 322Although the hyperlink parser does its best to determine which "/" is 323text and which is a delimiter in case of doubt, one ought to escape 324these literal characters like this: 325 326 / E<sol> 327 | E<verbar> 328 329=back 330 331=head1 RETURN VALUE 332 333B<podchecker> returns the number of POD syntax errors found or -1 if 334there were no POD commands at all found in the file. 335 336=head1 EXAMPLES 337 338See L</SYNOPSIS> 339 340=head1 INTERFACE 341 342While checking, this module collects document properties, e.g. the nodes 343for hyperlinks (C<=headX>, C<=item>) and index entries (C<XE<lt>E<gt>>). 344POD translators can use this feature to syntax-check and get the nodes in 345a first pass before actually starting to convert. This is expensive in terms 346of execution time, but allows for very robust conversions. 347 348Since PodParser-1.24 the B<Pod::Checker> module uses only the B<poderror> 349method to print errors and warnings. The summary output (e.g. 350"Pod syntax OK") has been dropped from the module and has been included in 351B<podchecker> (the script). This allows users of B<Pod::Checker> to 352control completely the output behavior. Users of B<podchecker> (the script) 353get the well-known behavior. 354 355=cut 356 357############################################################################# 358 359#use diagnostics; 360use Carp qw(croak); 361use Exporter; 362use Pod::Parser; 363 364@ISA = qw(Pod::Parser); 365@EXPORT = qw(&podchecker); 366 367my %VALID_COMMANDS = ( 368 'pod' => 1, 369 'cut' => 1, 370 'head1' => 1, 371 'head2' => 1, 372 'head3' => 1, 373 'head4' => 1, 374 'over' => 1, 375 'back' => 1, 376 'item' => 1, 377 'for' => 1, 378 'begin' => 1, 379 'end' => 1, 380 'encoding' => 1, 381); 382 383my %VALID_SEQUENCES = ( 384 'I' => 1, 385 'B' => 1, 386 'S' => 1, 387 'C' => 1, 388 'L' => 1, 389 'F' => 1, 390 'X' => 1, 391 'Z' => 1, 392 'E' => 1, 393); 394 395# stolen from HTML::Entities 396my %ENTITIES = ( 397 # Some normal chars that have special meaning in SGML context 398 amp => '&', # ampersand 399'gt' => '>', # greater than 400'lt' => '<', # less than 401 quot => '"', # double quote 402 403 # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML 404 AElig => '�', # capital AE diphthong (ligature) 405 Aacute => '�', # capital A, acute accent 406 Acirc => '�', # capital A, circumflex accent 407 Agrave => '�', # capital A, grave accent 408 Aring => '�', # capital A, ring 409 Atilde => '�', # capital A, tilde 410 Auml => '�', # capital A, dieresis or umlaut mark 411 Ccedil => '�', # capital C, cedilla 412 ETH => '�', # capital Eth, Icelandic 413 Eacute => '�', # capital E, acute accent 414 Ecirc => '�', # capital E, circumflex accent 415 Egrave => '�', # capital E, grave accent 416 Euml => '�', # capital E, dieresis or umlaut mark 417 Iacute => '�', # capital I, acute accent 418 Icirc => '�', # capital I, circumflex accent 419 Igrave => '�', # capital I, grave accent 420 Iuml => '�', # capital I, dieresis or umlaut mark 421 Ntilde => '�', # capital N, tilde 422 Oacute => '�', # capital O, acute accent 423 Ocirc => '�', # capital O, circumflex accent 424 Ograve => '�', # capital O, grave accent 425 Oslash => '�', # capital O, slash 426 Otilde => '�', # capital O, tilde 427 Ouml => '�', # capital O, dieresis or umlaut mark 428 THORN => '�', # capital THORN, Icelandic 429 Uacute => '�', # capital U, acute accent 430 Ucirc => '�', # capital U, circumflex accent 431 Ugrave => '�', # capital U, grave accent 432 Uuml => '�', # capital U, dieresis or umlaut mark 433 Yacute => '�', # capital Y, acute accent 434 aacute => '�', # small a, acute accent 435 acirc => '�', # small a, circumflex accent 436 aelig => '�', # small ae diphthong (ligature) 437 agrave => '�', # small a, grave accent 438 aring => '�', # small a, ring 439 atilde => '�', # small a, tilde 440 auml => '�', # small a, dieresis or umlaut mark 441 ccedil => '�', # small c, cedilla 442 eacute => '�', # small e, acute accent 443 ecirc => '�', # small e, circumflex accent 444 egrave => '�', # small e, grave accent 445 eth => '�', # small eth, Icelandic 446 euml => '�', # small e, dieresis or umlaut mark 447 iacute => '�', # small i, acute accent 448 icirc => '�', # small i, circumflex accent 449 igrave => '�', # small i, grave accent 450 iuml => '�', # small i, dieresis or umlaut mark 451 ntilde => '�', # small n, tilde 452 oacute => '�', # small o, acute accent 453 ocirc => '�', # small o, circumflex accent 454 ograve => '�', # small o, grave accent 455 oslash => '�', # small o, slash 456 otilde => '�', # small o, tilde 457 ouml => '�', # small o, dieresis or umlaut mark 458 szlig => '�', # small sharp s, German (sz ligature) 459 thorn => '�', # small thorn, Icelandic 460 uacute => '�', # small u, acute accent 461 ucirc => '�', # small u, circumflex accent 462 ugrave => '�', # small u, grave accent 463 uuml => '�', # small u, dieresis or umlaut mark 464 yacute => '�', # small y, acute accent 465 yuml => '�', # small y, dieresis or umlaut mark 466 467 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) 468 copy => '�', # copyright sign 469 reg => '�', # registered sign 470 nbsp => "\240", # non breaking space 471 472 # Additional ISO-8859/1 entities listed in rfc1866 (section 14) 473 iexcl => '�', 474 cent => '�', 475 pound => '�', 476 curren => '�', 477 yen => '�', 478 brvbar => '�', 479 sect => '�', 480 uml => '�', 481 ordf => '�', 482 laquo => '�', 483'not' => '�', # not is a keyword in perl 484 shy => '�', 485 macr => '�', 486 deg => '�', 487 plusmn => '�', 488 sup1 => '�', 489 sup2 => '�', 490 sup3 => '�', 491 acute => '�', 492 micro => '�', 493 para => '�', 494 middot => '�', 495 cedil => '�', 496 ordm => '�', 497 raquo => '�', 498 frac14 => '�', 499 frac12 => '�', 500 frac34 => '�', 501 iquest => '�', 502'times' => '�', # times is a keyword in perl 503 divide => '�', 504 505# some POD special entities 506 verbar => '|', 507 sol => '/' 508); 509 510##--------------------------------------------------------------------------- 511 512##--------------------------------- 513## Function definitions begin here 514##--------------------------------- 515 516sub podchecker { 517 my ($infile, $outfile, %options) = @_; 518 local $_; 519 520 ## Set defaults 521 $infile ||= \*STDIN; 522 $outfile ||= \*STDERR; 523 524 ## Now create a pod checker 525 my $checker = new Pod::Checker(%options); 526 527 ## Now check the pod document for errors 528 $checker->parse_from_file($infile, $outfile); 529 530 ## Return the number of errors found 531 return $checker->num_errors(); 532} 533 534##--------------------------------------------------------------------------- 535 536##------------------------------- 537## Method definitions begin here 538##------------------------------- 539 540################################## 541 542=over 4 543 544=item C<Pod::Checker-E<gt>new( %options )> 545 546Return a reference to a new Pod::Checker object that inherits from 547Pod::Parser and is used for calling the required methods later. The 548following options are recognized: 549 550C<-warnings =E<gt> num> 551 Print warnings if C<num> is true. The higher the value of C<num>, 552the more warnings are printed. Currently there are only levels 1 and 2. 553 554C<-quiet =E<gt> num> 555 If C<num> is true, do not print any errors/warnings. This is useful 556when Pod::Checker is used to munge POD code into plain text from within 557POD formatters. 558 559=cut 560 561## sub new { 562## my $this = shift; 563## my $class = ref($this) || $this; 564## my %params = @_; 565## my $self = {%params}; 566## bless $self, $class; 567## $self->initialize(); 568## return $self; 569## } 570 571sub initialize { 572 my $self = shift; 573 ## Initialize number of errors, and setup an error function to 574 ## increment this number and then print to the designated output. 575 $self->{_NUM_ERRORS} = 0; 576 $self->{_NUM_WARNINGS} = 0; 577 $self->{-quiet} ||= 0; 578 # set the error handling subroutine 579 $self->errorsub($self->{-quiet} ? sub { 1; } : 'poderror'); 580 $self->{_commands} = 0; # total number of POD commands encountered 581 $self->{_list_stack} = []; # stack for nested lists 582 $self->{_have_begin} = ''; # stores =begin 583 $self->{_links} = []; # stack for internal hyperlinks 584 $self->{_nodes} = []; # stack for =head/=item nodes 585 $self->{_index} = []; # text in X<> 586 # print warnings? 587 $self->{-warnings} = 1 unless(defined $self->{-warnings}); 588 $self->{_current_head1} = ''; # the current =head1 block 589 $self->parseopts(-process_cut_cmd => 1, -warnings => $self->{-warnings}); 590} 591 592################################## 593 594=item C<$checker-E<gt>poderror( @args )> 595 596=item C<$checker-E<gt>poderror( {%opts}, @args )> 597 598Internal method for printing errors and warnings. If no options are 599given, simply prints "@_". The following options are recognized and used 600to form the output: 601 602 -msg 603 604A message to print prior to C<@args>. 605 606 -line 607 608The line number the error occurred in. 609 610 -file 611 612The file (name) the error occurred in. 613 614 -severity 615 616The error level, should be 'WARNING' or 'ERROR'. 617 618=cut 619 620# Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args ) 621sub poderror { 622 my $self = shift; 623 my %opts = (ref $_[0]) ? %{shift()} : (); 624 625 ## Retrieve options 626 chomp( my $msg = ($opts{-msg} || '')."@_" ); 627 my $line = (exists $opts{-line}) ? " at line $opts{-line}" : ''; 628 my $file = (exists $opts{-file}) ? " in file $opts{-file}" : ''; 629 unless (exists $opts{-severity}) { 630 ## See if can find severity in message prefix 631 $opts{-severity} = $1 if ( $msg =~ s/^\**\s*([A-Z]{3,}):\s+// ); 632 } 633 my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : ''; 634 635 ## Increment error count and print message " 636 ++($self->{_NUM_ERRORS}) 637 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR')); 638 ++($self->{_NUM_WARNINGS}) 639 if(!%opts || ($opts{-severity} && $opts{-severity} eq 'WARNING')); 640 unless($self->{-quiet}) { 641 my $out_fh = $self->output_handle() || \*STDERR; 642 print $out_fh ($severity, $msg, $line, $file, "\n") 643 if($self->{-warnings} || !%opts || $opts{-severity} ne 'WARNING'); 644 } 645} 646 647################################## 648 649=item C<$checker-E<gt>num_errors()> 650 651Set (if argument specified) and retrieve the number of errors found. 652 653=cut 654 655sub num_errors { 656 return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS}; 657} 658 659################################## 660 661=item C<$checker-E<gt>num_warnings()> 662 663Set (if argument specified) and retrieve the number of warnings found. 664 665=cut 666 667sub num_warnings { 668 return (@_ > 1) ? ($_[0]->{_NUM_WARNINGS} = $_[1]) : $_[0]->{_NUM_WARNINGS}; 669} 670 671################################## 672 673=item C<$checker-E<gt>name()> 674 675Set (if argument specified) and retrieve the canonical name of POD as 676found in the C<=head1 NAME> section. 677 678=cut 679 680sub name { 681 return (@_ > 1 && $_[1]) ? 682 ($_[0]->{-name} = $_[1]) : $_[0]->{-name}; 683} 684 685################################## 686 687=item C<$checker-E<gt>node()> 688 689Add (if argument specified) and retrieve the nodes (as defined by C<=headX> 690and C<=item>) of the current POD. The nodes are returned in the order of 691their occurrence. They consist of plain text, each piece of whitespace is 692collapsed to a single blank. 693 694=cut 695 696sub node { 697 my ($self,$text) = @_; 698 if(defined $text) { 699 $text =~ s/\s+$//s; # strip trailing whitespace 700 $text =~ s/\s+/ /gs; # collapse whitespace 701 # add node, order important! 702 push(@{$self->{_nodes}}, $text); 703 # keep also a uniqueness counter 704 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 705 return $text; 706 } 707 @{$self->{_nodes}}; 708} 709 710################################## 711 712=item C<$checker-E<gt>idx()> 713 714Add (if argument specified) and retrieve the index entries (as defined by 715C<XE<lt>E<gt>>) of the current POD. They consist of plain text, each piece 716of whitespace is collapsed to a single blank. 717 718=cut 719 720# set/return index entries of current POD 721sub idx { 722 my ($self,$text) = @_; 723 if(defined $text) { 724 $text =~ s/\s+$//s; # strip trailing whitespace 725 $text =~ s/\s+/ /gs; # collapse whitespace 726 # add node, order important! 727 push(@{$self->{_index}}, $text); 728 # keep also a uniqueness counter 729 $self->{_unique_nodes}->{$text}++ if($text !~ /^\s*$/s); 730 return $text; 731 } 732 @{$self->{_index}}; 733} 734 735################################## 736 737=item C<$checker-E<gt>hyperlink()> 738 739Add (if argument specified) and retrieve the hyperlinks (as defined by 740C<LE<lt>E<gt>>) of the current POD. They consist of a 2-item array: line 741number and C<Pod::Hyperlink> object. 742 743=back 744 745=cut 746 747# set/return hyperlinks of the current POD 748sub hyperlink { 749 my $self = shift; 750 if($_[0]) { 751 push(@{$self->{_links}}, $_[0]); 752 return $_[0]; 753 } 754 @{$self->{_links}}; 755} 756 757## overrides for Pod::Parser 758 759sub end_pod { 760 ## Do some final checks and 761 ## print the number of errors found 762 my $self = shift; 763 my $infile = $self->input_file(); 764 765 if(@{$self->{_list_stack}}) { 766 my $list; 767 while(($list = $self->_close_list('EOF',$infile)) && 768 $list->indent() ne 'auto') { 769 $self->poderror({ -line => 'EOF', -file => $infile, 770 -severity => 'ERROR', -msg => '=over on line ' . 771 $list->start() . ' without closing =back' }); 772 } 773 } 774 775 # check validity of document internal hyperlinks 776 # first build the node names from the paragraph text 777 my %nodes; 778 foreach($self->node()) { 779 $nodes{$_} = 1; 780 if(/^(\S+)\s+\S/) { 781 # we have more than one word. Use the first as a node, too. 782 # This is used heavily in perlfunc.pod 783 $nodes{$1} ||= 2; # derived node 784 } 785 } 786 foreach($self->idx()) { 787 $nodes{$_} = 3; # index node 788 } 789 foreach($self->hyperlink()) { 790 my ($line,$link) = @$_; 791 # _TODO_ what if there is a link to the page itself by the name, 792 # e.g. in Tk::Pod : L<Tk::Pod/"DESCRIPTION"> 793 if($link->node() && !$link->page() && $link->type() ne 'hyperlink') { 794 my $node = $self->_check_ptree($self->parse_text($link->node(), 795 $line), $line, $infile, 'L'); 796 if($node && !$nodes{$node}) { 797 $self->poderror({ -line => $line || '', -file => $infile, 798 -severity => 'ERROR', 799 -msg => "unresolved internal link '$node'"}); 800 } 801 } 802 } 803 804 # check the internal nodes for uniqueness. This pertains to 805 # =headX, =item and X<...> 806 if($self->{-warnings} && $self->{-warnings}>1) { 807 foreach(grep($self->{_unique_nodes}->{$_} > 1, 808 keys %{$self->{_unique_nodes}})) { 809 $self->poderror({ -line => '-', -file => $infile, 810 -severity => 'WARNING', 811 -msg => "multiple occurrence of link target '$_'"}); 812 } 813 } 814 815 # no POD found here 816 $self->num_errors(-1) if($self->{_commands} == 0); 817} 818 819# check a POD command directive 820sub command { 821 my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_; 822 my ($file, $line) = $pod_para->file_line; 823 ## Check the command syntax 824 my $arg; # this will hold the command argument 825 if (! $VALID_COMMANDS{$cmd}) { 826 $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR', 827 -msg => "Unknown command '$cmd'" }); 828 } 829 else { # found a valid command 830 $self->{_commands}++; # delete this line if below is enabled again 831 832 $self->_commands_in_paragraphs($paragraph, $pod_para); 833 834 ##### following check disabled due to strong request 835 #if(!$self->{_commands}++ && $cmd !~ /^head/) { 836 # $self->poderror({ -line => $line, -file => $file, 837 # -severity => 'WARNING', 838 # -msg => "file does not start with =head" }); 839 #} 840 841 # check syntax of particular command 842 if($cmd eq 'over') { 843 # check for argument 844 $arg = $self->interpolate_and_check($paragraph, $line,$file); 845 my $indent = 4; # default 846 if($arg && $arg =~ /^\s*(\d+)\s*$/) { 847 $indent = $1; 848 } 849 # start a new list 850 $self->_open_list($indent,$line,$file); 851 } 852 elsif($cmd eq 'item') { 853 # are we in a list? 854 unless(@{$self->{_list_stack}}) { 855 $self->poderror({ -line => $line, -file => $file, 856 -severity => 'ERROR', 857 -msg => '=item without previous =over' }); 858 # auto-open in case we encounter many more 859 $self->_open_list('auto',$line,$file); 860 } 861 my $list = $self->{_list_stack}->[0]; 862 # check whether the previous item had some contents 863 if(defined $self->{_list_item_contents} && 864 $self->{_list_item_contents} == 0) { 865 $self->poderror({ -line => $line, -file => $file, 866 -severity => 'WARNING', 867 -msg => 'previous =item has no contents' }); 868 } 869 if($list->{_has_par}) { 870 $self->poderror({ -line => $line, -file => $file, 871 -severity => 'WARNING', 872 -msg => 'preceding non-item paragraph(s)' }); 873 delete $list->{_has_par}; 874 } 875 # check for argument 876 $arg = $self->interpolate_and_check($paragraph, $line, $file); 877 if($arg && $arg =~ /(\S+)/) { 878 $arg =~ s/[\s\n]+$//; 879 my $type; 880 if($arg =~ /^[*]\s*(\S*.*)/) { 881 $type = 'bullet'; 882 $self->{_list_item_contents} = $1 ? 1 : 0; 883 $arg = $1; 884 } 885 elsif($arg =~ /^\d+\.?\s+(\S*)/) { 886 $type = 'number'; 887 $self->{_list_item_contents} = $1 ? 1 : 0; 888 $arg = $1; 889 } 890 else { 891 $type = 'definition'; 892 $self->{_list_item_contents} = 1; 893 } 894 my $first = $list->type(); 895 if($first && $first ne $type) { 896 $self->poderror({ -line => $line, -file => $file, 897 -severity => 'WARNING', 898 -msg => "=item type mismatch ('$first' vs. '$type')"}); 899 } 900 else { # first item 901 $list->type($type); 902 } 903 } 904 else { 905 $self->poderror({ -line => $line, -file => $file, 906 -severity => 'WARNING', 907 -msg => 'No argument for =item' }); 908 $arg = ' '; # empty 909 $self->{_list_item_contents} = 0; 910 } 911 # add this item 912 $list->item($arg); 913 # remember this node 914 $self->node($arg); 915 } 916 elsif($cmd eq 'back') { 917 # check if we have an open list 918 unless(@{$self->{_list_stack}}) { 919 $self->poderror({ -line => $line, -file => $file, 920 -severity => 'ERROR', 921 -msg => '=back without previous =over' }); 922 } 923 else { 924 # check for spurious characters 925 $arg = $self->interpolate_and_check($paragraph, $line,$file); 926 if($arg && $arg =~ /\S/) { 927 $self->poderror({ -line => $line, -file => $file, 928 -severity => 'ERROR', 929 -msg => 'Spurious character(s) after =back' }); 930 } 931 # close list 932 my $list = $self->_close_list($line,$file); 933 # check for empty lists 934 if(!$list->item() && $self->{-warnings}) { 935 $self->poderror({ -line => $line, -file => $file, 936 -severity => 'WARNING', 937 -msg => 'No items in =over (at line ' . 938 $list->start() . ') / =back list'}); 939 } 940 } 941 } 942 elsif($cmd =~ /^head(\d+)/) { 943 my $hnum = $1; 944 $self->{"_have_head_$hnum"}++; # count head types 945 if($hnum > 1 && !$self->{'_have_head_'.($hnum -1)}) { 946 $self->poderror({ -line => $line, -file => $file, 947 -severity => 'WARNING', 948 -msg => "=head$hnum without preceding higher level"}); 949 } 950 # check whether the previous =head section had some contents 951 if(defined $self->{_commands_in_head} && 952 $self->{_commands_in_head} == 0 && 953 defined $self->{_last_head} && 954 $self->{_last_head} >= $hnum) { 955 $self->poderror({ -line => $line, -file => $file, 956 -severity => 'WARNING', 957 -msg => 'empty section in previous paragraph'}); 958 } 959 $self->{_commands_in_head} = -1; 960 $self->{_last_head} = $hnum; 961 # check if there is an open list 962 if(@{$self->{_list_stack}}) { 963 my $list; 964 while(($list = $self->_close_list($line,$file)) && 965 $list->indent() ne 'auto') { 966 $self->poderror({ -line => $line, -file => $file, 967 -severity => 'ERROR', 968 -msg => '=over on line '. $list->start() . 969 " without closing =back (at $cmd)" }); 970 } 971 } 972 # remember this node 973 $arg = $self->interpolate_and_check($paragraph, $line,$file); 974 $arg =~ s/[\s\n]+$//s; 975 $self->node($arg); 976 unless(length($arg)) { 977 $self->poderror({ -line => $line, -file => $file, 978 -severity => 'ERROR', 979 -msg => "empty =$cmd"}); 980 } 981 if($cmd eq 'head1') { 982 $self->{_current_head1} = $arg; 983 } else { 984 $self->{_current_head1} = ''; 985 } 986 } 987 elsif($cmd eq 'begin') { 988 if($self->{_have_begin}) { 989 # already have a begin 990 $self->poderror({ -line => $line, -file => $file, 991 -severity => 'ERROR', 992 -msg => q{Nested =begin's (first at line } . 993 $self->{_have_begin} . ')'}); 994 } 995 else { 996 # check for argument 997 $arg = $self->interpolate_and_check($paragraph, $line,$file); 998 unless($arg && $arg =~ /(\S+)/) { 999 $self->poderror({ -line => $line, -file => $file, 1000 -severity => 'ERROR', 1001 -msg => 'No argument for =begin'}); 1002 } 1003 # remember the =begin 1004 $self->{_have_begin} = "$line:$1"; 1005 } 1006 } 1007 elsif($cmd eq 'end') { 1008 if($self->{_have_begin}) { 1009 # close the existing =begin 1010 $self->{_have_begin} = ''; 1011 # check for spurious characters 1012 $arg = $self->interpolate_and_check($paragraph, $line,$file); 1013 # the closing argument is optional 1014 #if($arg && $arg =~ /\S/) { 1015 # $self->poderror({ -line => $line, -file => $file, 1016 # -severity => 'WARNING', 1017 # -msg => "Spurious character(s) after =end" }); 1018 #} 1019 } 1020 else { 1021 # don't have a matching =begin 1022 $self->poderror({ -line => $line, -file => $file, 1023 -severity => 'ERROR', 1024 -msg => '=end without =begin' }); 1025 } 1026 } 1027 elsif($cmd eq 'for') { 1028 unless($paragraph =~ /\s*(\S+)\s*/) { 1029 $self->poderror({ -line => $line, -file => $file, 1030 -severity => 'ERROR', 1031 -msg => '=for without formatter specification' }); 1032 } 1033 $arg = ''; # do not expand paragraph below 1034 } 1035 elsif($cmd =~ /^(pod|cut)$/) { 1036 # check for argument 1037 $arg = $self->interpolate_and_check($paragraph, $line,$file); 1038 if($arg && $arg =~ /(\S+)/) { 1039 $self->poderror({ -line => $line, -file => $file, 1040 -severity => 'ERROR', 1041 -msg => "Spurious text after =$cmd"}); 1042 } 1043 if($cmd eq 'cut' && (!$self->{_PREVIOUS} || $self->{_PREVIOUS} eq 'cut')) { 1044 $self->poderror({ -line => $line, -file => $file, 1045 -severity => 'ERROR', 1046 -msg => "Spurious =cut command"}); 1047 } 1048 if($cmd eq 'pod' && $self->{_PREVIOUS} && $self->{_PREVIOUS} ne 'cut') { 1049 $self->poderror({ -line => $line, -file => $file, 1050 -severity => 'ERROR', 1051 -msg => "Spurious =pod command"}); 1052 } 1053 } 1054 $self->{_commands_in_head}++; 1055 ## Check the interior sequences in the command-text 1056 $self->interpolate_and_check($paragraph, $line,$file) 1057 unless(defined $arg); 1058 } 1059} 1060 1061sub _open_list 1062{ 1063 my ($self,$indent,$line,$file) = @_; 1064 my $list = Pod::List->new( 1065 -indent => $indent, 1066 -start => $line, 1067 -file => $file); 1068 unshift(@{$self->{_list_stack}}, $list); 1069 undef $self->{_list_item_contents}; 1070 $list; 1071} 1072 1073sub _close_list 1074{ 1075 my ($self,$line,$file) = @_; 1076 my $list = shift(@{$self->{_list_stack}}); 1077 if(defined $self->{_list_item_contents} && 1078 $self->{_list_item_contents} == 0) { 1079 $self->poderror({ -line => $line, -file => $file, 1080 -severity => 'WARNING', 1081 -msg => 'previous =item has no contents' }); 1082 } 1083 undef $self->{_list_item_contents}; 1084 $list; 1085} 1086 1087# process a block of some text 1088sub interpolate_and_check { 1089 my ($self, $paragraph, $line, $file) = @_; 1090 ## Check the interior sequences in the command-text 1091 # and return the text 1092 $self->_check_ptree( 1093 $self->parse_text($paragraph,$line), $line, $file, ''); 1094} 1095 1096sub _check_ptree { 1097 my ($self,$ptree,$line,$file,$nestlist) = @_; 1098 local($_); 1099 my $text = ''; 1100 # process each node in the parse tree 1101 foreach(@$ptree) { 1102 # regular text chunk 1103 unless(ref) { 1104 # count the unescaped angle brackets 1105 # complain only when warning level is greater than 1 1106 if($self->{-warnings} && $self->{-warnings}>1) { 1107 my $count; 1108 if($count = tr/<>/<>/) { 1109 $self->poderror({ -line => $line, -file => $file, 1110 -severity => 'WARNING', 1111 -msg => "$count unescaped <> in paragraph" }); 1112 } 1113 } 1114 $text .= $_; 1115 next; 1116 } 1117 # have an interior sequence 1118 my $cmd = $_->cmd_name(); 1119 my $contents = $_->parse_tree(); 1120 ($file,$line) = $_->file_line(); 1121 # check for valid tag 1122 if (! $VALID_SEQUENCES{$cmd}) { 1123 $self->poderror({ -line => $line, -file => $file, 1124 -severity => 'ERROR', 1125 -msg => qq(Unknown interior-sequence '$cmd')}); 1126 # expand it anyway 1127 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1128 next; 1129 } 1130 if(index($nestlist, $cmd) != -1) { 1131 $self->poderror({ -line => $line, -file => $file, 1132 -severity => 'WARNING', 1133 -msg => "nested commands $cmd<...$cmd<...>...>"}); 1134 # _TODO_ should we add the contents anyway? 1135 # expand it anyway, see below 1136 } 1137 if($cmd eq 'E') { 1138 # preserve entities 1139 if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) { 1140 $self->poderror({ -line => $line, -file => $file, 1141 -severity => 'ERROR', 1142 -msg => 'garbled entity ' . $_->raw_text()}); 1143 next; 1144 } 1145 my $ent = $$contents[0]; 1146 my $val; 1147 if($ent =~ /^0x[0-9a-f]+$/i) { 1148 # hexadec entity 1149 $val = hex($ent); 1150 } 1151 elsif($ent =~ /^0\d+$/) { 1152 # octal 1153 $val = oct($ent); 1154 } 1155 elsif($ent =~ /^\d+$/) { 1156 # numeric entity 1157 $val = $ent; 1158 } 1159 if(defined $val) { 1160 if($val>0 && $val<256) { 1161 $text .= chr($val); 1162 } 1163 else { 1164 $self->poderror({ -line => $line, -file => $file, 1165 -severity => 'ERROR', 1166 -msg => 'Entity number out of range ' . $_->raw_text()}); 1167 } 1168 } 1169 elsif($ENTITIES{$ent}) { 1170 # known ISO entity 1171 $text .= $ENTITIES{$ent}; 1172 } 1173 else { 1174 $self->poderror({ -line => $line, -file => $file, 1175 -severity => 'WARNING', 1176 -msg => 'Unknown entity ' . $_->raw_text()}); 1177 $text .= "E<$ent>"; 1178 } 1179 } 1180 elsif($cmd eq 'L') { 1181 # try to parse the hyperlink 1182 my $link = Pod::Hyperlink->new($contents->raw_text()); 1183 unless(defined $link) { 1184 $self->poderror({ -line => $line, -file => $file, 1185 -severity => 'ERROR', 1186 -msg => 'malformed link ' . $_->raw_text() ." : $@"}); 1187 next; 1188 } 1189 $link->line($line); # remember line 1190 if($self->{-warnings}) { 1191 foreach my $w ($link->warning()) { 1192 $self->poderror({ -line => $line, -file => $file, 1193 -severity => 'WARNING', 1194 -msg => $w }); 1195 } 1196 } 1197 # check the link text 1198 $text .= $self->_check_ptree($self->parse_text($link->text(), 1199 $line), $line, $file, "$nestlist$cmd"); 1200 # remember link 1201 $self->hyperlink([$line,$link]); 1202 } 1203 elsif($cmd =~ /[BCFIS]/) { 1204 # add the guts 1205 $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1206 } 1207 elsif($cmd eq 'Z') { 1208 if(length($contents->raw_text())) { 1209 $self->poderror({ -line => $line, -file => $file, 1210 -severity => 'ERROR', 1211 -msg => 'Nonempty Z<>'}); 1212 } 1213 } 1214 elsif($cmd eq 'X') { 1215 my $idx = $self->_check_ptree($contents, $line, $file, "$nestlist$cmd"); 1216 if($idx =~ /^\s*$/s) { 1217 $self->poderror({ -line => $line, -file => $file, 1218 -severity => 'ERROR', 1219 -msg => 'Empty X<>'}); 1220 } 1221 else { 1222 # remember this node 1223 $self->idx($idx); 1224 } 1225 } 1226 else { 1227 # not reached 1228 croak 'internal error'; 1229 } 1230 } 1231 $text; 1232} 1233 1234# process a block of verbatim text 1235sub verbatim { 1236 ## Nothing particular to check 1237 my ($self, $paragraph, $line_num, $pod_para) = @_; 1238 1239 $self->_preproc_par($paragraph); 1240 $self->_commands_in_paragraphs($paragraph, $pod_para); 1241 1242 if($self->{_current_head1} eq 'NAME') { 1243 my ($file, $line) = $pod_para->file_line; 1244 $self->poderror({ -line => $line, -file => $file, 1245 -severity => 'WARNING', 1246 -msg => 'Verbatim paragraph in NAME section' }); 1247 } 1248} 1249 1250# process a block of regular text 1251sub textblock { 1252 my ($self, $paragraph, $line_num, $pod_para) = @_; 1253 my ($file, $line) = $pod_para->file_line; 1254 1255 $self->_preproc_par($paragraph); 1256 $self->_commands_in_paragraphs($paragraph, $pod_para); 1257 1258 # skip this paragraph if in a =begin block 1259 unless($self->{_have_begin}) { 1260 my $block = $self->interpolate_and_check($paragraph, $line,$file); 1261 if($self->{_current_head1} eq 'NAME') { 1262 if($block =~ /^\s*(\S+?)\s*[,-]/) { 1263 # this is the canonical name 1264 $self->{-name} = $1 unless(defined $self->{-name}); 1265 } 1266 } 1267 } 1268} 1269 1270sub _preproc_par 1271{ 1272 my $self = shift; 1273 $_[0] =~ s/[\s\n]+$//; 1274 if($_[0]) { 1275 $self->{_commands_in_head}++; 1276 $self->{_list_item_contents}++ if(defined $self->{_list_item_contents}); 1277 if(@{$self->{_list_stack}} && !$self->{_list_stack}->[0]->item()) { 1278 $self->{_list_stack}->[0]->{_has_par} = 1; 1279 } 1280 } 1281} 1282 1283# look for =foo commands at the start of a line within a paragraph, as for 1284# instance the following which prints as "* one =item two". 1285# 1286# =item one 1287# =item two 1288# 1289# Examples of =foo written in docs are expected to be indented in a verbatim 1290# or marked up C<=foo> so won't be caught. A double-angle C<< =foo >> could 1291# have the =foo at the start of a line, but that should be unlikely and is 1292# easily enough dealt with by not putting a newline after the C<<. 1293# 1294sub _commands_in_paragraphs { 1295 my ($self, $str, $pod_para) = @_; 1296 while ($str =~ /[^\n]\n=([a-z][a-z0-9]+)/sg) { 1297 my $cmd = $1; 1298 my $pos = pos($str); 1299 if ($VALID_COMMANDS{$cmd}) { 1300 my ($file, $line) = $pod_para->file_line; 1301 my $part = substr($str, 0, $pos); 1302 $line += ($part =~ tr/\n//); # count of newlines 1303 1304 $self->poderror 1305 ({ -line => $line, -file => $file, 1306 -severity => 'ERROR', 1307 -msg => "Apparent command =$cmd not preceded by blank line"}); 1308 } 1309 } 1310} 1311 13121; 1313 1314__END__ 1315 1316=head1 AUTHOR 1317 1318Please report bugs using L<http://rt.cpan.org>. 1319 1320Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version), 1321Marek Rouchal E<lt>marekr@cpan.orgE<gt> 1322 1323Based on code for B<Pod::Text::pod2text()> written by 1324Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 1325 1326B<Pod::Checker> is part of the Pod-Checker distribution, and is based on 1327L<Pod::Parser>. 1328 1329=cut 1330 1331