1############################################################################# 2# Pod/Select.pm -- function to select portions of POD docs 3# 4# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved. 5# This file is part of "PodParser". PodParser is free software; 6# you can redistribute it and/or modify it under the same terms 7# as Perl itself. 8############################################################################# 9 10package Pod::Select; 11use strict; 12 13use vars qw($VERSION @ISA @EXPORT $MAX_HEADING_LEVEL %myData @section_headings @selected_sections); 14$VERSION = '1.60'; ## Current version of this package 15require 5.005; ## requires this Perl version or later 16 17############################################################################# 18 19=head1 NAME 20 21Pod::Select, podselect() - extract selected sections of POD from input 22 23=head1 SYNOPSIS 24 25 use Pod::Select; 26 27 ## Select all the POD sections for each file in @filelist 28 ## and print the result on standard output. 29 podselect(@filelist); 30 31 ## Same as above, but write to tmp.out 32 podselect({-output => "tmp.out"}, @filelist): 33 34 ## Select from the given filelist, only those POD sections that are 35 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 36 podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist): 37 38 ## Select the "DESCRIPTION" section of the PODs from STDIN and write 39 ## the result to STDERR. 40 podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN); 41 42or 43 44 use Pod::Select; 45 46 ## Create a parser object for selecting POD sections from the input 47 $parser = new Pod::Select(); 48 49 ## Select all the POD sections for each file in @filelist 50 ## and print the result to tmp.out. 51 $parser->parse_from_file("<&STDIN", "tmp.out"); 52 53 ## Select from the given filelist, only those POD sections that are 54 ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS. 55 $parser->select("NAME|SYNOPSIS", "OPTIONS"); 56 for (@filelist) { $parser->parse_from_file($_); } 57 58 ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from 59 ## STDIN and write the result to STDERR. 60 $parser->select("DESCRIPTION"); 61 $parser->add_selection("SEE ALSO"); 62 $parser->parse_from_filehandle(\*STDIN, \*STDERR); 63 64=head1 REQUIRES 65 66perl5.005, Pod::Parser, Exporter, Carp 67 68=head1 EXPORTS 69 70podselect() 71 72=head1 DESCRIPTION 73 74B<podselect()> is a function which will extract specified sections of 75pod documentation from an input stream. This ability is provided by the 76B<Pod::Select> module which is a subclass of B<Pod::Parser>. 77B<Pod::Select> provides a method named B<select()> to specify the set of 78POD sections to select for processing/printing. B<podselect()> merely 79creates a B<Pod::Select> object and then invokes the B<podselect()> 80followed by B<parse_from_file()>. 81 82=head1 SECTION SPECIFICATIONS 83 84B<podselect()> and B<Pod::Select::select()> may be given one or more 85"section specifications" to restrict the text processed to only the 86desired set of sections and their corresponding subsections. A section 87specification is a string containing one or more Perl-style regular 88expressions separated by forward slashes ("/"). If you need to use a 89forward slash literally within a section title you can escape it with a 90backslash ("\/"). 91 92The formal syntax of a section specification is: 93 94=over 4 95 96=item * 97 98I<head1-title-regex>/I<head2-title-regex>/... 99 100=back 101 102Any omitted or empty regular expressions will default to ".*". 103Please note that each regular expression given is implicitly 104anchored by adding "^" and "$" to the beginning and end. Also, if a 105given regular expression starts with a "!" character, then the 106expression is I<negated> (so C<!foo> would match anything I<except> 107C<foo>). 108 109Some example section specifications follow. 110 111=over 4 112 113=item * 114 115Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections: 116 117C<NAME|SYNOPSIS> 118 119=item * 120 121Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION> 122section: 123 124C<DESCRIPTION/Question|Answer> 125 126=item * 127 128Match the C<Comments> subsection of I<all> sections: 129 130C</Comments> 131 132=item * 133 134Match all subsections of C<DESCRIPTION> I<except> for C<Comments>: 135 136C<DESCRIPTION/!Comments> 137 138=item * 139 140Match the C<DESCRIPTION> section but do I<not> match any of its subsections: 141 142C<DESCRIPTION/!.+> 143 144=item * 145 146Match all top level sections but none of their subsections: 147 148C</!.+> 149 150=back 151 152=begin _NOT_IMPLEMENTED_ 153 154=head1 RANGE SPECIFICATIONS 155 156B<podselect()> and B<Pod::Select::select()> may be given one or more 157"range specifications" to restrict the text processed to only the 158desired ranges of paragraphs in the desired set of sections. A range 159specification is a string containing a single Perl-style regular 160expression (a regex), or else two Perl-style regular expressions 161(regexs) separated by a ".." (Perl's "range" operator is ".."). 162The regexs in a range specification are delimited by forward slashes 163("/"). If you need to use a forward slash literally within a regex you 164can escape it with a backslash ("\/"). 165 166The formal syntax of a range specification is: 167 168=over 4 169 170=item * 171 172/I<start-range-regex>/[../I<end-range-regex>/] 173 174=back 175 176Where each the item inside square brackets (the ".." followed by the 177end-range-regex) is optional. Each "range-regex" is of the form: 178 179 =cmd-expr text-expr 180 181Where I<cmd-expr> is intended to match the name of one or more POD 182commands, and I<text-expr> is intended to match the paragraph text for 183the command. If a range-regex is supposed to match a POD command, then 184the first character of the regex (the one after the initial '/') 185absolutely I<must> be a single '=' character; it may not be anything 186else (not even a regex meta-character) if it is supposed to match 187against the name of a POD command. 188 189If no I<=cmd-expr> is given then the text-expr will be matched against 190plain textblocks unless it is preceded by a space, in which case it is 191matched against verbatim text-blocks. If no I<text-expr> is given then 192only the command-portion of the paragraph is matched against. 193 194Note that these two expressions are each implicitly anchored. This 195means that when matching against the command-name, there will be an 196implicit '^' and '$' around the given I<=cmd-expr>; and when matching 197against the paragraph text there will be an implicit '\A' and '\Z' 198around the given I<text-expr>. 199 200Unlike with section-specs, the '!' character does I<not> have any special 201meaning (negation or otherwise) at the beginning of a range-spec! 202 203Some example range specifications follow. 204 205=over 4 206 207=item 208Match all C<=for html> paragraphs: 209 210C</=for html/> 211 212=item 213Match all paragraphs between C<=begin html> and C<=end html> 214(note that this will I<not> work correctly if such sections 215are nested): 216 217C</=begin html/../=end html/> 218 219=item 220Match all paragraphs between the given C<=item> name until the end of the 221current section: 222 223C</=item mine/../=head\d/> 224 225=item 226Match all paragraphs between the given C<=item> until the next item, or 227until the end of the itemized list (note that this will I<not> work as 228desired if the item contains an itemized list nested within it): 229 230C</=item mine/../=(item|back)/> 231 232=back 233 234=end _NOT_IMPLEMENTED_ 235 236=cut 237 238############################################################################# 239 240#use diagnostics; 241use Carp; 242use Pod::Parser 1.04; 243 244@ISA = qw(Pod::Parser); 245@EXPORT = qw(&podselect); 246 247## Maximum number of heading levels supported for '=headN' directives 248*MAX_HEADING_LEVEL = \3; 249 250############################################################################# 251 252=head1 OBJECT METHODS 253 254The following methods are provided in this module. Each one takes a 255reference to the object itself as an implicit first parameter. 256 257=cut 258 259##--------------------------------------------------------------------------- 260 261## =begin _PRIVATE_ 262## 263## =head1 B<_init_headings()> 264## 265## Initialize the current set of active section headings. 266## 267## =cut 268## 269## =end _PRIVATE_ 270 271sub _init_headings { 272 my $self = shift; 273 local *myData = $self; 274 275 ## Initialize current section heading titles if necessary 276 unless (defined $myData{_SECTION_HEADINGS}) { 277 local *section_headings = $myData{_SECTION_HEADINGS} = []; 278 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 279 $section_headings[$i] = ''; 280 } 281 } 282} 283 284##--------------------------------------------------------------------------- 285 286=head1 B<curr_headings()> 287 288 ($head1, $head2, $head3, ...) = $parser->curr_headings(); 289 $head1 = $parser->curr_headings(1); 290 291This method returns a list of the currently active section headings and 292subheadings in the document being parsed. The list of headings returned 293corresponds to the most recently parsed paragraph of the input. 294 295If an argument is given, it must correspond to the desired section 296heading number, in which case only the specified section heading is 297returned. If there is no current section heading at the specified 298level, then C<undef> is returned. 299 300=cut 301 302sub curr_headings { 303 my $self = shift; 304 $self->_init_headings() unless (defined $self->{_SECTION_HEADINGS}); 305 my @headings = @{ $self->{_SECTION_HEADINGS} }; 306 return (@_ > 0 and $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings; 307} 308 309##--------------------------------------------------------------------------- 310 311=head1 B<select()> 312 313 $parser->select($section_spec1,$section_spec2,...); 314 315This method is used to select the particular sections and subsections of 316POD documentation that are to be printed and/or processed. The existing 317set of selected sections is I<replaced> with the given set of sections. 318See B<add_selection()> for adding to the current set of selected 319sections. 320 321Each of the C<$section_spec> arguments should be a section specification 322as described in L<"SECTION SPECIFICATIONS">. The section specifications 323are parsed by this method and the resulting regular expressions are 324stored in the invoking object. 325 326If no C<$section_spec> arguments are given, then the existing set of 327selected sections is cleared out (which means C<all> sections will be 328processed). 329 330This method should I<not> normally be overridden by subclasses. 331 332=cut 333 334sub select { 335 my ($self, @sections) = @_; 336 local *myData = $self; 337 local $_; 338 339### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?) 340 341 ##--------------------------------------------------------------------- 342 ## The following is a blatant hack for backward compatibility, and for 343 ## implementing add_selection(). If the *first* *argument* is the 344 ## string "+", then the remaining section specifications are *added* 345 ## to the current set of selections; otherwise the given section 346 ## specifications will *replace* the current set of selections. 347 ## 348 ## This should probably be fixed someday, but for the present time, 349 ## it seems incredibly unlikely that "+" would ever correspond to 350 ## a legitimate section heading 351 ##--------------------------------------------------------------------- 352 my $add = ($sections[0] eq '+') ? shift(@sections) : ''; 353 354 ## Reset the set of sections to use 355 unless (@sections) { 356 delete $myData{_SELECTED_SECTIONS} unless ($add); 357 return; 358 } 359 $myData{_SELECTED_SECTIONS} = [] 360 unless ($add && exists $myData{_SELECTED_SECTIONS}); 361 local *selected_sections = $myData{_SELECTED_SECTIONS}; 362 363 ## Compile each spec 364 for my $spec (@sections) { 365 if ( defined($_ = _compile_section_spec($spec)) ) { 366 ## Store them in our sections array 367 push(@selected_sections, $_); 368 } 369 else { 370 carp qq{Ignoring section spec "$spec"!\n}; 371 } 372 } 373} 374 375##--------------------------------------------------------------------------- 376 377=head1 B<add_selection()> 378 379 $parser->add_selection($section_spec1,$section_spec2,...); 380 381This method is used to add to the currently selected sections and 382subsections of POD documentation that are to be printed and/or 383processed. See <select()> for replacing the currently selected sections. 384 385Each of the C<$section_spec> arguments should be a section specification 386as described in L<"SECTION SPECIFICATIONS">. The section specifications 387are parsed by this method and the resulting regular expressions are 388stored in the invoking object. 389 390This method should I<not> normally be overridden by subclasses. 391 392=cut 393 394sub add_selection { 395 my $self = shift; 396 return $self->select('+', @_); 397} 398 399##--------------------------------------------------------------------------- 400 401=head1 B<clear_selections()> 402 403 $parser->clear_selections(); 404 405This method takes no arguments, it has the exact same effect as invoking 406<select()> with no arguments. 407 408=cut 409 410sub clear_selections { 411 my $self = shift; 412 return $self->select(); 413} 414 415##--------------------------------------------------------------------------- 416 417=head1 B<match_section()> 418 419 $boolean = $parser->match_section($heading1,$heading2,...); 420 421Returns a value of true if the given section and subsection heading 422titles match any of the currently selected section specifications in 423effect from prior calls to B<select()> and B<add_selection()> (or if 424there are no explicitly selected/deselected sections). 425 426The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of 427the corresponding sections, subsections, etc. to try and match. If 428C<$headingN> is omitted then it defaults to the current corresponding 429section heading title in the input. 430 431This method should I<not> normally be overridden by subclasses. 432 433=cut 434 435sub match_section { 436 my $self = shift; 437 my (@headings) = @_; 438 local *myData = $self; 439 440 ## Return true if no restrictions were explicitly specified 441 my $selections = (exists $myData{_SELECTED_SECTIONS}) 442 ? $myData{_SELECTED_SECTIONS} : undef; 443 return 1 unless ((defined $selections) && @{$selections}); 444 445 ## Default any unspecified sections to the current one 446 my @current_headings = $self->curr_headings(); 447 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 448 (defined $headings[$i]) or $headings[$i] = $current_headings[$i]; 449 } 450 451 ## Look for a match against the specified section expressions 452 for my $section_spec ( @{$selections} ) { 453 ##------------------------------------------------------ 454 ## Each portion of this spec must match in order for 455 ## the spec to be matched. So we will start with a 456 ## match-value of 'true' and logically 'and' it with 457 ## the results of matching a given element of the spec. 458 ##------------------------------------------------------ 459 my $match = 1; 460 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 461 my $regex = $section_spec->[$i]; 462 my $negated = ($regex =~ s/^\!//); 463 $match &= ($negated ? ($headings[$i] !~ /${regex}/) 464 : ($headings[$i] =~ /${regex}/)); 465 last unless ($match); 466 } 467 return 1 if ($match); 468 } 469 return 0; ## no match 470} 471 472##--------------------------------------------------------------------------- 473 474=head1 B<is_selected()> 475 476 $boolean = $parser->is_selected($paragraph); 477 478This method is used to determine if the block of text given in 479C<$paragraph> falls within the currently selected set of POD sections 480and subsections to be printed or processed. This method is also 481responsible for keeping track of the current input section and 482subsections. It is assumed that C<$paragraph> is the most recently read 483(but not yet processed) input paragraph. 484 485The value returned will be true if the C<$paragraph> and the rest of the 486text in the same section as C<$paragraph> should be selected (included) 487for processing; otherwise a false value is returned. 488 489=cut 490 491sub is_selected { 492 my ($self, $paragraph) = @_; 493 local $_; 494 local *myData = $self; 495 496 $self->_init_headings() unless (defined $myData{_SECTION_HEADINGS}); 497 498 ## Keep track of current sections levels and headings 499 $_ = $paragraph; 500 if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*?)\s*$/) 501 { 502 ## This is a section heading command 503 my ($level, $heading) = ($2, $3); 504 $level = 1 + (length($1) / 3) if ((! length $level) || (length $1)); 505 ## Reset the current section heading at this level 506 $myData{_SECTION_HEADINGS}->[$level - 1] = $heading; 507 ## Reset subsection headings of this one to empty 508 for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) { 509 $myData{_SECTION_HEADINGS}->[$i] = ''; 510 } 511 } 512 513 return $self->match_section(); 514} 515 516############################################################################# 517 518=head1 EXPORTED FUNCTIONS 519 520The following functions are exported by this module. Please note that 521these are functions (not methods) and therefore C<do not> take an 522implicit first argument. 523 524=cut 525 526##--------------------------------------------------------------------------- 527 528=head1 B<podselect()> 529 530 podselect(\%options,@filelist); 531 532B<podselect> will print the raw (untranslated) POD paragraphs of all 533POD sections in the given input files specified by C<@filelist> 534according to the given options. 535 536If any argument to B<podselect> is a reference to a hash 537(associative array) then the values with the following keys are 538processed as follows: 539 540=over 4 541 542=item B<-output> 543 544A string corresponding to the desired output file (or ">&STDOUT" 545or ">&STDERR"). The default is to use standard output. 546 547=item B<-sections> 548 549A reference to an array of sections specifications (as described in 550L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD 551sections and subsections to be selected from input. If no section 552specifications are given, then all sections of the PODs are used. 553 554=begin _NOT_IMPLEMENTED_ 555 556=item B<-ranges> 557 558A reference to an array of range specifications (as described in 559L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD 560paragraphs to be selected from the desired input sections. If no range 561specifications are given, then all paragraphs of the desired sections 562are used. 563 564=end _NOT_IMPLEMENTED_ 565 566=back 567 568All other arguments should correspond to the names of input files 569containing POD sections. A file name of "-" or "<&STDIN" will 570be interpreted to mean standard input (which is the default if no 571filenames are given). 572 573=cut 574 575sub podselect { 576 my(@argv) = @_; 577 my %defaults = (); 578 my $pod_parser = new Pod::Select(%defaults); 579 my $num_inputs = 0; 580 my $output = '>&STDOUT'; 581 my %opts; 582 local $_; 583 for (@argv) { 584 if (ref($_)) { 585 next unless (ref($_) eq 'HASH'); 586 %opts = (%defaults, %{$_}); 587 588 ##------------------------------------------------------------- 589 ## Need this for backward compatibility since we formerly used 590 ## options that were all uppercase words rather than ones that 591 ## looked like Unix command-line options. 592 ## to be uppercase keywords) 593 ##------------------------------------------------------------- 594 %opts = map { 595 my ($key, $val) = (lc $_, $opts{$_}); 596 $key =~ s/^(?=\w)/-/; 597 $key =~ /^-se[cl]/ and $key = '-sections'; 598 #! $key eq '-range' and $key .= 's'; 599 ($key => $val); 600 } (keys %opts); 601 602 ## Process the options 603 (exists $opts{'-output'}) and $output = $opts{'-output'}; 604 605 ## Select the desired sections 606 $pod_parser->select(@{ $opts{'-sections'} }) 607 if ( (defined $opts{'-sections'}) 608 && ((ref $opts{'-sections'}) eq 'ARRAY') ); 609 610 #! ## Select the desired paragraph ranges 611 #! $pod_parser->select(@{ $opts{'-ranges'} }) 612 #! if ( (defined $opts{'-ranges'}) 613 #! && ((ref $opts{'-ranges'}) eq 'ARRAY') ); 614 } 615 else { 616 $pod_parser->parse_from_file($_, $output); 617 ++$num_inputs; 618 } 619 } 620 $pod_parser->parse_from_file('-') unless ($num_inputs > 0); 621} 622 623############################################################################# 624 625=head1 PRIVATE METHODS AND DATA 626 627B<Pod::Select> makes uses a number of internal methods and data fields 628which clients should not need to see or use. For the sake of avoiding 629name collisions with client data and methods, these methods and fields 630are briefly discussed here. Determined hackers may obtain further 631information about them by reading the B<Pod::Select> source code. 632 633Private data fields are stored in the hash-object whose reference is 634returned by the B<new()> constructor for this class. The names of all 635private methods and data-fields used by B<Pod::Select> begin with a 636prefix of "_" and match the regular expression C</^_\w+$/>. 637 638=cut 639 640##--------------------------------------------------------------------------- 641 642=begin _PRIVATE_ 643 644=head1 B<_compile_section_spec()> 645 646 $listref = $parser->_compile_section_spec($section_spec); 647 648This function (note it is a function and I<not> a method) takes a 649section specification (as described in L<"SECTION SPECIFICATIONS">) 650given in C<$section_sepc>, and compiles it into a list of regular 651expressions. If C<$section_spec> has no syntax errors, then a reference 652to the list (array) of corresponding regular expressions is returned; 653otherwise C<undef> is returned and an error message is printed (using 654B<carp>) for each invalid regex. 655 656=end _PRIVATE_ 657 658=cut 659 660sub _compile_section_spec { 661 my ($section_spec) = @_; 662 my (@regexs, $negated); 663 664 ## Compile the spec into a list of regexs 665 local $_ = $section_spec; 666 s{\\\\}{\001}g; ## handle escaped backward slashes 667 s{\\/}{\002}g; ## handle escaped forward slashes 668 669 ## Parse the regexs for the heading titles 670 @regexs = split(/\//, $_, $MAX_HEADING_LEVEL); 671 672 ## Set default regex for ommitted levels 673 for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) { 674 $regexs[$i] = '.*' unless ((defined $regexs[$i]) 675 && (length $regexs[$i])); 676 } 677 ## Modify the regexs as needed and validate their syntax 678 my $bad_regexs = 0; 679 for (@regexs) { 680 $_ .= '.+' if ($_ eq '!'); 681 s{\001}{\\\\}g; ## restore escaped backward slashes 682 s{\002}{\\/}g; ## restore escaped forward slashes 683 $negated = s/^\!//; ## check for negation 684 eval "m{$_}"; ## check regex syntax 685 if ($@) { 686 ++$bad_regexs; 687 carp qq{Bad regular expression /$_/ in "$section_spec": $@\n}; 688 } 689 else { 690 ## Add the forward and rear anchors (and put the negator back) 691 $_ = '^' . $_ unless (/^\^/); 692 $_ = $_ . '$' unless (/\$$/); 693 $_ = '!' . $_ if ($negated); 694 } 695 } 696 return (! $bad_regexs) ? [ @regexs ] : undef; 697} 698 699##--------------------------------------------------------------------------- 700 701=begin _PRIVATE_ 702 703=head2 $self->{_SECTION_HEADINGS} 704 705A reference to an array of the current section heading titles for each 706heading level (note that the first heading level title is at index 0). 707 708=end _PRIVATE_ 709 710=cut 711 712##--------------------------------------------------------------------------- 713 714=begin _PRIVATE_ 715 716=head2 $self->{_SELECTED_SECTIONS} 717 718A reference to an array of references to arrays. Each subarray is a list 719of anchored regular expressions (preceded by a "!" if the expression is to 720be negated). The index of the expression in the subarray should correspond 721to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}> 722that it is to be matched against. 723 724=end _PRIVATE_ 725 726=cut 727 728############################################################################# 729 730=head1 SEE ALSO 731 732L<Pod::Parser> 733 734=head1 AUTHOR 735 736Please report bugs using L<http://rt.cpan.org>. 737 738Brad Appleton E<lt>bradapp@enteract.comE<gt> 739 740Based on code for B<pod2text> written by 741Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> 742 743B<Pod::Select> is part of the L<Pod::Parser> distribution. 744 745=cut 746 7471; 748# vim: ts=4 sw=4 et 749