1#############################################################################
2# Pod/Select.pm -- function to select portions of POD docs
3#
4# Copyright (C) 1996-2000 by Bradford Appleton. All rights reserved.
5# This file is part of "PodParser". PodParser is free software;
6# you can redistribute it and/or modify it under the same terms
7# as Perl itself.
8#############################################################################
9
10package Pod::Select;
11use strict;
12use warnings;
13
14use vars qw($VERSION @ISA @EXPORT $MAX_HEADING_LEVEL %myData @section_headings @selected_sections);
15$VERSION = '1.60'; ## Current version of this package
16require  5.005;    ## requires this Perl version or later
17
18#############################################################################
19
20=head1 NAME
21
22Pod::Select, podselect() - extract selected sections of POD from input
23
24=head1 SYNOPSIS
25
26    use Pod::Select;
27
28    ## Select all the POD sections for each file in @filelist
29    ## and print the result on standard output.
30    podselect(@filelist);
31
32    ## Same as above, but write to tmp.out
33    podselect({-output => "tmp.out"}, @filelist):
34
35    ## Select from the given filelist, only those POD sections that are
36    ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
37    podselect({-sections => ["NAME|SYNOPSIS", "OPTIONS"]}, @filelist):
38
39    ## Select the "DESCRIPTION" section of the PODs from STDIN and write
40    ## the result to STDERR.
41    podselect({-output => ">&STDERR", -sections => ["DESCRIPTION"]}, \*STDIN);
42
43or
44
45    use Pod::Select;
46
47    ## Create a parser object for selecting POD sections from the input
48    $parser = Pod::Select->new();
49
50    ## Select all the POD sections for each file in @filelist
51    ## and print the result to tmp.out.
52    $parser->parse_from_file("<&STDIN", "tmp.out");
53
54    ## Select from the given filelist, only those POD sections that are
55    ## within a 1st level section named any of: NAME, SYNOPSIS, OPTIONS.
56    $parser->select("NAME|SYNOPSIS", "OPTIONS");
57    for (@filelist) { $parser->parse_from_file($_); }
58
59    ## Select the "DESCRIPTION" and "SEE ALSO" sections of the PODs from
60    ## STDIN and write the result to STDERR.
61    $parser->select("DESCRIPTION");
62    $parser->add_selection("SEE ALSO");
63    $parser->parse_from_filehandle(\*STDIN, \*STDERR);
64
65=head1 REQUIRES
66
67perl5.005, Pod::Parser, Exporter, Carp
68
69=head1 EXPORTS
70
71podselect()
72
73=head1 DESCRIPTION
74
75B<podselect()> is a function which will extract specified sections of
76pod documentation from an input stream. This ability is provided by the
77B<Pod::Select> module which is a subclass of B<Pod::Parser>.
78B<Pod::Select> provides a method named B<select()> to specify the set of
79POD sections to select for processing/printing. B<podselect()> merely
80creates a B<Pod::Select> object and then invokes the B<podselect()>
81followed by B<parse_from_file()>.
82
83=head1 SECTION SPECIFICATIONS
84
85B<podselect()> and B<Pod::Select::select()> may be given one or more
86"section specifications" to restrict the text processed to only the
87desired set of sections and their corresponding subsections.  A section
88specification is a string containing one or more Perl-style regular
89expressions separated by forward slashes ("/").  If you need to use a
90forward slash literally within a section title you can escape it with a
91backslash ("\/").
92
93The formal syntax of a section specification is:
94
95=over 4
96
97=item *
98
99I<head1-title-regex>/I<head2-title-regex>/...
100
101=back
102
103Any omitted or empty regular expressions will default to ".*".
104Please note that each regular expression given is implicitly
105anchored by adding "^" and "$" to the beginning and end.  Also, if a
106given regular expression starts with a "!" character, then the
107expression is I<negated> (so C<!foo> would match anything I<except>
108C<foo>).
109
110Some example section specifications follow.
111
112=over 4
113
114=item *
115
116Match the C<NAME> and C<SYNOPSIS> sections and all of their subsections:
117
118C<NAME|SYNOPSIS>
119
120=item *
121
122Match only the C<Question> and C<Answer> subsections of the C<DESCRIPTION>
123section:
124
125C<DESCRIPTION/Question|Answer>
126
127=item *
128
129Match the C<Comments> subsection of I<all> sections:
130
131C</Comments>
132
133=item *
134
135Match all subsections of C<DESCRIPTION> I<except> for C<Comments>:
136
137C<DESCRIPTION/!Comments>
138
139=item *
140
141Match the C<DESCRIPTION> section but do I<not> match any of its subsections:
142
143C<DESCRIPTION/!.+>
144
145=item *
146
147Match all top level sections but none of their subsections:
148
149C</!.+>
150
151=back
152
153=begin _NOT_IMPLEMENTED_
154
155=head1 RANGE SPECIFICATIONS
156
157B<podselect()> and B<Pod::Select::select()> may be given one or more
158"range specifications" to restrict the text processed to only the
159desired ranges of paragraphs in the desired set of sections. A range
160specification is a string containing a single Perl-style regular
161expression (a regex), or else two Perl-style regular expressions
162(regexs) separated by a ".." (Perl's "range" operator is "..").
163The regexs in a range specification are delimited by forward slashes
164("/").  If you need to use a forward slash literally within a regex you
165can escape it with a backslash ("\/").
166
167The formal syntax of a range specification is:
168
169=over 4
170
171=item *
172
173/I<start-range-regex>/[../I<end-range-regex>/]
174
175=back
176
177Where each the item inside square brackets (the ".." followed by the
178end-range-regex) is optional. Each "range-regex" is of the form:
179
180    =cmd-expr text-expr
181
182Where I<cmd-expr> is intended to match the name of one or more POD
183commands, and I<text-expr> is intended to match the paragraph text for
184the command. If a range-regex is supposed to match a POD command, then
185the first character of the regex (the one after the initial '/')
186absolutely I<must> be a single '=' character; it may not be anything
187else (not even a regex meta-character) if it is supposed to match
188against the name of a POD command.
189
190If no I<=cmd-expr> is given then the text-expr will be matched against
191plain textblocks unless it is preceded by a space, in which case it is
192matched against verbatim text-blocks. If no I<text-expr> is given then
193only the command-portion of the paragraph is matched against.
194
195Note that these two expressions are each implicitly anchored. This
196means that when matching against the command-name, there will be an
197implicit '^' and '$' around the given I<=cmd-expr>; and when matching
198against the paragraph text there will be an implicit '\A' and '\Z'
199around the given I<text-expr>.
200
201Unlike with section-specs, the '!' character does I<not> have any special
202meaning (negation or otherwise) at the beginning of a range-spec!
203
204Some example range specifications follow.
205
206=over 4
207
208=item
209Match all C<=for html> paragraphs:
210
211C</=for html/>
212
213=item
214Match all paragraphs between C<=begin html> and C<=end html>
215(note that this will I<not> work correctly if such sections
216are nested):
217
218C</=begin html/../=end html/>
219
220=item
221Match all paragraphs between the given C<=item> name until the end of the
222current section:
223
224C</=item mine/../=head\d/>
225
226=item
227Match all paragraphs between the given C<=item> until the next item, or
228until the end of the itemized list (note that this will I<not> work as
229desired if the item contains an itemized list nested within it):
230
231C</=item mine/../=(item|back)/>
232
233=back
234
235=end _NOT_IMPLEMENTED_
236
237=cut
238
239#############################################################################
240
241#use diagnostics;
242use Carp;
243use Pod::Parser 1.04;
244
245@ISA = qw(Pod::Parser);
246@EXPORT = qw(&podselect);
247
248## Maximum number of heading levels supported for '=headN' directives
249*MAX_HEADING_LEVEL = \3;
250
251#############################################################################
252
253=head1 OBJECT METHODS
254
255The following methods are provided in this module. Each one takes a
256reference to the object itself as an implicit first parameter.
257
258=cut
259
260##---------------------------------------------------------------------------
261
262## =begin _PRIVATE_
263##
264## =head1 B<_init_headings()>
265##
266## Initialize the current set of active section headings.
267##
268## =cut
269##
270## =end _PRIVATE_
271
272sub _init_headings {
273    my $self = shift;
274    local *myData = $self;
275
276    ## Initialize current section heading titles if necessary
277    unless (defined $myData{_SECTION_HEADINGS}) {
278        local *section_headings = $myData{_SECTION_HEADINGS} = [];
279        for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
280            $section_headings[$i] = '';
281        }
282    }
283}
284
285##---------------------------------------------------------------------------
286
287=head1 B<curr_headings()>
288
289            ($head1, $head2, $head3, ...) = $parser->curr_headings();
290            $head1 = $parser->curr_headings(1);
291
292This method returns a list of the currently active section headings and
293subheadings in the document being parsed. The list of headings returned
294corresponds to the most recently parsed paragraph of the input.
295
296If an argument is given, it must correspond to the desired section
297heading number, in which case only the specified section heading is
298returned. If there is no current section heading at the specified
299level, then C<undef> is returned.
300
301=cut
302
303sub curr_headings {
304    my $self = shift;
305    $self->_init_headings()  unless (defined $self->{_SECTION_HEADINGS});
306    my @headings = @{ $self->{_SECTION_HEADINGS} };
307    return (@_ > 0  and  $_[0] =~ /^\d+$/) ? $headings[$_[0] - 1] : @headings;
308}
309
310##---------------------------------------------------------------------------
311
312=head1 B<select()>
313
314            $parser->select($section_spec1,$section_spec2,...);
315
316This method is used to select the particular sections and subsections of
317POD documentation that are to be printed and/or processed. The existing
318set of selected sections is I<replaced> with the given set of sections.
319See B<add_selection()> for adding to the current set of selected
320sections.
321
322Each of the C<$section_spec> arguments should be a section specification
323as described in L<"SECTION SPECIFICATIONS">.  The section specifications
324are parsed by this method and the resulting regular expressions are
325stored in the invoking object.
326
327If no C<$section_spec> arguments are given, then the existing set of
328selected sections is cleared out (which means C<all> sections will be
329processed).
330
331This method should I<not> normally be overridden by subclasses.
332
333=cut
334
335sub select {
336    my ($self, @sections) = @_;
337    local *myData = $self;
338    local $_;
339
340### NEED TO DISCERN A SECTION-SPEC FROM A RANGE-SPEC (look for m{^/.+/$}?)
341
342    ##---------------------------------------------------------------------
343    ## The following is a blatant hack for backward compatibility, and for
344    ## implementing add_selection(). If the *first* *argument* is the
345    ## string "+", then the remaining section specifications are *added*
346    ## to the current set of selections; otherwise the given section
347    ## specifications will *replace* the current set of selections.
348    ##
349    ## This should probably be fixed someday, but for the present time,
350    ## it seems incredibly unlikely that "+" would ever correspond to
351    ## a legitimate section heading
352    ##---------------------------------------------------------------------
353    my $add = ($sections[0] eq '+') ? shift(@sections) : '';
354
355    ## Reset the set of sections to use
356    unless (@sections) {
357        delete $myData{_SELECTED_SECTIONS}  unless ($add);
358        return;
359    }
360    $myData{_SELECTED_SECTIONS} = []
361        unless ($add  &&  exists $myData{_SELECTED_SECTIONS});
362    local *selected_sections = $myData{_SELECTED_SECTIONS};
363
364    ## Compile each spec
365    for my $spec (@sections) {
366        if ( defined($_ = _compile_section_spec($spec)) ) {
367            ## Store them in our sections array
368            push(@selected_sections, $_);
369        }
370        else {
371            carp qq{Ignoring section spec "$spec"!\n};
372        }
373    }
374}
375
376##---------------------------------------------------------------------------
377
378=head1 B<add_selection()>
379
380            $parser->add_selection($section_spec1,$section_spec2,...);
381
382This method is used to add to the currently selected sections and
383subsections of POD documentation that are to be printed and/or
384processed. See <select()> for replacing the currently selected sections.
385
386Each of the C<$section_spec> arguments should be a section specification
387as described in L<"SECTION SPECIFICATIONS">. The section specifications
388are parsed by this method and the resulting regular expressions are
389stored in the invoking object.
390
391This method should I<not> normally be overridden by subclasses.
392
393=cut
394
395sub add_selection {
396    my $self = shift;
397    return $self->select('+', @_);
398}
399
400##---------------------------------------------------------------------------
401
402=head1 B<clear_selections()>
403
404            $parser->clear_selections();
405
406This method takes no arguments, it has the exact same effect as invoking
407<select()> with no arguments.
408
409=cut
410
411sub clear_selections {
412    my $self = shift;
413    return $self->select();
414}
415
416##---------------------------------------------------------------------------
417
418=head1 B<match_section()>
419
420            $boolean = $parser->match_section($heading1,$heading2,...);
421
422Returns a value of true if the given section and subsection heading
423titles match any of the currently selected section specifications in
424effect from prior calls to B<select()> and B<add_selection()> (or if
425there are no explicitly selected/deselected sections).
426
427The arguments C<$heading1>, C<$heading2>, etc. are the heading titles of
428the corresponding sections, subsections, etc. to try and match.  If
429C<$headingN> is omitted then it defaults to the current corresponding
430section heading title in the input.
431
432This method should I<not> normally be overridden by subclasses.
433
434=cut
435
436sub match_section {
437    my $self = shift;
438    my (@headings) = @_;
439    local *myData = $self;
440
441    ## Return true if no restrictions were explicitly specified
442    my $selections = (exists $myData{_SELECTED_SECTIONS})
443                       ?  $myData{_SELECTED_SECTIONS}  :  undef;
444    return  1  unless ((defined $selections) && @{$selections});
445
446    ## Default any unspecified sections to the current one
447    my @current_headings = $self->curr_headings();
448    for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
449        (defined $headings[$i])  or  $headings[$i] = $current_headings[$i];
450    }
451
452    ## Look for a match against the specified section expressions
453    for my $section_spec ( @{$selections} ) {
454        ##------------------------------------------------------
455        ## Each portion of this spec must match in order for
456        ## the spec to be matched. So we will start with a
457        ## match-value of 'true' and logically 'and' it with
458        ## the results of matching a given element of the spec.
459        ##------------------------------------------------------
460        my $match = 1;
461        for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
462            my $regex   = $section_spec->[$i];
463            my $negated = ($regex =~ s/^\!//);
464            $match  &= ($negated ? ($headings[$i] !~ /${regex}/)
465                                 : ($headings[$i] =~ /${regex}/));
466            last unless ($match);
467        }
468        return  1  if ($match);
469    }
470    return  0;  ## no match
471}
472
473##---------------------------------------------------------------------------
474
475=head1 B<is_selected()>
476
477            $boolean = $parser->is_selected($paragraph);
478
479This method is used to determine if the block of text given in
480C<$paragraph> falls within the currently selected set of POD sections
481and subsections to be printed or processed. This method is also
482responsible for keeping track of the current input section and
483subsections. It is assumed that C<$paragraph> is the most recently read
484(but not yet processed) input paragraph.
485
486The value returned will be true if the C<$paragraph> and the rest of the
487text in the same section as C<$paragraph> should be selected (included)
488for processing; otherwise a false value is returned.
489
490=cut
491
492sub is_selected {
493    my ($self, $paragraph) = @_;
494    local $_;
495    local *myData = $self;
496
497    $self->_init_headings()  unless (defined $myData{_SECTION_HEADINGS});
498
499    ## Keep track of current sections levels and headings
500    $_ = $paragraph;
501    if (/^=((?:sub)*)(?:head(?:ing)?|sec(?:tion)?)(\d*)\s+(.*?)\s*$/)
502    {
503        ## This is a section heading command
504        my ($level, $heading) = ($2, $3);
505        $level = 1 + (length($1) / 3)  if ((! length $level) || (length $1));
506        ## Reset the current section heading at this level
507        $myData{_SECTION_HEADINGS}->[$level - 1] = $heading;
508        ## Reset subsection headings of this one to empty
509        for (my $i = $level; $i < $MAX_HEADING_LEVEL; ++$i) {
510            $myData{_SECTION_HEADINGS}->[$i] = '';
511        }
512    }
513
514    return  $self->match_section();
515}
516
517#############################################################################
518
519=head1 EXPORTED FUNCTIONS
520
521The following functions are exported by this module. Please note that
522these are functions (not methods) and therefore C<do not> take an
523implicit first argument.
524
525=cut
526
527##---------------------------------------------------------------------------
528
529=head1 B<podselect()>
530
531            podselect(\%options,@filelist);
532
533B<podselect> will print the raw (untranslated) POD paragraphs of all
534POD sections in the given input files specified by C<@filelist>
535according to the given options.
536
537If any argument to B<podselect> is a reference to a hash
538(associative array) then the values with the following keys are
539processed as follows:
540
541=over 4
542
543=item B<-output>
544
545A string corresponding to the desired output file (or ">&STDOUT"
546or ">&STDERR"). The default is to use standard output.
547
548=item B<-sections>
549
550A reference to an array of sections specifications (as described in
551L<"SECTION SPECIFICATIONS">) which indicate the desired set of POD
552sections and subsections to be selected from input. If no section
553specifications are given, then all sections of the PODs are used.
554
555=begin _NOT_IMPLEMENTED_
556
557=item B<-ranges>
558
559A reference to an array of range specifications (as described in
560L<"RANGE SPECIFICATIONS">) which indicate the desired range of POD
561paragraphs to be selected from the desired input sections. If no range
562specifications are given, then all paragraphs of the desired sections
563are used.
564
565=end _NOT_IMPLEMENTED_
566
567=back
568
569All other arguments should correspond to the names of input files
570containing POD sections. A file name of "-" or "<&STDIN" will
571be interpreted to mean standard input (which is the default if no
572filenames are given).
573
574=cut 
575
576sub podselect {
577    my(@argv) = @_;
578    my %defaults = ();
579    my $pod_parser = Pod::Select->new(%defaults);
580    my $num_inputs = 0;
581    my $output = '>&STDOUT';
582    my %opts;
583    local $_;
584    for (@argv) {
585        if (ref($_)) {
586        next unless (ref($_) eq 'HASH');
587            %opts = (%defaults, %{$_});
588
589            ##-------------------------------------------------------------
590            ## Need this for backward compatibility since we formerly used
591            ## options that were all uppercase words rather than ones that
592            ## looked like Unix command-line options.
593            ## to be uppercase keywords)
594            ##-------------------------------------------------------------
595            %opts = map {
596                my ($key, $val) = (lc $_, $opts{$_});
597                $key =~ s/^(?=\w)/-/;
598                $key =~ /^-se[cl]/  and  $key  = '-sections';
599                #! $key eq '-range'    and  $key .= 's';
600                ($key => $val);
601            } (keys %opts);
602
603            ## Process the options
604            (exists $opts{'-output'})  and  $output = $opts{'-output'};
605
606            ## Select the desired sections
607            $pod_parser->select(@{ $opts{'-sections'} })
608                if ( (defined $opts{'-sections'})
609                     && ((ref $opts{'-sections'}) eq 'ARRAY') );
610
611            #! ## Select the desired paragraph ranges
612            #! $pod_parser->select(@{ $opts{'-ranges'} })
613            #!     if ( (defined $opts{'-ranges'})
614            #!          && ((ref $opts{'-ranges'}) eq 'ARRAY') );
615        }
616        else {
617            $pod_parser->parse_from_file($_, $output);
618            ++$num_inputs;
619        }
620    }
621    $pod_parser->parse_from_file('-')  unless ($num_inputs > 0);
622}
623
624#############################################################################
625
626=head1 PRIVATE METHODS AND DATA
627
628B<Pod::Select> makes uses a number of internal methods and data fields
629which clients should not need to see or use. For the sake of avoiding
630name collisions with client data and methods, these methods and fields
631are briefly discussed here. Determined hackers may obtain further
632information about them by reading the B<Pod::Select> source code.
633
634Private data fields are stored in the hash-object whose reference is
635returned by the B<new()> constructor for this class. The names of all
636private methods and data-fields used by B<Pod::Select> begin with a
637prefix of "_" and match the regular expression C</^_\w+$/>.
638
639=cut
640
641##---------------------------------------------------------------------------
642
643=begin _PRIVATE_
644
645=head1 B<_compile_section_spec()>
646
647            $listref = $parser->_compile_section_spec($section_spec);
648
649This function (note it is a function and I<not> a method) takes a
650section specification (as described in L<"SECTION SPECIFICATIONS">)
651given in C<$section_sepc>, and compiles it into a list of regular
652expressions. If C<$section_spec> has no syntax errors, then a reference
653to the list (array) of corresponding regular expressions is returned;
654otherwise C<undef> is returned and an error message is printed (using
655B<carp>) for each invalid regex.
656
657=end _PRIVATE_
658
659=cut
660
661sub _compile_section_spec {
662    my ($section_spec) = @_;
663    my (@regexs, $negated);
664
665    ## Compile the spec into a list of regexs
666    local $_ = $section_spec;
667    s{\\\\}{\001}g;  ## handle escaped backward slashes
668    s{\\/}{\002}g;   ## handle escaped forward slashes
669
670    ## Parse the regexs for the heading titles
671    @regexs = split(/\//, $_, $MAX_HEADING_LEVEL);
672
673    ## Set default regex for ommitted levels
674    for (my $i = 0; $i < $MAX_HEADING_LEVEL; ++$i) {
675        $regexs[$i]  = '.*'  unless ((defined $regexs[$i])
676                                     && (length $regexs[$i]));
677    }
678    ## Modify the regexs as needed and validate their syntax
679    my $bad_regexs = 0;
680    for (@regexs) {
681        $_ .= '.+'  if ($_ eq '!');
682        s{\001}{\\\\}g;       ## restore escaped backward slashes
683        s{\002}{\\/}g;        ## restore escaped forward slashes
684        $negated = s/^\!//;   ## check for negation
685        eval "m{$_}";         ## check regex syntax
686        if ($@) {
687            ++$bad_regexs;
688            carp qq{Bad regular expression /$_/ in "$section_spec": $@\n};
689        }
690        else {
691            ## Add the forward and rear anchors (and put the negator back)
692            $_ = '^' . $_  unless (/^\^/);
693            $_ = $_ . '$'  unless (/\$$/);
694            $_ = '!' . $_  if ($negated);
695        }
696    }
697    return  (! $bad_regexs) ? [ @regexs ] : undef;
698}
699
700##---------------------------------------------------------------------------
701
702=begin _PRIVATE_
703
704=head2 $self->{_SECTION_HEADINGS}
705
706A reference to an array of the current section heading titles for each
707heading level (note that the first heading level title is at index 0).
708
709=end _PRIVATE_
710
711=cut
712
713##---------------------------------------------------------------------------
714
715=begin _PRIVATE_
716
717=head2 $self->{_SELECTED_SECTIONS}
718
719A reference to an array of references to arrays. Each subarray is a list
720of anchored regular expressions (preceded by a "!" if the expression is to
721be negated). The index of the expression in the subarray should correspond
722to the index of the heading title in C<$self-E<gt>{_SECTION_HEADINGS}>
723that it is to be matched against.
724
725=end _PRIVATE_
726
727=cut
728
729#############################################################################
730
731=head1 SEE ALSO
732
733L<Pod::Parser>
734
735=head1 AUTHOR
736
737Please report bugs using L<http://rt.cpan.org>.
738
739Brad Appleton E<lt>bradapp@enteract.comE<gt>
740
741Based on code for B<pod2text> written by
742Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
743
744B<Pod::Select> is part of the L<Pod::Parser> distribution.
745
746=cut
747
7481;
749# vim: ts=4 sw=4 et
750