1
2package Tree::Parser;
3
4use strict;
5use warnings;
6
7our $VERSION = '0.15';
8
9use Scalar::Util qw(blessed);
10
11use Tree::Simple;
12use Array::Iterator;
13
14### constructor
15
# Constructor. Creates an empty parser object and passes the optional
# $input on to _init, which sets up the object's slots. It can be
# called on the class name or on an existing instance, in which case
# the new object is blessed into the instance's class.
sub new {
    my ($_class, $input) = @_;
    my $self = bless {}, (ref($_class) || $_class);
    $self->_init($input);
    return $self;
}
24
# Sets up the slots of a freshly blessed parser.
#
# $input is optional and decides what the parser starts out with:
#   - a Tree::Simple object (or subclass) is stored as the tree, and
#     no iterator is created (such a tree is expected to be deparsed)
#   - any other true value is turned into an iterator by prepareInput,
#     and a new empty root tree is created to parse into
#   - a false value (or nothing) gives an empty root tree and no
#     iterator
sub _init {
    my ($self, $input) = @_;

    # the three filter slots always start out empty
    $self->{$_} = undef
        for qw(parse_filter deparse_filter deparse_filter_cleanup);

    my $is_tree = $input && blessed($input) && $input->isa("Tree::Simple");

    # only non-tree input needs an iterator, see prepareInput below
    $self->{iterator} = ($input && !$is_tree)
        ? $self->prepareInput($input)
        : undef;

    # a tree we were handed is kept, otherwise we start from a new root
    $self->{tree} = $is_tree
        ? $input
        : Tree::Simple->new(Tree::Simple->ROOT);
}
54
55### methods
56
# Stores the encoding which prepareInput prefixes with ":" and appends
# to the open mode when reading a file. Dies if $file_encoding is
# undefined.
sub setFileEncoding {
    my ($self, $file_encoding) = @_;
    die "Insufficient Arguments : file_encoding must be defined"
        unless defined $file_encoding;
    $self->{file_encoding} = $file_encoding;
}
62
# Replaces the parser's iterator with one built from $input by
# prepareInput. Dies if $input is undefined.
sub setInput {
    my ($self, $input) = @_;
    die "Insufficient Arguments : input undefined"
        unless defined $input;
    $self->{iterator} = $self->prepareInput($input);
}
68
# prepareInput accepts any of the following types of arguments:
#   - an Array::Iterator instance (handed back untouched)
#   - an array reference of lines
#   - the name of a file (anything ending in .tree, or the name of
#     any existing file)
#   - a single string of code, which must either contain embedded
#     newlines or start with an opening paren (nested parens format)
# and then returns an iterator.
# references will be stringified, unless they are array references or
# Array::Iterator objects.
#
# Dies with "cannot open file: ..." if a file cannot be opened, and
# with "Incorrect Object Type : ..." if a string is in none of the
# shapes listed above.
sub prepareInput {
    my ($self, $input) = @_;

    # already an A:I instance
    return $input
        if blessed($input) and $input->isa('Array::Iterator');

    # a simple array
    return Array::Iterator->new($input)
        if ref($input) eq 'ARRAY';

    # reads all the lines of the named file into an iterator, the
    # optional file encoding is appended to the open mode as a layer.
    # Both file cases below share this closure (this used to be a
    # goto into the body of an if block, which perl has deprecated).
    my $iterator_from_file = sub {
        my ($file_name) = @_;
        my $encoding = (defined $self->{file_encoding}
            ? (":" . $self->{file_encoding})
            : '');
        open(my $tree_file, ("<" . $encoding), $file_name)
            or die "cannot open file: $!";
        my @file_lines = <$tree_file>;
        close($tree_file);
        return Array::Iterator->new(@file_lines);
    };

    # stringifies to something that ends in .tree
    return $iterator_from_file->($input)
        if $input =~ /\.tree$/;

    # everything else
    my @lines;
    if ($input =~ /\n/) {
        @lines = split /\n/ => $input;
        (scalar(@lines) > 1)
            || die "Incorrect Object Type : input looked like a single string, but only a single line ($input) unable to parse input into line (" . (join "==" => @lines) . ")";
    }
    elsif ($input =~ /^\(/) {
        # tokenize the nested parens format, the delimiters are kept
        # as tokens and only the empty strings are thrown away
        @lines = grep { $_ ne "" } split /(\(|\)|\s|\")/ => $input; #"
    }
    elsif (-f $input) {
        # lets check if it is a file though
        return $iterator_from_file->($input);
    }
    else {
        # otherwise, croak on this sucker ...
        die "Incorrect Object Type : input looked like a single string, but has no newlines or does not start with paren";
    }
    return Array::Iterator->new(@lines);
}
119
120## ----------------------------------------------------------------------------
121## Filters
122## ----------------------------------------------------------------------------
123
124## tab indented filters
125## ----------------------------------------------
{
    # parse filter: the number of leading tabs on the line is the
    # depth, whatever follows them (up to, but not including, a
    # newline) is the node value
    my $TAB_INDENTED_PARSE = sub {
        my $iterator = shift;
        my ($indent, $value) = $iterator->next() =~ /(\t*)(.*)/;
        return (length($indent), $value);
    };

    # deparse filter: one tab per level of depth, then the node value
    my $TAB_INDENTED_DEPARSE = sub {
        my $node = shift;
        my $indent = "\t" x $node->getDepth();
        return $indent . $node->getNodeValue();
    };

    # installs the two filters above, no clean up filter is needed
    sub useTabIndentedFilters {
        my ($self) = @_;
        @{$self}{qw(parse_filter deparse_filter)}
            = ($TAB_INDENTED_PARSE, $TAB_INDENTED_DEPARSE);
        $self->{deparse_filter_cleanup} = undef;
    }
}
147
148## space indented filters
149## ----------------------------------------------
{
    # builds a parse filter for indents which are $width characters
    # wide: the depth is the length of the leading whitespace divided
    # by $width (so it need not be a whole number), and the rest of
    # the line is the node value
    my $build_parse_filter = sub {
        my ($width) = @_;
        return sub {
            my $iterator = shift;
            my $line = $iterator->next();
            my ($indent, $value) = $line =~ /(\s*)(.*)/;
            return (length($indent) / $width, $value);
        };
    };

    # builds the matching deparse filter: one indent of $width spaces
    # per level of depth, followed by the node value
    my $build_deparse_filter = sub {
        my ($width) = @_;
        my $indent = " " x $width;
        return sub {
            my $node = shift;
            my $padding = $indent x $node->getDepth();
            return $padding . $node->getNodeValue();
        };
    };

    # installs a pair of filters for the given indent width, a false
    # (or missing) $num_spaces means the default of 4
    sub useSpaceIndentedFilters {
        my ($self, $num_spaces) = @_;
        $num_spaces = 4 unless $num_spaces;
        $self->{parse_filter} = $build_parse_filter->($num_spaces);
        $self->{deparse_filter} = $build_deparse_filter->($num_spaces);
        $self->{deparse_filter_cleanup} = undef;
    }
}
179
## dot separated level filters
## ----------------------------------------------
{

    # used when no level identifiers are passed in
    my @default_level_identifiers = (1 .. 100);

    # builds a parse filter for lines which look like "1.2.1 value",
    # the depth of a node is the number of dot separated parts in
    # its level prefix, minus one.
    my $make_DOT_SEPERATED_LEVEL_PARSE = sub {
        my (@level_identifiers) = @_;
        @level_identifiers = @default_level_identifiers unless @level_identifiers;
        # the identifiers never change once the filter is built, so
        # this is put together once and not again for every line
        my $level_identifiers_reg_ex = join "|" => @level_identifiers;
        return sub {
            my ($line_iterator) = @_;
            my $line = $line_iterator->next();
            # NOTE:
            # the square brackets make this a character class, so the
            # level prefix is any run of characters which occur in the
            # identifiers (as well as "(", "|", ")" and "."), it is
            # not checked that each part is one of the identifiers.
            # This is left as it always was so that input which parses
            # today keeps on parsing.
            my ($numbers, $value) = $line =~ /([($level_identifiers_reg_ex)\.]*)\s(.*)/;
            # now split the numbers, a line which did not match has
            # none at all (and so ends up with a depth of -1, which
            # the parser will reject)
            my @numbers = defined($numbers) ? split(/\./ => $numbers) : ();
            # we know the depth of the tree by how many
            # numbers are present, and we assume we were
            # given them in sequential order anyway
            my $depth = $#numbers;
            return ($depth, $value);
        };
    };

    # builds the matching deparse filter, the level prefix is made by
    # walking from the node up to (but not including) the root and
    # looking up the identifier for the index of each tree on the way.
    my $make_DOT_SEPERATED_LEVEL_DEPARSE = sub {
        my (@level_identifiers) = @_;
        @level_identifiers = @default_level_identifiers unless @level_identifiers;
        return sub {
            my ($tree) = @_;
            my @numbers = $level_identifiers[$tree->getIndex()];
            my $current_tree = $tree->getParent();
            until ($current_tree->isRoot()) {
                unshift @numbers => $level_identifiers[$current_tree->getIndex()];
                $current_tree = $current_tree->getParent();
            }
            return ((join "." => @numbers) . " " . $tree->getNodeValue());
        };
    };

    # installs a pair of filters which use @level_identifiers as the
    # labels for each level (the default is 1 .. 100)
    sub useDotSeparatedLevelFilters {
        my ($self, @level_identifiers) = @_;
        $self->{parse_filter} = $make_DOT_SEPERATED_LEVEL_PARSE->(@level_identifiers);
        $self->{deparse_filter} = $make_DOT_SEPERATED_LEVEL_DEPARSE->(@level_identifiers);
        $self->{deparse_filter_cleanup} = undef;
    }

    # the old misspelled name, kept for backwards compatibility
    *useDotSeperatedLevelFilters = \&useDotSeparatedLevelFilters;

}
229
230## nested parens filters
231## ----------------------------------------------
{

    # builds a parse filter for the nested parens format. Each filter
    # has its own stack of the parens which are currently open, and
    # the depth of a node is the number of open parens around it
    # (minus one). The filter eats tokens until it has a node value
    # or the iterator runs out, in which case it returns an empty
    # string as the node.
    my $make_NESTED_PARENS_PARSE = sub {
        my @paren_stack;
        return sub {
            my ($line_iterator) = @_;
            my $line = $line_iterator->next();
            my $node = "";
            TOKEN: while (1) {
                if ($line eq "(") {
                    push @paren_stack => $line;
                    last TOKEN unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                }
                elsif ($line eq ")") {
                    pop @paren_stack;
                    last TOKEN unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                }
                elsif ($line eq '"') {
                    # gather up everything to the closing quote, this
                    # is the node as is, even if it is empty or looks
                    # like a paren (looping on an empty string here
                    # would never end)
                    my $quoted = "";
                    while ($line_iterator->hasNext()) {
                        my $next = $line_iterator->next();
                        last if $next eq '"';
                        $quoted .= $next;
                    }
                    $node = $quoted;
                    last TOKEN;
                }
                elsif ($line =~ /\A\s\z/) {
                    # discard misc whitespace, the input is split on
                    # any whitespace character and not only on spaces,
                    # and it may well be the very last token
                    last TOKEN unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                }
                else {
                    $node = $line;
                    last TOKEN;
                }
            }
            my $depth = $#paren_stack;
            $depth = 0 if $depth < 0;
            return ($depth, $node);
        };
    };

    # this is used in clean up as well
    # NOTE:
    # it is shared by every parser which uses these filters, and is
    # only reset by the clean up filter
    my $prev_depth;

    # deparse filter: opens a paren when we go down a level, closes
    # them when we come back up, and puts double quotes around any
    # node value which has whitespace in it
    my $NESTED_PARENS_DEPARSE = sub {
        my ($tree) = @_;
        my $output = "";
        unless (defined($prev_depth)) {
            $output .= "(";
            $prev_depth = $tree->getDepth();
        }
        else {
            my $current_depth = $tree->getDepth();
            if ($prev_depth == $current_depth) {
                $output .= " ";
            }
            elsif ($prev_depth < $current_depth) {
                $output .= " (";
            }
            elsif ($prev_depth > $current_depth) {
                my $delta = $prev_depth - $current_depth;
                $output .= ")" x $delta . " ";
            }
            $prev_depth = $current_depth;
        }
        my $current_node = $tree->getNodeValue();
        $current_node = '"' . $current_node . '"' if $current_node =~ /\s/;
        $output .= $current_node;
        return $output;
    };

    # clean up filter: gets all the deparsed lines and returns them
    # with one more element, the parens needed to close what is
    # still open
    my $NESTED_PARENS_CLEANUP = sub {
        my $closing_parens = defined($prev_depth) ? $prev_depth : 0;
        # unset this so it can be used again
        undef $prev_depth;
        return @_, (")" x ($closing_parens + 1));
    };

    # installs the nested parens filters, the parse filter is a new
    # one each time so that it starts with an empty paren stack
    sub useNestedParensFilters {
        my ($self) = @_;
        $self->{parse_filter} = $make_NESTED_PARENS_PARSE->();
        $self->{deparse_filter} = $NESTED_PARENS_DEPARSE;
        $self->{deparse_filter_cleanup} = $NESTED_PARENS_CLEANUP;
    }
}
317
318## manual filters
319## ----------------------------------------------
320# a filter is a subroutine reference
321# which gets executed upon each line
322# and it must return two values:
323# 	- the depth of the node
# 	- the value of the node (which can
# 	  be anything; string, array ref,
# 	  object instance, you name it)
327# NOTE:
328# if a filter is not specified, then
329# the parsers iterator is expected to
330# return the dual values.
331
# Stores $filter as the parse filter. Dies unless $filter is a
# (plain, unblessed) code reference.
sub setParseFilter {
    my ($self, $filter) = @_;
    die "Insufficient Arguments : parse filter must be a code reference"
        unless defined($filter) && ref($filter) eq "CODE";
    $self->{parse_filter} = $filter;
}
338
# Stores $filter as the deparse filter. Dies unless $filter is a
# (plain, unblessed) code reference.
# NOTE:
# the deparse clean up filter slot is left as it is.
sub setDeparseFilter {
    my ($self, $filter) = @_;
    # the message used to say "parse filter", which pointed at the
    # wrong method
    (defined($filter) && ref($filter) eq "CODE")
        || die "Insufficient Arguments : deparse filter must be a code reference";
    $self->{deparse_filter} = $filter;
}
345
346## ----------------------------------------------------------------------------
347
# Accessor for the tree held by the parser.
sub getTree {
    my $self = shift;
    return $self->{tree};
}
352
# deparse front end, it checks that there is something to deparse
# with (a deparse filter) and something to deparse (a tree which is
# not a leaf), and dies with a "Parse Error" if not. The work is done
# by _deparse, which gives back either:
# 	- an array of lines (in list context)
# 	- or one large string (in scalar context)
sub deparse {
    my ($self) = @_;
    die "Parse Error : no deparse filter is specified"
        unless defined $self->{deparse_filter};
    die "Parse Error : Tree is a leaf node, cannot de-parse a tree that has not be created yet"
        if $self->{tree}->isLeaf();
    return $self->_deparse();
}
367
# parser front end, it checks that there is a parse filter and some
# input (an iterator) to work on, and dies with a "Parse Error" if
# either is missing. The work is done by _parse.
sub parse {
    my ($self) = @_;
    die "Parse Error : No parse filter is specified to parse with"
        unless defined $self->{parse_filter};
    die "Parse Error : no input has yet been defined, there is nothing to parse"
        unless defined $self->{iterator};
    return $self->_parse();
}
377
378## private methods
379
# private method which does the deparsing: the deparse filter is run
# on each tree handed to us by traverse, and everything it returns is
# collected as lines. If there is a clean up filter, it is given all
# the lines and what it returns replaces them. Returns the lines in
# list context, or the lines joined with newlines in scalar context.
sub _deparse {
    my ($self) = @_;
    my @lines;
    my $collect = sub {
        my ($node) = @_;
        push @lines, $self->{deparse_filter}->($node);
    };
    $self->{tree}->traverse($collect);
    my $cleanup = $self->{deparse_filter_cleanup};
    @lines = $cleanup->(@lines) if defined $cleanup;
    return @lines if wantarray;
    return join("\n", @lines);
}
393
# private method which parses given
# an iterator and a tree
#
# The parse filter is called (with the iterator) for as long as the
# iterator has something left, and must return the depth and the node
# for the next tree. The node is either a Tree::Simple (or an instance
# of whatever class our own tree is), which is used as it is, or a
# value which gets wrapped in a new tree of the same class as our own.
# The new tree is then hung off the tree added before it, according
# to its depth.
#
# Returns the tree held by the parser. Dies with a "Parse Error" if
# the depth is not a number, if the node is not defined, or if the
# depth is more than one level deeper than the tree before it.
sub _parse {
    my ($self) = @_;
    my $tree_type = ref($self->{tree});
    my ($i, $current_tree) = ($self->{iterator}, $self->{tree});
    while ($i->hasNext()) {
        my ($depth, $node) = $self->{parse_filter}->($i);
        # if we get nothing back and the iterator
        # is exhausted, then we know it is time to
        # stop parsing the input.
        # NOTE:
        # "nothing" is an undefined or empty node, a node of "0" is
        # a real value and must not be thrown away just because it
        # happens to be the last one
        my $node_is_empty = !defined($node) || (!ref($node) && $node eq '');
        last if !$depth && $node_is_empty && !$i->hasNext();
        # depth must be defined ...
        (defined($depth)
            &&
            # and a digit (int or float)
            ($depth =~ /^\d+(\.\d*)?$/)
            # otherwise we throw an exception
            ) || die "Parse Error : Incorrect Value for depth (" . ((defined $depth) ? $depth : "undef") . ")";
        # and node is fine as long as it is defined
        (defined($node)) || die "Parse Error : node is not defined";

        my $new_tree;
        # if we get back a tree of the same type,
        # or even of a different type, but still
        # a Tree::Simple, then we use that ....
        if (blessed($node) && ($node->isa($tree_type) || $node->isa('Tree::Simple'))) {
            $new_tree = $node;
        }
        # otherwise, we assume it is intended to be
        # the node of the tree
        else {
            $new_tree = $tree_type->new($node);
        }

        if ($current_tree->isRoot()) {
            # the very first tree always goes right under the root,
            # whatever its depth is
            $current_tree->addChild($new_tree);
        }
        else {
            my $tree_depth = $current_tree->getDepth();
            if ($depth > $tree_depth) {
                # going down, but only ever one level at a time
                (($depth - $tree_depth) <= 1)
                    || die "Parse Error : the difference between the depth ($depth) and the tree depth ($tree_depth) is too much (" . ($depth - $tree_depth) . ") at '$node'";
                $current_tree->addChild($new_tree);
            }
            else {
                # same level or coming back up, so we climb until we
                # are no longer deeper than the new tree (which is
                # not at all when the depth is the same) and add the
                # new tree as a sibling of where we end up
                $current_tree = $current_tree->getParent()
                    while ($depth < $current_tree->getDepth());
                $current_tree->addSibling($new_tree);
            }
        }
        # the next tree is always placed relative to this one
        $current_tree = $new_tree;
    }
    return $self->{tree};
}
454
4551;
456
457__END__
458
459=pod
460
461=head1 NAME
462
463Tree::Parser - Module to parse formatted files into tree structures
464
465=head1 SYNOPSIS
466
467  use Tree::Parser;
468
469  # create a new parser object with some input
470  my $tp = Tree::Parser->new($input);
471
472  # use the built in tab indent filters
473  $tp->useTabIndentedFilters();
474
475  # use the built in space indent filters
476  $tp->useSpaceIndentedFilters(4);
477
  # use the built in dot-separated numbers filters
  $tp->useDotSeparatedLevelFilters();
480
481  # use the nested parens filter
482  $tp->useNestedParensFilters();
483
484  # create your own filter
485  $tp->setParseFilter(sub {
486      my ($line_iterator) = @_;
487      my $line = $line_iterator->next();
488      my ($id, $tabs, $desc) = $line =~ /(\d+)(\t*)(.*)/;
489      my $depth = length $tabs;
490      return ($depth, { id => $id, desc => $desc } );
491  });
492
493  # parse our input and get back a tree
494  my $tree = $tp->parse();
495
496  # create your own deparse filter
497  # (which is in the inverse of our
498  # custom filter above)
499  $tp->setDeparseFilter(sub {
500      my ($tree) = @_;
501      my $info = $tree->getNodeValue();
502      return ($info->{id} . ("\t" x $tree->getDepth()) . $info->{desc});
503  });
504
505  # deparse our tree and get back a string
506  my $tree_string = $tp->deparse();
507
508=head1 DESCRIPTION
509
This module can parse various types of input (formatted and containing
hierarchical information) into tree structures. It can also deparse the
same tree structures back into a string. It accepts various types of
input, such as strings, filenames and array references. The tree structure
is a hierarchy of B<Tree::Simple> objects.
515
516The parsing is controlled through a parse filter, which is used to process
517each "line" in the input (see C<setParseFilter> below for more information
518about parse filters).
519
The deparsing is likewise controlled by a deparse filter, which is used to
convert each tree node into a string representation.
522
523This module can be viewed (somewhat simplistically) as a serialization tool
524for B<Tree::Simple> objects. Properly written parse and deparse filters can
525be used to do "round-trip" tree handling.
526
527=head1 METHODS
528
529=head2 Constructor
530
531=over 5
532
533=item B<new ($tree | $input)>
534
535The constructor is used primarily for creating an object instance. Initializing
536the object is done by the C<_init> method (see below).
537
538=back
539
540=head2 Input Processing
541
542=over 4
543
544=item B<setInput ($input)>
545
This method will take various types of input, and pre-process them through the
C<prepareInput> method below.
548
549=item B<prepareInput ($input)>
550
The C<prepareInput> method is used to pre-process certain types of C<$input>.
It accepts any of the following types of arguments:
553
554=over 4
555
556=item * I<an B<Array::Iterator> object>
557
558This just gets passed on through.
559
560=item * I<an array reference containing the lines to be parsed>
561
562This type of argument is used to construct an B<Array::Iterator> instance.
563
564=item * I<a filename>
565
566The file is opened, its contents slurped into an array, which is then used to
567construct an B<Array::Iterator> instance.
568
569B<NOTE>: we used to only handle files with the C<.tree> extension, however that
570was annoying, so now we accept any file name.
571
572=item * I<a string>
573
574The string is expected to have at least one embedded newline or be in the nested
575parens format.
576
577=back
578
579It then returns an B<Array::Iterator> object ready for the parser.
580
581=item B<setFileEncoding($encoding)>
582
This allows you to specify the C<$encoding> that the file should be read using.
This is only applicable when your input is a file.
585
586=back
587
588=head2 Filter Methods
589
590=over 5
591
592=item B<useTabIndentedFilters>
593
This will set the parse and deparse filters to handle tab indented content. This
is for true tabs C<\t> only. The parse and deparse filters this uses are compatible
with one another so round-tripping is possible.
597
598Example:
599
600  1.0
601      1.1
602      1.2
603          1.2.1
604  2.0
605      2.1
606  3.0
607      3.1
608          3.1.1
609
610=item B<useSpaceIndentedFilters ($num_spaces)>
611
This will set the parse and deparse filters to handle space indented content. The
optional C<$num_spaces> argument allows you to specify how many spaces are to be
treated as a single indent; if this argument is not specified it will default to a
4 space indent. The parse and deparse filters this uses are compatible with one
another so round-tripping is possible.
617
618Example:
619
620  1.0
621    1.1
622    1.2
623      1.2.1
624  2.0
625    2.1
626  3.0
627    3.1
628      3.1.1
629
630=item B<useDotSeparatedLevelFilters (@level_identifiers)>
631
632This will set the parse and deparse filters to handle trees which are described in
633the following format:
634
635  1 First Child
636  1.1 First Grandchild
637  1.2 Second Grandchild
638  1.2.1 First Child of the Second Grandchild
639  1.3 Third Grandchild
640  2 Second Child
641
There must be at least one space separating the level identifier from the level
name, all other spaces will be considered part of the name itself.

The parse and deparse filters this uses are compatible with one another so
round-tripping is possible.
647
648The labels used are those specified in the C<@level_identifiers> argument. The
649above code uses the default level identifiers (C<1 .. 100>). But by passing the
650following as a set of level identifiers: C<'a' .. 'z'>, you can successfully
651parse a format like this:
652
653  a First Child
654  a.a First Grandchild
655  a.b Second Grandchild
656  a.b.a First Child of the Second Grandchild
657  a.c Third Grandchild
658  b Second Child
659
660Currently, you are restricted to only one set of level identifiers. Future plans
661include allowing each depth to have its own set of identifiers, therefore allowing
662formats like this: C<1.a> or other such variations (see L<TO DO> section for more
663info).
664
665=item B<useDotSeperatedLevelFilters>
666
This old misspelled method name is kept for backwards compatibility.
668
669=item B<useNestedParensFilters>
670
671This will set the parse and deparse filters to handle trees which are described
672in the following format:
673
674  (1 (1.1 1.2 (1.2.1) 1.3) 2 (2.1))
675
676The parser will count the parentheses to determine the depth of the current node.
677This filter can also handle double quoted strings as values as well. So this would
678be valid input:
679
680  (root ("tree 1" ("tree 1 1" "tree 1 2") "tree 2"))
681
682This format is currently somewhat limited in that the input must all be on one
683line and not contain a trailing newline. It also does not handle embedded escaped
684double quotes. Further refinement and improvement of this filter format is to come
685(and patches are always welcome).
686
It should be noted that this filter also cannot perform a roundtrip operation
where the deparsed output is the exact same as the parsed input because it does
not treat whitespace as significant (unless it is within a double quoted string).
690
691=item B<setParseFilter ($filter)>
692
A parse filter is a subroutine reference which is used to process each element
in the input. As the main parse loop runs, it calls this filter routine and
passes it the B<Array::Iterator> instance which represents the input. To get
the next element/line/token in the iterator, the filter must call C<next>, the
element should then be processed by the filter. A filter can if it wants advance
the iterator further by calling C<next> more than once if necessary, there are
no restrictions as to what it can do. However, the filter B<must> return these
two values in order to correctly construct the tree:
701
702=over 4
703
704=item I<the depth of the node within the tree>
705
706=item Followed by either of the following items:
707
708=over 4
709
710=item I<the value of the node>
711
712This value will be used as the node value when constructing the new tree. This
713can basically be any scalar value.
714
715=item I<an instance of either a Tree::Simple object, or some derivative of Tree::Simple>
716
If you need to perform special operations on the tree instance before it gets
added to the larger hierarchy, then you can construct it within the parse filter
and return it. An example of why you might want to do this would be if you
wanted to set the UID of the tree instance from something in the parse filter.
721
722=back
723
724=back
725
726The following is an example of a very basic filter which simply counts the
727number of tab characters to determine the node depth and then captures any
728remaining character on the line.
729
730  $tree_parser->setParseFilter(sub {
731      my ($iterator) = @_;
732      my $line = $iterator->next();
      # match the tabs and all that follows them
734      my ($tabs, $node) = ($line =~ /(\t*)(.*)/);
735      # calculate the depth by seeing how long
736      # the tab string is.
737      my $depth = length $tabs;
738      # return the depth and the node value
739      return ($depth, $node);
740  });
741
742=item B<setDeparseFilter ($filter)>
743
The deparse filter is the opposite of the parse filter, it takes each element
of the tree and returns a string representation of it. The filter routine gets
passed a B<Tree::Simple> instance and is expected to return a single string.
However, this is not enforced; we actually will gobble up everything the filter
returns, but keep in mind that each element returned is considered to be a single
line in the output, so multiple elements will be treated as multiple lines.
750
751Here is an example of a deparse filter. This can be viewed as the inverse of
752the parse filter example above.
753
754  $tp->setDeparseFilter(sub {
755      my ($tree) = @_;
756      return ("\t" x $tree->getDepth()) . $tree->getNodeValue();
757  });
758
759=back
760
761=head2 Accessors
762
763=over 4
764
765=item B<getTree>
766
767This method returns the tree held by the parser or set through the constructor.
768
769=back
770
771=head2 Parse/Deparse
772
773=over 4
774
775=item B<parse>
776
Parsing is pretty automatic once everything is set up. This routine will check
to be sure you have all you need to proceed, and throw an exception if not.
Once the parsing is complete, the tree will be stored internally as well as
returned from this method.
781
782=item B<deparse>
783
This method too is pretty automatic, it verifies that it has all it needs,
throwing an exception if it does not. It will return an array of lines in list
context, or in scalar context it will join the array into a single string
separated by newlines.
788
789=back
790
791=head2 Private Methods
792
793=over 4
794
795=item B<_init ($tree | $input)>
796
This will initialize the slots of the object. If given a C<$tree> object, it
will store it. This is currently the preferred way in which to use subclasses
of B<Tree::Simple> to build your tree with, as this object will be used to
build any other trees (see L<TO DO> for more information). If given some other
kind of input, it will process this through the C<prepareInput> method.
802
803=item B<_parse>
804
This is where all the parsing work is done. If you are truly interested in the
inner workings of this method, I suggest you refer to the source. It is a very
simple algorithm and should be easy to understand.
808
809=item B<_deparse>
810
811This is where all the deparsing work is done. As with the C<_parse> method, if
812you are interested in the inner workings, I suggest you refer to the source.
813
814=back
815
816=head1 TO DO
817
818=over 4
819
820=item Enhance the Nested Parens filter
821
822This filter is somewhat limited in its handling of embedded newlines as well as
823embedded double quotes (even if they are escaped). I would like to improve this
824filter more when time allows.
825
=item Enhance the Dot Separated Level filter
827
828I would like to enhance this built in filter to handle multi-level level-identifiers,
829basically allowing formats like this:
830
831  1 First Child
832  1.a First Grandchild
833  1.b Second Grandchild
834  1.b.I First Child of the Second Grandchild
835  1.b.II Second Child of the Second Grandchild
836  1.c Third Grandchild
837  2 Second Child
838
839=item Make Tree::Simple subclasses more easy to handle
840
Currently in order to have Tree::Parser use a subclass of Tree::Simple to build
the hierarchy with, you must pass a tree into the constructor, and then set the
input manually. This could be handled better I think, but right now I am not 100%
sure how best to go about it.
845
846=back
847
848=head1 BUGS
849
850None that I am aware of. Of course, if you find a bug, let me know, and I will be
851sure to fix it. This module, in an earlier form, has been and is being used in
852production for approx. 1 year now without incident. This version has been improved
853and the test suite added.
854
855=head1 CODE COVERAGE
856
857I use B<Devel::Cover> to test the code coverage of my tests, below is the B<Devel::Cover>
858report on this module's test suite.
859
860 ---------------------------- ------ ------ ------ ------ ------ ------ ------
861 File                           stmt branch   cond    sub    pod   time  total
862 ---------------------------- ------ ------ ------ ------ ------ ------ ------
863 Tree/Parser.pm                100.0   87.9   81.2  100.0  100.0  100.0   94.6
864 ---------------------------- ------ ------ ------ ------ ------ ------ ------
865 Total                         100.0   87.9   81.2  100.0  100.0  100.0   94.6
866 ---------------------------- ------ ------ ------ ------ ------ ------ ------
867
868=head1 SEE ALSO
869
This module is not an attempt at a general purpose parser by any stretch of the
imagination. It is basically a very flexible special purpose parser, it only
builds Tree::Simple hierarchies, but your parse filters can be as complex as necessary.
If this is not what you are looking for, then you might want to consider one of
the following modules:
875
876=over 4
877
878=item B<Parse::RecDescent>
879
This is a general purpose Recursive Descent parser generator written by Damian
Conway. If your parsing needs lean towards the more complex, this is a good module
for you. Recursive Descent parsing is known to be slower than other parsing styles,
but it tends to be easier to write grammars for, so there is a trade off. If speed
is a concern, then you may just want to skip perl and go straight to C and use
C<yacc>.
886
887=item B<Parse::Yapp>
888
889As an alternative to Recursive Descent parsing, you can do LALR parsing. It is
890faster and does not have some of the well known (and avoidable) problems of
891Recursive Descent parsing. I have never actually used this module, but I have
892heard good things about it.
893
894=item B<Parse::FixedLength>
895
896If all you really need to do is process a file with fixed length fields in it,
897you can use this module.
898
899=item B<Parse::Tokens>
900
901This class will help you parse text with embedded tokens in it. I am not very
902familiar with this module, but it looks interesting.
903
904=back
905
There are also a number of specific parsers out there, such as B<HTML::Parser>
and B<XML::Parser>, which do one thing and do it well. If you are looking to
parse HTML or XML, don't use my module, use these ones, it just makes sense.
Use the right tool for the job basically.
910
911=head1 DEPENDENCIES
912
913This module uses two other modules I have written:
914
915=over 5
916
917=item B<Tree::Simple>
918
919=item B<Array::Iterator>
920
921=back
922
923=head1 ACKNOWLEDGEMENTS
924
925=over 4
926
927=item Thanks to Chad Ullman for reporting RT Bug #12244 and providing code and test case for it.
928
929=item Thanks to Gerd for reporting RT Bug #13041 and providing code to fix it.
930
931=back
932
933=head1 AUTHOR
934
935stevan little, E<lt>stevan@iinteractive.comE<gt>
936
937=head1 COPYRIGHT AND LICENSE
938
939Copyright 2004-2007 by Infinity Interactive, Inc.
940
941L<http://www.iinteractive.com>
942
943This library is free software; you can redistribute it and/or modify
944it under the same terms as Perl itself.
945
946=cut
947