
package Tree::Parser;

use strict;
use warnings;

our $VERSION = '0.15';

use Scalar::Util qw(blessed);

use Tree::Simple;
use Array::Iterator;

### constructor

# Creates a parser. May be invoked on the class or on an existing
# instance (only the instance's class name is reused). The optional
# $input is handed to _init, which decides what to do with it.
sub new {
    my ($invocant, $input) = @_;
    my $class = ref($invocant) || $invocant;
    my $self  = bless {}, $class;
    $self->_init($input);
    return $self;
}

# Sets up the object's slots:
#   - the three filter slots start out undefined
#   - a Tree::Simple $input becomes the tree (no iterator), the
#     expectation being that it will be deparsed
#   - any other true $input is run through prepareInput to get an
#     iterator, and a fresh ROOT tree is created
#   - a false/missing $input yields a fresh ROOT tree and no iterator
sub _init {
    my ($self, $input) = @_;

    $self->{$_} = undef
        for qw(parse_filter deparse_filter deparse_filter_cleanup);

    my $has_input = $input ? 1 : 0;
    my $is_tree   = $has_input && blessed($input) && $input->isa("Tree::Simple");

    $self->{iterator} = ($has_input && !$is_tree)
        ? $self->prepareInput($input)
        : undef;
    $self->{tree} = $is_tree
        ? $input
        : Tree::Simple->new(Tree::Simple->ROOT);
}

### methods

# Records the encoding layer name used when prepareInput reads a file.
# Dies if $file_encoding is undefined.
sub setFileEncoding {
    my ($self, $file_encoding) = @_;
    die "Insufficient Arguments : file_encoding must be defined"
        unless defined($file_encoding);
    $self->{file_encoding} = $file_encoding;
}

# Replaces the parser's iterator with one built from $input
# (see prepareInput). Dies if $input is undefined.
sub setInput {
    my ($self, $input) = @_;
    die "Insufficient Arguments : input undefined"
        unless defined($input);
    $self->{iterator} = $self->prepareInput($input);
}

# prepareInput accepts any of the following
# types of arguments:
#   - a .tree file
#   - an array reference of lines
#   - a single string of code (can have embedded newlines)
# and then returns an iterator.
# references will be stringified, unless they are array references or
# Array::Iterator objects.
# Turns $input into an Array::Iterator. The cases are tried in this order:
#   1. an Array::Iterator object       -> returned untouched
#   2. an array reference              -> wrapped in an iterator
#   3. a string ending in ".tree"      -> read as a file
#   4. a string containing a newline   -> split into lines
#   5. a string starting with "("      -> tokenised for the nested parens filter
#   6. a string naming an existing file -> read as a file
# Anything else dies with an "Incorrect Object Type" message.
sub prepareInput {
    my ($self, $input) = @_;

    # already an A:I instance
    return $input
        if blessed($input) and $input->isa('Array::Iterator');

    # a simple array
    return Array::Iterator->new($input)
        if ref($input) eq 'ARRAY';

    # Reads the file named by $input (honouring the optional file_encoding
    # slot as an I/O layer) and returns an iterator over its lines. This is
    # a closure rather than a label + goto, because jumping into a block
    # with goto is deprecated in Perl.
    my $slurp_file = sub {
        my $encoding = (defined $self->{file_encoding}
                            ? (":" . $self->{file_encoding})
                            : '');
        open(my $tree_file, ("<" . $encoding), $input) || die "cannot open file: $!";
        my @lines = <$tree_file>;
        close($tree_file);
        return Array::Iterator->new(@lines);
    };

    # stringifies to something that ends in .tree
    return $slurp_file->() if $input =~ /\.tree$/;

    # everything else
    my @lines;
    if ($input =~ /\n/) {
        @lines = split /\n/ => $input;
        (scalar(@lines) > 1)
            || die "Incorrect Object Type : input looked like a single string, but only a single line ($input) unable to parse input into line (" . (join "==" => @lines) . ")";
    }
    elsif ($input =~ /^\(/) {
        # keep the delimiters as tokens, drop the empty strings split leaves
        @lines = grep { $_ ne "" } split /(\(|\)|\s|\")/ => $input; #"
    }
    elsif (-f $input) {
        # lets check if it is a file though
        return $slurp_file->();
    }
    else {
        # otherwise, croak on this sucker ...
        die "Incorrect Object Type : input looked like a single string, but has no newlines or does not start with paren";
    }
    return Array::Iterator->new(@lines);
}

## ----------------------------------------------------------------------------
## Filters
## ----------------------------------------------------------------------------

## tab indented filters
## ----------------------------------------------
{
    # parse: depth is the number of leading tabs, the node is the rest
    # of the line (up to, but not including, any newline)
    my $TAB_INDENTED_PARSE = sub {
        my ($line_iterator) = @_;
        my $line = $line_iterator->next();
        my ($tabs, $node) = $line =~ /(\t*)(.*)/;
        my $depth = length $tabs;
        return ($depth, $node);
    };

    # deparse: one tab per level of depth, followed by the node value
    my $TAB_INDENTED_DEPARSE = sub {
        my ($tree) = @_;
        return ("\t" x $tree->getDepth()) . $tree->getNodeValue();
    };

    # Installs the tab indented parse/deparse filters and clears any
    # deparse cleanup hook.
    sub useTabIndentedFilters {
        my ($self) = @_;
        $self->{parse_filter}   = $TAB_INDENTED_PARSE;
        $self->{deparse_filter} = $TAB_INDENTED_DEPARSE;
        $self->{deparse_filter_cleanup} = undef;
    }
}

## space indented filters
## ----------------------------------------------
{
    # parse: depth is (leading whitespace length / $num_spaces), which may
    # be fractional if the indent is not a multiple of $num_spaces.
    # NOTE: \s matches any whitespace, so leading tabs are counted too.
    my $make_SPACE_INDENTED_PARSE = sub {
        my ($num_spaces) = @_;
        return sub {
            my ($line_iterator) = @_;
            my $line = $line_iterator->next();
            my ($spaces, $node) = $line =~ /(\s*)(.*)/;
            my $depth = (length($spaces) / $num_spaces);
            return ($depth, $node);
        };
    };

    # deparse: $num_spaces spaces per level of depth, then the node value
    my $make_SPACE_INDENTED_DEPARSE = sub {
        my ($num_spaces) = @_;
        my $spaces = (" " x $num_spaces);
        return sub {
            my ($tree) = @_;
            return ($spaces x $tree->getDepth()) . $tree->getNodeValue();
        };
    };

    # Installs the space indented filters; $num_spaces defaults to 4
    # when it is missing or false.
    sub useSpaceIndentedFilters {
        my ($self, $num_spaces) = @_;
        $num_spaces ||= 4;
        $self->{parse_filter}   = $make_SPACE_INDENTED_PARSE->($num_spaces);
        $self->{deparse_filter} = $make_SPACE_INDENTED_DEPARSE->($num_spaces);
        $self->{deparse_filter_cleanup} = undef;
    }
}

## dot separated level filters
## ----------------------------------------------
{

    my @default_level_identifiers = (1 .. 100);

    # parse: lines look like "1.2.1 Some Value"; the depth is the number
    # of dot separated identifiers minus one.
    my $make_DOT_SEPERATED_LEVEL_PARSE = sub {
        my (@level_identifiers) = @_;
        @level_identifiers = @default_level_identifiers unless @level_identifiers;
        # the identifiers never change, so build this once per filter
        my $level_identifiers_reg_ex = join "|" => @level_identifiers;
        return sub {
            my ($line_iterator) = @_;
            my $line = $line_iterator->next();
            # NOTE(review): the square brackets make this a character class,
            # so it matches any run of the characters that appear in the
            # identifiers (plus "(", "|", ")" and "."), not the identifiers
            # as whole words. Left as-is to keep the accepted input unchanged.
            my ($numbers, $value) = $line =~ /([($level_identifiers_reg_ex)\.]*)\s(.*)/;
            # now split the numbers
            my @numbers = split /\./ => $numbers;
            # we know the depth of the tree by how many
            # numbers are present, and we assume we were
            # given them in sequential order anyway
            my $depth = $#numbers;
            return ($depth, $value);
        };
    };

    # deparse: walks from the node up to (but not including) the root,
    # collecting the identifier for each ancestor's sibling index
    my $make_DOT_SEPERATED_LEVEL_DEPARSE = sub {
        my (@level_identifiers) = @_;
        @level_identifiers = @default_level_identifiers unless @level_identifiers;
        return sub {
            my ($tree) = @_;
            my @numbers = $level_identifiers[$tree->getIndex()];
            my $current_tree = $tree->getParent();
            until ($current_tree->isRoot()) {
                unshift @numbers => $level_identifiers[$current_tree->getIndex()];
                $current_tree = $current_tree->getParent();
            }
            return ((join "." => @numbers) . " " . $tree->getNodeValue());
        };
    };

    # Installs the dot separated level filters. @level_identifiers is
    # optional and defaults to 1 .. 100.
    sub useDotSeparatedLevelFilters {
        my ($self, @level_identifiers) = @_;
        $self->{parse_filter}   = $make_DOT_SEPERATED_LEVEL_PARSE->(@level_identifiers);
        $self->{deparse_filter} = $make_DOT_SEPERATED_LEVEL_DEPARSE->(@level_identifiers);
        $self->{deparse_filter_cleanup} = undef;
    }

    # the old, misspelled name is kept as an alias
    *useDotSeperatedLevelFilters = \&useDotSeparatedLevelFilters;

}

## nested parens filters
## ----------------------------------------------
{

    # parse: consumes tokens (as produced by prepareInput for input that
    # starts with a paren) until it has a node value or runs out of input.
    # The depth is the number of currently open parens minus one, never
    # less than 0. The paren stack lives in the closure, so each call to
    # useNestedParensFilters starts with a fresh one.
    my $make_NESTED_PARENS_PARSE = sub {
        my @paren_stack;
        return sub {
            my ($line_iterator) = @_;
            my $line = $line_iterator->next();
            my $node = "";
            # loop until $node is true or is 0
            while (!$node && $node ne 0) {
                if ($line eq "(") {
                    push @paren_stack => $line;
                    last unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                }
                elsif ($line eq ")") {
                    pop @paren_stack;
                    last unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                }
                elsif ($line eq '"') {
                    $line = ""; # clear the quote
                    while ($line_iterator->hasNext()) {
                        my $next = $line_iterator->next();
                        last if $next eq '"';
                        $line .= $next;
                    }
                    # whatever was quoted is the node value, full stop;
                    # re-examining it would treat a quoted paren or space
                    # as syntax and would spin forever on an empty string
                    $node = $line;
                    last;
                }
                elsif ($line eq ' ') {
                    # discard misc whitespace, but do not read past the
                    # end of the input when the whitespace is trailing
                    last unless $line_iterator->hasNext();
                    $line = $line_iterator->next();
                    next;
                }
                else {
                    $node = $line;
                }
            }
            my $depth = $#paren_stack;
            $depth = 0 if $depth < 0;
            return ($depth, $node);
        };
    };

    # this is used in clean up as well
    # NOTE: it is shared by every parser that uses the nested parens
    # filters; it is only reset when the cleanup filter runs.
    my $prev_depth;

    # deparse: emits the node value (double quoted when it contains
    # whitespace) prefixed with whatever parens/spaces are needed to move
    # from the previous node's depth to this one
    my $NESTED_PARENS_DEPARSE = sub {
        my ($tree) = @_;
        my $output = "";
        unless (defined($prev_depth)) {
            # very first node, open the outermost paren
            $output .= "(";
            $prev_depth = $tree->getDepth();
        }
        else {
            my $current_depth = $tree->getDepth();
            if ($prev_depth == $current_depth) {
                $output .= " ";
            }
            elsif ($prev_depth < $current_depth) {
                $output .= " (";
            }
            elsif ($prev_depth > $current_depth) {
                my $delta = $prev_depth - $current_depth;
                $output .= ")" x $delta . " ";
            }
            $prev_depth = $current_depth;
        }
        my $current_node = $tree->getNodeValue();
        $current_node = '"' . $current_node . '"' if $current_node =~ /\s/;
        $output .= $current_node;
        return $output;
    };

    # cleanup: appends the closing parens still owed after the last node
    # and resets the shared depth tracker
    my $NESTED_PARENS_CLEANUP = sub {
        my $closing_parens = $prev_depth;
        # unset this so it can be used again
        undef $prev_depth;
        return @_, (")" x ($closing_parens + 1))
    };

    # Installs the nested parens filters, including the deparse cleanup.
    sub useNestedParensFilters {
        my ($self) = @_;
        $self->{parse_filter}   = $make_NESTED_PARENS_PARSE->();
        $self->{deparse_filter} = $NESTED_PARENS_DEPARSE;
        $self->{deparse_filter_cleanup} = $NESTED_PARENS_CLEANUP;
    }
}

## manual filters
## ----------------------------------------------
# a filter is a subroutine reference
# which gets executed upon each line
# and it must return two values:
#   - the depth of the node
#   - the value of the node (which can
#     be anything; string, array ref,
#     object instance, you name it)
# NOTE:
# if a filter is not specified, then
# the parsers iterator is expected to
# return the dual values.

# Installs a custom parse filter. $filter must be a code reference that
# takes the line iterator and returns (depth, node); dies otherwise.
sub setParseFilter {
    my ($self, $filter) = @_;
    (defined($filter) && ref($filter) eq "CODE")
        || die "Insufficient Arguments : parse filter must be a code reference";
    $self->{parse_filter} = $filter;
}

# Installs a custom deparse filter. $filter must be a code reference that
# takes a tree node and returns its line(s); dies otherwise.
# Any cleanup hook left behind by a built-in filter set (the nested parens
# one appends closing parens) is dropped, since it only makes sense
# together with the filter it was installed with.
sub setDeparseFilter {
    my ($self, $filter) = @_;
    (defined($filter) && ref($filter) eq "CODE")
        || die "Insufficient Arguments : deparse filter must be a code reference";
    $self->{deparse_filter_cleanup} = undef;
    $self->{deparse_filter} = $filter;
}

## ----------------------------------------------------------------------------

# Returns the tree held by the parser.
sub getTree {
    my ($self) = @_;
    return $self->{tree};
}

# deparse creates either:
#   - an array of lines (list context)
#   - or one large string (scalar context)
# which contains the values created by the deparse filter.
# Dies if no deparse filter is set or if the tree has no children.
sub deparse {
    my ($self) = @_;
    (defined($self->{deparse_filter}))
        || die "Parse Error : no deparse filter is specified";
    (!$self->{tree}->isLeaf())
        || die "Parse Error : Tree is a leaf node, cannot de-parse a tree that has not be created yet";
    return $self->_deparse();
}

# parser front end
# Dies if no parse filter is set or no input has been supplied, otherwise
# returns the tree built by _parse.
sub parse {
    my ($self) = @_;
    (defined($self->{parse_filter}))
        || die "Parse Error : No parse filter is specified to parse with";
    (defined($self->{iterator}))
        || die "Parse Error : no input has yet been defined, there is nothing to parse";
    return $self->_parse();
}

## private methods

# Runs the deparse filter over every node visited by the tree's traverse
# method, collecting everything the filter returns. If a cleanup hook is
# set, it gets the collected lines and its return value replaces them.
# Returns the lines in list context, or joined with newlines otherwise.
sub _deparse {
    my ($self) = @_;
    my @lines;
    $self->{tree}->traverse(sub {
        my ($tree) = @_;
        push @lines => $self->{deparse_filter}->($tree);
    });
    @lines = $self->{deparse_filter_cleanup}->(@lines) if defined $self->{deparse_filter_cleanup};
    return wantarray ?
                @lines
                :
                join("\n" => @lines);
}

# private method which parses given
# an iterator and a tree
# The parse filter is called once per loop and returns (depth, node).
# Each node is attached relative to the most recently added tree:
#   - same depth    -> sibling of it
#   - one deeper    -> child of it (more than one deeper is an error)
#   - shallower     -> sibling of its ancestor at that depth
# Returns the (root) tree held by the parser.
sub _parse {
    my ($self) = @_;
    # new nodes are built with the same class as the tree we were given
    my $tree_type = ref($self->{tree});
    my ($i, $current_tree) = ($self->{iterator}, $self->{tree});
    while ($i->hasNext()) {
        my ($depth, $node) = $self->{parse_filter}->($i);
        # if we get nothing back and the iterator
        # is exhausted, then we know it is time to
        # stop parsing the input.
        last if !$depth && !$node && !$i->hasNext();
        # depth must be defined ...
        (defined($depth)
            &&
        # and a digit (int or float)
        ($depth =~ /^\d+(\.\d*)?$/)
        # otherwise we throw an exception
        ) || die "Parse Error : Incorrect Value for depth (" . ((defined $depth) ? $depth : "undef") . ")";
        # and node is fine as long as it is defined
        (defined($node)) || die "Parse Error : node is not defined";

        my $new_tree;
        # if we get back a tree of the same type,
        # or even of a different type, but still
        # a Tree::Simple, then we use that ....
        if (blessed($node) && ($node->isa($tree_type) || $node->isa('Tree::Simple'))) {
            $new_tree = $node;
        }
        # otherwise, we assume it is intended to be
        # the node of the tree
        else {
            $new_tree = $tree_type->new($node);
        }

        # the first node always hangs directly off the root,
        # whatever depth the filter reported for it
        if ($current_tree->isRoot()) {
            $current_tree->addChild($new_tree);
            $current_tree = $new_tree;
            next;
        }
        my $tree_depth = $current_tree->getDepth();
        if ($depth == $tree_depth) {
            $current_tree->addSibling($new_tree);
            $current_tree = $new_tree;
        }
        elsif ($depth > $tree_depth) {
            (($depth - $tree_depth) <= 1)
                || die "Parse Error : the difference between the depth ($depth) and the tree depth ($tree_depth) is too much (" . ($depth - $tree_depth) . ") at '$node'";
            $current_tree->addChild($new_tree);
            $current_tree = $new_tree;
        }
        elsif ($depth < $tree_depth) {
            # climb back up until we reach the requested depth
            $current_tree = $current_tree->getParent() while ($depth < $current_tree->getDepth());
            $current_tree->addSibling($new_tree);
            $current_tree = $new_tree;
        }

    }
    return $self->{tree};
}

1;

__END__

=pod

=head1 NAME

Tree::Parser - Module to parse formatted files into tree structures

=head1 SYNOPSIS

  use Tree::Parser;

  # create a new parser object with some input
  my $tp = Tree::Parser->new($input);

  # use the built in tab indent filters
  $tp->useTabIndentedFilters();

  # use the built in space indent filters
  $tp->useSpaceIndentedFilters(4);

  # use the built in dot-separated numbers filters
  $tp->useDotSeparatedLevelFilters();

  # use the nested parens filter
  $tp->useNestedParensFilters();

  # create your own filter
  $tp->setParseFilter(sub {
      my ($line_iterator) = @_;
      my $line = $line_iterator->next();
      my ($id, $tabs, $desc) = $line =~ /(\d+)(\t*)(.*)/;
      my $depth = length $tabs;
      return ($depth, { id => $id, desc => $desc } );
  });

  # parse our input and get back a tree
  my $tree = $tp->parse();

  # create your own deparse filter
  # (which is in the inverse of our
  # custom filter above)
  $tp->setDeparseFilter(sub {
      my ($tree) = @_;
      my $info = $tree->getNodeValue();
      return ($info->{id} . ("\t" x $tree->getDepth()) . $info->{desc});
  });

  # deparse our tree and get back a string
  my $tree_string = $tp->deparse();

=head1 DESCRIPTION

This module can parse various types of input (formatted and containing
hierarchical information) into tree structures. It can also deparse the
same tree structures back into a string. It accepts various types of
input, such as: strings, filenames, array references.
The tree structure
is a hierarchy of B<Tree::Simple> objects.

The parsing is controlled through a parse filter, which is used to process
each "line" in the input (see C<setParseFilter> below for more information
about parse filters).

The deparsing as well is controlled by a deparse filter, which is used to
convert each tree node into a string representation.

This module can be viewed (somewhat simplistically) as a serialization tool
for B<Tree::Simple> objects. Properly written parse and deparse filters can
be used to do "round-trip" tree handling.

=head1 METHODS

=head2 Constructor

=over 5

=item B<new ($tree | $input)>

The constructor is used primarily for creating an object instance. Initializing
the object is done by the C<_init> method (see below).

=back

=head2 Input Processing

=over 4

=item B<setInput ($input)>

This method will take various types of input, and pre-process them through the
C<prepareInput> method below.

=item B<prepareInput ($input)>

The C<prepareInput> method is used to pre-process certain types of C<$input>.
It accepts any of the following types of arguments:

=over 4

=item * I<an B<Array::Iterator> object>

This just gets passed on through.

=item * I<an array reference containing the lines to be parsed>

This type of argument is used to construct an B<Array::Iterator> instance.

=item * I<a filename>

The file is opened, its contents slurped into an array, which is then used to
construct an B<Array::Iterator> instance.

B<NOTE>: we used to only handle files with the C<.tree> extension, however that
was annoying, so now we accept any file name.

=item * I<a string>

The string is expected to have at least one embedded newline or be in the nested
parens format.

=back

It then returns an B<Array::Iterator> object ready for the parser.

=item B<setFileEncoding($encoding)>

This allows you to specify the C<$encoding> that the file should be read using.
This is only applicable when your input is a file.

=back

=head2 Filter Methods

=over 5

=item B<useTabIndentedFilters>

This will set the parse and deparse filters to handle tab indented content. This
is for true tabs C<\t> only. The parse and deparse filters this uses are compatible
with one another so round-tripping is possible.

Example:

  1.0
      1.1
      1.2
          1.2.1
  2.0
      2.1
  3.0
      3.1
          3.1.1

=item B<useSpaceIndentedFilters ($num_spaces)>

This will set the parse and deparse filters to handle space indented content. The
optional C<$num_spaces> argument allows you to specify how many spaces are to be
treated as a single indent, if this argument is not specified it will default to a
4 space indent. The parse and deparse filters this uses are compatible with one
another so round-tripping is possible.

Example:

  1.0
      1.1
      1.2
          1.2.1
  2.0
      2.1
  3.0
      3.1
          3.1.1

=item B<useDotSeparatedLevelFilters (@level_identifiers)>

This will set the parse and deparse filters to handle trees which are described in
the following format:

  1 First Child
  1.1 First Grandchild
  1.2 Second Grandchild
  1.2.1 First Child of the Second Grandchild
  1.3 Third Grandchild
  2 Second Child

There must be at least one space separating the level identifier from the level
name, all other spaces will be considered part of the name itself.

The parse and deparse filters this uses are compatible with one another so
round-tripping is possible.

The labels used are those specified in the C<@level_identifiers> argument. The
above code uses the default level identifiers (C<1 .. 100>). But by passing the
following as a set of level identifiers: C<'a' .. 'z'>, you can successfully
parse a format like this:

  a First Child
  a.a First Grandchild
  a.b Second Grandchild
  a.b.a First Child of the Second Grandchild
  a.c Third Grandchild
  b Second Child

Currently, you are restricted to only one set of level identifiers. Future plans
include allowing each depth to have its own set of identifiers, therefore allowing
formats like this: C<1.a> or other such variations (see L<TO DO> section for more
info).

=item B<useDotSeperatedLevelFilters>

This old misspelled method name is kept for backwards compatibility.

=item B<useNestedParensFilters>

This will set the parse and deparse filters to handle trees which are described
in the following format:

  (1 (1.1 1.2 (1.2.1) 1.3) 2 (2.1))

The parser will count the parentheses to determine the depth of the current node.
This filter can also handle double quoted strings as values. So this would
be valid input:

  (root ("tree 1" ("tree 1 1" "tree 1 2") "tree 2"))

This format is currently somewhat limited in that the input must all be on one
line and not contain a trailing newline. It also does not handle embedded escaped
double quotes. Further refinement and improvement of this filter format is to come
(and patches are always welcome).

It should be noted that this filter also cannot perform a roundtrip operation
where the deparsed output is the exact same as the parsed input because it does
not treat whitespace as significant (unless it is within a double quoted string).

=item B<setParseFilter ($filter)>

A parse filter is a subroutine reference which is used to process each element
in the input. As the main parse loop runs, it calls this filter routine and
passes it the B<Array::Iterator> instance which represents the input.
To get
the next element/line/token in the iterator, the filter must call C<next>, the
element should then be processed by the filter. A filter can, if it wants, advance
the iterator further by calling C<next> more than once if necessary, there are
no restrictions as to what it can do. However, the filter B<must> return these
two values in order to correctly construct the tree:

=over 4

=item I<the depth of the node within the tree>

=item Followed by either of the following items:

=over 4

=item I<the value of the node>

This value will be used as the node value when constructing the new tree. This
can basically be any scalar value.

=item I<an instance of either a Tree::Simple object, or some derivative of Tree::Simple>

If you need to perform special operations on the tree instance before it gets
added to the larger hierarchy, then you can construct it within the parse filter
and return it. An example of why you might want to do this would be if you
wanted to set the UID of the tree instance from something in the parse filter.

=back

=back

The following is an example of a very basic filter which simply counts the
number of tab characters to determine the node depth and then captures any
remaining character on the line.

  $tree_parser->setParseFilter(sub {
      my ($iterator) = @_;
      my $line = $iterator->next();
      # match the tabs and all that follows them
      my ($tabs, $node) = ($line =~ /(\t*)(.*)/);
      # calculate the depth by seeing how long
      # the tab string is.
      my $depth = length $tabs;
      # return the depth and the node value
      return ($depth, $node);
  });

=item B<setDeparseFilter ($filter)>

The deparse filter is the opposite of the parse filter, it takes each element
of the tree and returns a string representation of it.
The filter routine gets
passed a B<Tree::Simple> instance and is expected to return a single string.
However, this is not enforced; we actually will gobble up everything the filter returns,
but keep in mind that each element returned is considered to be a single line
in the output, so multiple elements will be treated as multiple lines.

Here is an example of a deparse filter. This can be viewed as the inverse of
the parse filter example above.

  $tp->setDeparseFilter(sub {
      my ($tree) = @_;
      return ("\t" x $tree->getDepth()) . $tree->getNodeValue();
  });

=back

=head2 Accessors

=over 4

=item B<getTree>

This method returns the tree held by the parser or set through the constructor.

=back

=head2 Parse/Deparse

=over 4

=item B<parse>

Parsing is pretty automatic once everything is set up. This routine will check
to be sure you have all you need to proceed, and throw an exception if not.
Once the parsing is complete, the tree will be stored internally as well as
returned from this method.

=item B<deparse>

This method too is pretty automatic, it verifies that it has all it needs,
throwing an exception if it does not. It will return an array of lines in list
context, or in scalar context it will join the array into a single string
separated by newlines.

=back

=head2 Private Methods

=over 4

=item B<_init ($tree | $input)>

This will initialize the slots of the object. If given a C<$tree> object, it
will store it. This is currently the preferred way in which to use subclasses
of B<Tree::Simple> to build your tree with, as this object will be used to
build any other trees (see L<TO DO> for more information). If given some other
kind of input, it will process this through the C<prepareInput> method.

=item B<_parse>

This is where all the parsing work is done.
If you are truly interested in the
inner workings of this method, I suggest you refer to the source. It is a very
simple algorithm and should be easy to understand.

=item B<_deparse>

This is where all the deparsing work is done. As with the C<_parse> method, if
you are interested in the inner workings, I suggest you refer to the source.

=back

=head1 TO DO

=over 4

=item Enhance the Nested Parens filter

This filter is somewhat limited in its handling of embedded newlines as well as
embedded double quotes (even if they are escaped). I would like to improve this
filter more when time allows.

=item Enhance the Dot Separated Level filter

I would like to enhance this built in filter to handle multi-level level-identifiers,
basically allowing formats like this:

  1 First Child
  1.a First Grandchild
  1.b Second Grandchild
  1.b.I First Child of the Second Grandchild
  1.b.II Second Child of the Second Grandchild
  1.c Third Grandchild
  2 Second Child

=item Make Tree::Simple subclasses more easy to handle

Currently in order to have Tree::Parser use a subclass of Tree::Simple to build
the hierarchy with, you must pass a tree into the constructor, and then set the
input manually. This could be handled better I think, but right now I am not 100%
sure how best to go about it.

=back

=head1 BUGS

None that I am aware of. Of course, if you find a bug, let me know, and I will be
sure to fix it. This module, in an earlier form, has been and is being used in
production for approx. 1 year now without incident. This version has been improved
and the test suite added.

=head1 CODE COVERAGE

I use B<Devel::Cover> to test the code coverage of my tests, below is the B<Devel::Cover>
report on this module's test suite.

 ---------------------------- ------ ------ ------ ------ ------ ------ ------
 File                           stmt branch   cond    sub    pod   time  total
 ---------------------------- ------ ------ ------ ------ ------ ------ ------
 Tree/Parser.pm                100.0   87.9   81.2  100.0  100.0  100.0   94.6
 ---------------------------- ------ ------ ------ ------ ------ ------ ------
 Total                         100.0   87.9   81.2  100.0  100.0  100.0   94.6
 ---------------------------- ------ ------ ------ ------ ------ ------ ------

=head1 SEE ALSO

This module is not an attempt at a general purpose parser by any stretch of the
imagination. It is basically a very flexible special purpose parser, it only
builds Tree::Simple hierarchies, but your parse filters can be as complex as necessary.
If this is not what you are looking for, then you might want to consider one of
the following modules:

=over 4

=item B<Parse::RecDescent>

This is a general purpose Recursive Descent parser generator written by Damian
Conway. If your parsing needs lean towards the more complex, this is a good module
for you. Recursive Descent parsing is known to be slower than other parsing styles,
but it tends to be easier to write grammars for, so there is a trade off. If speed
is a concern, then you may just want to skip perl and go straight to C and use
C<yacc>.

=item B<Parse::Yapp>

As an alternative to Recursive Descent parsing, you can do LALR parsing. It is
faster and does not have some of the well known (and avoidable) problems of
Recursive Descent parsing. I have never actually used this module, but I have
heard good things about it.

=item B<Parse::FixedLength>

If all you really need to do is process a file with fixed length fields in it,
you can use this module.

=item B<Parse::Tokens>

This class will help you parse text with embedded tokens in it. I am not very
familiar with this module, but it looks interesting.

=back

There are also a number of specific parsers out there, such as B<HTML::Parser>
and B<XML::Parser>, which do one thing and do it well. If you are looking to
parse HTML or XML, don't use my module, use these ones, it just makes sense.
Use the right tool for the job basically.

=head1 DEPENDENCIES

This module uses two other modules I have written:

=over 5

=item B<Tree::Simple>

=item B<Array::Iterator>

=back

=head1 ACKNOWLEDGEMENTS

=over 4

=item Thanks to Chad Ullman for reporting RT Bug #12244 and providing code and test case for it.

=item Thanks to Gerd for reporting RT Bug #13041 and providing code to fix it.

=back

=head1 AUTHOR

stevan little, E<lt>stevan@iinteractive.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2004-2007 by Infinity Interactive, Inc.

L<http://www.iinteractive.com>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut