1#############################################################################
2## Name:        Tree.pm
3## Purpose:     XML::Smart::Tree
4## Author:      Graciliano M. P.
5## Modified by: Harish Madabushi
6## Created:     10/05/2003
7## RCS-ID:
8## Copyright:   (c) 2003 Graciliano M. P.
9## Licence:     This program is free software; you can redistribute it and/or
10##              modify it under the same terms as Perl itself
11#############################################################################
12
13package XML::Smart::Tree                                       ;
14
15use strict                                                     ;
16use warnings                                                   ;
17
18use Carp                                                       ;
19
20use XML::Smart::Entity qw(_parse_basic_entity)                 ;
21use XML::Smart::Shared qw( _unset_sig_warn _reset_sig_warn )   ;
22
23
24our ($VERSION) ;
25$VERSION = '1.34' ;
26
27my %PARSERS = (
28    XML_Parser           => 0 ,
29    XML_Smart_Parser     => 0 ,
30    XML_Smart_HTMLParser => 0 ,
31    ) ;
32
33## BUG - By making DEFAULT_LOADED a global variable it is working across objects! ( Watch for possible usage elsewhere )
34# my $DEFAULT_LOADED ;
35
36use vars qw($NO_XML_PARSER);
37
38
39###################
40# LOAD_XML_PARSER #
41###################
42
43sub load_XML_Parser {
44
45    return if $NO_XML_PARSER ;
46
47    _unset_sig_warn() ;
48    eval('use XML::Parser ;') ;
49    _reset_sig_warn() ;
50    if ($@) { $@ = undef ; return( undef ) ;}
51
52    my ($xml , $tree) ;
53
54    _unset_sig_warn() ;
55    eval {
56	no strict ;
57	my $data = '<root><foo arg1="t1" arg2="t2" /></root>' ;
58	$xml = XML::Parser->new(Style => 'Tree') ;
59	$tree = $xml->parse($data) ;
60    } ;
61    _reset_sig_warn() ;
62
63    if (!$tree || ref($tree) ne 'ARRAY') { return( undef ) ;}
64    if ($tree->[1][2][0]{arg1} eq 't1') { return( 1 ) ;}
65    return( undef ) ;
66
67}
68
69#########################
70# LOAD_XML_SMART_PARSER #
71#########################
72
73sub load_XML_Smart_Parser {
74
75  _unset_sig_warn() ;
76  eval('use XML::Smart::Parser ;') ;
77  _reset_sig_warn() ;
78  if ($@) { $@ = undef ; return( undef ) ;}
79  return(1) ;
80
81}
82
83#############################
84# LOAD_XML_SMART_HTMLPARSER #
85#############################
86
87sub load_XML_Smart_HTMLParser {
88  _unset_sig_warn() ;
89    eval('use XML::Smart::HTMLParser ;') ;
90  _reset_sig_warn() ;
91  if ($@) { $@ = undef ; return( undef ) ;}
92  return(1) ;
93}
94
95########
96# LOAD #
97########
98
99sub load {
100
101  my ( $parser ) = @_ ;
102  my $module ;
103
104  my $DEFAULT_LOADED  ;
105
106  if ($parser) {
107    $parser =~ s/:+/_/gs ;
108    $parser =~ s/\W//g ;
109
110    if    ($parser =~ /^(?:html?|wild)$/i) { $parser = 'XML_Smart_HTMLParser' ;}
111    elsif ($parser =~ /^(?:re|smart)/i) { $parser = 'XML_Smart_Parser' ;}
112
113    foreach my $Key ( keys %PARSERS ) {
114      if ($Key =~ /^$parser$/i) { $module = $Key ; last ;}
115    }
116  }
117
118  my $ok ;
119  if( $module && ( $module eq 'XML_Parser' ) ) {
120    $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ;
121    $ok = $PARSERS{XML_Parser} ;
122  } elsif ( $module && ( $module eq 'XML_Smart_Parser' ) ) {
123      $PARSERS{XML_Smart_Parser} = 1 if !$PARSERS{XML_Smart_Parser} && &load_XML_Smart_Parser() ;
124      $ok = $PARSERS{XML_Smart_Parser} ;
125  } elsif( $module and ( $module eq 'XML_Smart_HTMLParser' ) ) {
126      $PARSERS{XML_Smart_HTMLParser} = 1 if !$PARSERS{XML_Smart_HTMLParser} && &load_XML_Smart_HTMLParser() ;
127      $ok = $PARSERS{XML_Smart_HTMLParser} ;
128  }
129
130  if (!$ok && !$DEFAULT_LOADED) {
131      $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ;
132      $module = 'XML_Parser' ;
133      if ( !$PARSERS{XML_Parser} ) {
134	  $PARSERS{XML_Smart_Parser} = 1 if &load_XML_Smart_Parser() ;
135	  $module = 'XML_Smart_Parser' ;
136      }
137      $DEFAULT_LOADED = 1 ;
138  }
139
140  return($module) ;
141}
142
143#########
144# PARSE #
145#########
146
147sub parse {
148
149    my $module = $_[1] ;
150
151    my $data ;
152    {
153	my ($fh,$open) ;
154
155	if (ref($_[0]) eq 'GLOB') { $fh = $_[0] ;}
156	elsif ($_[0] =~ /^http:\/\/\w+[^\r\n]+$/s) { $data = &get_url($_[0]) ;}
157	elsif ($_[0] =~ /<.*?>/s) { $data = $_[0] ;}
158	else {
159	    open ($fh,$_[0]) or croak( $! ); binmode($fh) ; $open = 1 ;
160	}
161
162	if ($fh) {
163	    no warnings ;
164	    1 while( read($fh, $data , 1024*8 , length($data) ) ) ;
165	    close($fh) if $open ;
166	}
167    }
168
169    if ($data !~ /<.*?>/s) { return( {} ) ;}
170
171    if (!$module || !$PARSERS{$module}) {
172	if    ( !$NO_XML_PARSER && $INC{'XML/Parser.pm'} && $PARSERS{XML_Parser}) { $module = 'XML_Parser' ;}
173	elsif ($PARSERS{XML_Smart_Parser}) { $module = 'XML_Smart_Parser' ;}
174    }
175
176    my $xml ;
177    if ($module eq 'XML_Parser') { $xml = XML::Parser->new() ;}
178    elsif ($module eq 'XML_Smart_Parser') { $xml = XML::Smart::Parser->new() ;}
179    elsif ($module eq 'XML_Smart_HTMLParser') { $xml = XML::Smart::HTMLParser->new() ;}
180    else { croak("Can't find a parser for XML!") ;}
181
182    shift(@_) ;
183    if ( $_[0] && ( $_[0] =~ /^\s*(?:XML_\w+|html?|re\w+|smart)\s*$/i ) ) { shift(@_) ;}
184
185    _unset_sig_warn() ;
186    my ( %args ) = @_ ;
187    _reset_sig_warn() ;
188
189    if ( $args{lowtag} ) { $xml->{SMART}{tag} = 1 ;}
190    if ( $args{upertag} ) { $xml->{SMART}{tag} = 2 ;}
191    if ( $args{lowarg} ) { $xml->{SMART}{arg} = 1 ;}
192    if ( $args{uperarg} ) { $xml->{SMART}{arg} = 2 ;}
193    if ( $args{arg_single} ) { $xml->{SMART}{arg_single} = 1 ;}
194
195    if ( $args{no_order} ) { $xml->{SMART}{no_order} = 1 ;}
196    if ( $args{no_nodes} ) { $xml->{SMART}{no_nodes} = 1 ;}
197
198    if ( $args{use_spaces} ) { $xml->{SMART}{use_spaces} = 1 ;}
199
200    $xml->{SMART}{on_start} = $args{on_start} if ref($args{on_start}) eq 'CODE' ;
201    $xml->{SMART}{on_char}  = $args{on_char}  if ref($args{on_char})  eq 'CODE' ;
202    $xml->{SMART}{on_end}   = $args{on_end}   if ref($args{on_end})   eq 'CODE' ;
203
204    $xml->setHandlers(
205	Init => \&_Init ,
206	Start => \&_Start ,
207	Char  => \&_Char ,
208	End   => \&_End ,
209	Final => \&_Final ,
210	) ;
211
212    my $tree ;
213    eval {
214	$tree = $xml->parse($data);
215    }; croak( $@ ) if( $@ );
216    return( $tree ) ;
217}
218
219
220
221
222##################################################
223##            UNUSED - DEPRECATED.              ##
224##################################################
225
226sub _clean_data_with_lt {
227
228    my $data = shift ;
229
230    my @data = split( //, $data ) ;
231    my $data_len = @data          ;
232
233
234    # State Machine Definition:
235
236    my %state_machine =
237	(
238	 'in_cdata_block'            =>  0 ,
239	 'seen_some_tag'             =>  0 ,
240	 'need_to_cdata_this'        =>  0 ,
241	 'prev_lt'                   => -1 ,
242	 'last_tag_start'            => -1 ,
243	 'last_tag_close'            => -1 ,
244	 'tag_balance'               =>  0 ,
245	);
246
247
248    CHAR: for( my $index = 0; $index < $data_len; $index++ ) {
249
250	{
251	    no warnings ;
252	    next CHAR unless( $data[ $index ] eq '<' or $data[ $index ] eq '>' ) ;
253	}
254
255	if( $data[ $index ] eq '<' ) {
256
257	    next CHAR if( $state_machine{ 'in_cdata_block' } ) ;
258
259	    {
260		# Check for possibility of this being a cdata block
261		my $possible_cdata_block = join( '', @data[ $index .. ( $index + 8 ) ] ) ;
262		if( $possible_cdata_block eq '<![CDATA[' ) {
263		    $state_machine{ 'in_cdata_block' } = 1 ;
264		    next CHAR                              ;
265		}
266
267	    }
268
269	    $state_machine{ 'tag_balance'    }++ ;
270	    $state_machine{ 'prev_lt' } = $index ;
271
272	    next CHAR if( $state_machine{ 'need_to_cdata_this' } ) ;
273
274	    unless( $state_machine{ 'seen_some_tag' } ) {
275		$state_machine{ 'seen_some_tag' }  = 1      ;
276		$state_machine{ 'last_tag_start' } = $index ;
277		next CHAR                                   ;
278	    }
279
280	    if( $state_machine{ 'tag_balance' } == 1 ) {
281		$state_machine{ 'last_tag_start' } = $index ;
282		next CHAR ;
283	    }
284
285	    $state_machine{ 'need_to_cdata_this' } = 1 ;
286
287	    ## Seen a < and
288	    #    1. We are not in a CDATA block
289	    #    2. This is not the start of a CDATA block
290
291
292	} elsif( $data[ $index ] eq '>' ) {
293
294
295	    if( $state_machine{ 'in_cdata_block' } ) {
296
297		my $possible_cdata_close = join( '', @data[ ( $index - 2 ) .. $index ] ) ;
298		if( $possible_cdata_close eq ']]>' ) {
299		    $state_machine{ 'in_cdata_block' } = 0 ;
300		    $state_machine{ 'tag_balance'    } = 0 ;
301		    next CHAR                              ;
302		}
303
304		next CHAR ;
305	    }
306
307	    unless( $state_machine{ 'seen_some_tag' } ) {
308		croak " > found before < - Input XML seems to have errors!\n";
309	    }
310
311
312	    $state_machine{ 'tag_balance' }-- ;
313
314	    unless( $state_machine{ 'tag_balance' } ) {
315		$state_machine{ 'last_tag_close' } = $index ;
316		next CHAR                                   ;
317	    }
318
319
320	    ## Need to add CDATA now.
321
322	    my $last_tag_close = $state_machine{ 'last_tag_close' } ;
323	    my $prev_lt        = $state_machine{ 'prev_lt'        } ;
324	    $data[ $last_tag_close ] = '><![CDATA[' ;
325	    $data[ $prev_lt        ] = ']]><'       ;
326
327	    $state_machine{ 'last_tag_close'     } = $index ;
328	    $state_machine{ 'need_to_cdata_this' } = 0      ;
329
330	    $state_machine{ 'tag_balance'        } = 0      ;
331
332	}
333
334    }
335
336    $data = join( '', @data ) ;
337
338    return $data;
339
340}
341
342
343###########
344# GET_URL #
345###########
346
347
348sub get_url {
349
350  my ( $url ) = @_ ;
351  my $data ;
352
353  require LWP ;
354  require LWP::UserAgent ;
355
356  my $ua = LWP::UserAgent->new();
357
358  my $agent = $ua->agent() ;
359  $agent = "XML::Smart/$XML::Smart::VERSION $agent" ;
360  $ua->agent($agent) ;
361
362  my $req = HTTP::Request->new(GET => $url) ;
363  my $res = $ua->request($req) ;
364
365  if ($res->is_success) { return $res->content ;}
366  else { return undef ;}
367}
368
369##########
370# MODULE #
371##########
372
373sub module {
374  foreach my $Key ( keys %PARSERS ) {
375    if ($PARSERS{$Key}) {
376      my $module = $Key ;
377      $module =~ s/_/::/g ;
378      return( $module ) ;
379    }
380  }
381  return('') ;
382}
383
384#########
385# _INIT #
386#########
387
388sub _Init {
389  my $this = shift ;
390  $this->{PARSING}{tree} = {} ;
391  $this->{PARSING}{p} = $this->{PARSING}{tree} ;
392
393  return ;
394}
395
396##########
397# _START #
398##########
399
400sub _Start {
401  my $this = shift ;
402
403  if ( $this->{LAST_CALL} && ( $this->{LAST_CALL} eq 'char' ) ) {
404      _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ;
405  }
406
407  ##print "START>> @_\n" ;
408
409  $this->{LAST_CALL} = 'start' ;
410
411  _unset_sig_warn();
412  my ( $tag , %args ) = @_ ;
413  _reset_sig_warn();
414
415  if    ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;}
416  elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;}
417
418  $this->{PARSING}{p}{'/nodes'}{$tag} = 1 if !$this->{SMART}{no_nodes} ;
419
420  push( @{$this->{PARSING}{p}{'/order'}} , $tag) if !$this->{SMART}{no_order} ;
421
422  if ( $this->{SMART}{arg} ) {
423    my $type = $this->{SMART}{arg} ;
424    my %argsok ;
425    foreach my $Key ( keys %args ) {
426      my $k ;
427      if    ($type == 1) { $k = lc($Key) ;}
428      elsif ($type == 2) { $k = uc($Key) ;}
429
430      if (exists $argsok{$k}) {
431        if ( ref $argsok{$k} ne 'ARRAY' ) {
432          my $key = $argsok{$k} ;
433          $argsok{$k} = [$key] ;
434        }
435        push(@{$argsok{$k}} , $args{$Key}) ;
436      }
437      else { $argsok{$k} = $args{$Key} ;}
438    }
439
440    %args = %argsok ;
441  }
442
443  if ( $this->{SMART}{arg_single} ) {
444    foreach my $Key ( keys %args ) {
445      $args{$Key} = 1 if !defined $args{$Key} ;
446    }
447  }
448
449  ## Args order:
450  if ( !$this->{SMART}{no_order} ) {
451    my @order ;
452    for(my $i = 1 ; $i < $#_ ; $i+=2) { push( @order , $_[$i] ) ;}
453
454    if ( $this->{SMART}{arg} ) {
455      my $type = $this->{SMART}{arg} ;
456      foreach my $order_i ( @order ) {
457        if    ($type == 1) { $order_i = lc($order_i) ;}
458        elsif ($type == 2) { $order_i = uc($order_i) ;}
459      }
460    }
461
462    $args{'/order'} = \@order if @order ;
463  }
464
465  $args{'/tag'} = $tag ;
466  $args{'/back'} = $this->{PARSING}{p} ;
467
468  if ($this->{NOENTITY}) {
469    foreach my $Key ( keys %args ) { &_parse_basic_entity( $args{$Key} ) ;}
470  }
471
472  if ( defined $this->{PARSING}{p}{$tag} ) {
473    if ( ref($this->{PARSING}{p}{$tag}) ne 'ARRAY' ) {
474      my $prev = $this->{PARSING}{p}{$tag} ;
475      $this->{PARSING}{p}{$tag} = [$prev] ;
476    }
477    push(@{$this->{PARSING}{p}{$tag}} , \%args) ;
478
479    my $i = @{$this->{PARSING}{p}{$tag}} ; $i-- ;
480    $args{'/i'} = $i ;
481
482    $this->{PARSING}{p} = \%args ;
483  }
484  else {
485    $this->{PARSING}{p}{$tag} = \%args ;
486    ## Change the pointer:
487    $this->{PARSING}{p} = \%args ;
488  }
489
490  if ( $this->{SMART}{on_start} ) {
491    my $sub = $this->{SMART}{on_start} ;
492    &$sub($tag , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , undef , $this ) ;
493  }
494
495  return ;
496}
497
498#########
499# _CHAR #
500#########
501#
502# XML::Parser parse each line as a different call to _Char().
503# For XML::Smart multiple calls to _Char() occurs only when the content
504# have other nodes inside.
505#
506
507sub _Char { ##print "CHAR>>\n" ;
508  my $this = shift ;
509  $this->{CONTENT_BUFFER} .= $_[0] ;
510  $this->{LAST_CALL} = 'char' ;
511  return ;
512}
513
514sub _Char_process {
515  my $this = shift ;
516  ##print "CONT>> ##@_##\n" ;
517
518  my $content = $_[0] ;
519
520  if ( !$this->{SMART}{use_spaces} && $content !~ /\S+/s ) { return ;}
521
522  ######
523
524  if (! defined $this->{PARSING}{p}{'dt:dt'} && defined $this->{PARSING}{p}{'DT:DT'}) {
525    $this->{PARSING}{p}{'dt:dt'} = delete $this->{PARSING}{p}{'DT:DT'} ;
526  }
527
528  if ( $this->{PARSING}{p}{'dt:dt'} && ( $this->{PARSING}{p}{'dt:dt'} =~ /binary\.base64/si ) ) {
529    require XML::Smart::Base64 ;
530    $content = &XML::Smart::Base64::decode_base64($content) ;
531    delete $this->{PARSING}{p}{'dt:dt'} ;
532
533    if ( $this->{PARSING}{p}{'/nodes'} ) {
534      delete $this->{PARSING}{p}{'/nodes'}{'dt:dt'} ;
535      my $nkeys = keys %{$this->{PARSING}{p}{'/nodes'}} ;
536      if ($nkeys < 1) { delete $this->{PARSING}{p}{'/nodes'} ;}
537    }
538
539    if ( $this->{PARSING}{p}{'/order'} ) {
540      my @order = @{$this->{PARSING}{p}{'/order'}} ;
541      my @order_ok ;
542      foreach my $order_i ( @order ) { push(@order_ok , $order_i) if $order_i ne 'dt:dt' ;}
543      if (@order_ok) { $this->{PARSING}{p}{'/order'} = \@order_ok ;}
544      else { delete $this->{PARSING}{p}{'/order'} ;}
545    }
546  }
547  elsif ($this->{NOENTITY}) { &_parse_basic_entity($content) ;}
548
549  ######
550
551  if ( !exists $this->{PARSING}{p}{CONTENT} ) {
552    $this->{PARSING}{p}{CONTENT} = $content ;
553    push(@{$this->{PARSING}{p}{'/order'}} , 'CONTENT') if !$this->{SMART}{no_order} ;
554  }
555  else {
556    if ( !tied $this->{PARSING}{p}{CONTENT} ) {
557      my $cont = $this->{PARSING}{p}{CONTENT} ;
558      $this->{PARSING}{p}{CONTENT} = '' ;
559      my $tied = tie( $this->{PARSING}{p}{CONTENT} => 'XML::Smart::TieScalar' , $this->{PARSING}{p}) ;
560      push(@{$this->{TIED_CONTENTS}} , $tied) ;
561
562      $this->{PARSING}{p}{'/.CONTENT/x'} = 0 ;
563      $this->{PARSING}{p}{"/.CONTENT/0"} = $cont ;
564
565      my $cont_pos = 0 ;
566      for my $key ( @{$this->{PARSING}{p}{'/order'}} ) {
567        last if ($key eq 'CONTENT') ;
568        ++$cont_pos ;
569      }
570
571      splice( @{$this->{PARSING}{p}{'/order'}} , $cont_pos,0, "/.CONTENT/0") if !$this->{SMART}{no_order} ;
572    }
573
574    my $x = ++$this->{PARSING}{p}{'/.CONTENT/x'} ;
575    $this->{PARSING}{p}{"/.CONTENT/$x"} = $content ;
576    push( @{$this->{PARSING}{p}{'/order'}} , "/.CONTENT/$x") if !$this->{SMART}{no_order} ;
577  }
578
579  if ( $this->{SMART}{on_char} ) {
580    my $sub = $this->{SMART}{on_char} ;
581    &$sub($this->{PARSING}{p}{'/tag'} , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , \$this->{PARSING}{p}{CONTENT} , $this ) ;
582  }
583
584  return ;
585}
586
587########
588# _END #
589########
590
591sub _End { ##print "END>> @_[1] >> $_[0]->{PARSING}{p}{'/tag'}\n" ;
592  my $this = shift ;
593
594  if ( $this->{LAST_CALL} eq 'char' ) { _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ;}
595  $this->{LAST_CALL} = 'end' ;
596
597  my $tag = shift ;
598
599  if    ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;}
600  elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;}
601
602  if ( $this->{PARSING}{p}{'/tag'} ne $tag ) { return ;}
603
604  delete $this->{PARSING}{p}{'/tag'} ;
605
606  my $back  = delete $this->{PARSING}{p}{'/back'} ;
607  my $i = delete $this->{PARSING}{p}{'/i'} || 0 ;
608
609  my $nkeys = keys %{$this->{PARSING}{p}} ;
610
611  if ( $nkeys == 1 && exists $this->{PARSING}{p}{CONTENT} ) {
612    if (ref($back->{$tag}) eq 'ARRAY') { $back->{$tag}[$i] = $this->{PARSING}{p}{CONTENT} ;}
613    else { $back->{$tag} = $this->{PARSING}{p}{CONTENT} ;}
614  }
615
616  if ( $this->{PARSING}{p}{'/nodes'} && !%{$this->{PARSING}{p}{'/nodes'}} ) { delete $this->{PARSING}{p}{'/nodes'} ;}
617  if ( $this->{PARSING}{p}{'/order'} && $#{$this->{PARSING}{p}{'/order'}} <= 0 ) { delete $this->{PARSING}{p}{'/order'} ;}
618
619  delete $this->{PARSING}{p}{'/.CONTENT/x'} ;
620
621  if ( $this->{SMART}{on_end} ) {
622    my $sub = $this->{SMART}{on_end} ;
623    &$sub($tag , $this->{PARSING}{p} , $back , undef , $this) ;
624  }
625
626  $this->{PARSING}{p} = $back ;
627
628  return ;
629}
630
631##########
632# _FINAL #
633##########
634
635sub _Final {
636  my $this = shift ;
637  my $tree = $this->{PARSING}{tree} ;
638
639  foreach my $tied_cont ( @{$this->{TIED_CONTENTS}} ) {
640    $tied_cont->_cache_keys ;
641  }
642
643  delete $this->{TIED_CONTENTS} ;
644  delete $this->{LAST_CALL} ;
645
646  delete($this->{PARSING}) ;
647  return($tree) ;
648}
649
650#######
651# END #
652#######
653
6541;
655
656
657__END__
658
659
660
661
662
663
664