1############################################################################# 2## Name: Tree.pm 3## Purpose: XML::Smart::Tree 4## Author: Graciliano M. P. 5## Modified by: Harish Madabushi 6## Created: 10/05/2003 7## RCS-ID: 8## Copyright: (c) 2003 Graciliano M. P. 9## Licence: This program is free software; you can redistribute it and/or 10## modify it under the same terms as Perl itself 11############################################################################# 12 13package XML::Smart::Tree ; 14 15use strict ; 16use warnings ; 17 18use Carp ; 19 20use XML::Smart::Entity qw(_parse_basic_entity) ; 21use XML::Smart::Shared qw( _unset_sig_warn _reset_sig_warn ) ; 22 23 24our ($VERSION) ; 25$VERSION = '1.34' ; 26 27my %PARSERS = ( 28 XML_Parser => 0 , 29 XML_Smart_Parser => 0 , 30 XML_Smart_HTMLParser => 0 , 31 ) ; 32 33## BUG - By making DEFAULT_LOADED a global variable it is working across objects! ( Watch for possible usage elsewhere ) 34# my $DEFAULT_LOADED ; 35 36use vars qw($NO_XML_PARSER); 37 38 39################### 40# LOAD_XML_PARSER # 41################### 42 43sub load_XML_Parser { 44 45 return if $NO_XML_PARSER ; 46 47 _unset_sig_warn() ; 48 eval('use XML::Parser ;') ; 49 _reset_sig_warn() ; 50 if ($@) { $@ = undef ; return( undef ) ;} 51 52 my ($xml , $tree) ; 53 54 _unset_sig_warn() ; 55 eval { 56 no strict ; 57 my $data = '<root><foo arg1="t1" arg2="t2" /></root>' ; 58 $xml = XML::Parser->new(Style => 'Tree') ; 59 $tree = $xml->parse($data) ; 60 } ; 61 _reset_sig_warn() ; 62 63 if (!$tree || ref($tree) ne 'ARRAY') { return( undef ) ;} 64 if ($tree->[1][2][0]{arg1} eq 't1') { return( 1 ) ;} 65 return( undef ) ; 66 67} 68 69######################### 70# LOAD_XML_SMART_PARSER # 71######################### 72 73sub load_XML_Smart_Parser { 74 75 _unset_sig_warn() ; 76 eval('use XML::Smart::Parser ;') ; 77 _reset_sig_warn() ; 78 if ($@) { $@ = undef ; return( undef ) ;} 79 return(1) ; 80 81} 82 83############################# 84# LOAD_XML_SMART_HTMLPARSER # 85############################# 86 87sub load_XML_Smart_HTMLParser { 88 _unset_sig_warn() ; 89 eval('use XML::Smart::HTMLParser ;') ; 90 _reset_sig_warn() ; 91 if ($@) { $@ = undef ; return( undef ) ;} 92 return(1) ; 93} 94 95######## 96# LOAD # 97######## 98 99sub load { 100 101 my ( $parser ) = @_ ; 102 my $module ; 103 104 my $DEFAULT_LOADED ; 105 106 if ($parser) { 107 $parser =~ s/:+/_/gs ; 108 $parser =~ s/\W//g ; 109 110 if ($parser =~ /^(?:html?|wild)$/i) { $parser = 'XML_Smart_HTMLParser' ;} 111 elsif ($parser =~ /^(?:re|smart)/i) { $parser = 'XML_Smart_Parser' ;} 112 113 foreach my $Key ( keys %PARSERS ) { 114 if ($Key =~ /^$parser$/i) { $module = $Key ; last ;} 115 } 116 } 117 118 my $ok ; 119 if( $module && ( $module eq 'XML_Parser' ) ) { 120 $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ; 121 $ok = $PARSERS{XML_Parser} ; 122 } elsif ( $module && ( $module eq 'XML_Smart_Parser' ) ) { 123 $PARSERS{XML_Smart_Parser} = 1 if !$PARSERS{XML_Smart_Parser} && &load_XML_Smart_Parser() ; 124 $ok = $PARSERS{XML_Smart_Parser} ; 125 } elsif( $module and ( $module eq 'XML_Smart_HTMLParser' ) ) { 126 $PARSERS{XML_Smart_HTMLParser} = 1 if !$PARSERS{XML_Smart_HTMLParser} && &load_XML_Smart_HTMLParser() ; 127 $ok = $PARSERS{XML_Smart_HTMLParser} ; 128 } 129 130 if (!$ok && !$DEFAULT_LOADED) { 131 $PARSERS{XML_Parser} = 1 if &load_XML_Parser() ; 132 $module = 'XML_Parser' ; 133 if ( !$PARSERS{XML_Parser} ) { 134 $PARSERS{XML_Smart_Parser} = 1 if &load_XML_Smart_Parser() ; 135 $module = 'XML_Smart_Parser' ; 136 } 137 $DEFAULT_LOADED = 1 ; 138 } 139 140 return($module) ; 141} 142 143######### 144# PARSE # 145######### 146 147sub parse { 148 149 my $module = $_[1] ; 150 151 my $data ; 152 { 153 my ($fh,$open) ; 154 155 if (ref($_[0]) eq 'GLOB') { $fh = $_[0] ;} 156 elsif ($_[0] =~ /^http:\/\/\w+[^\r\n]+$/s) { $data = &get_url($_[0]) ;} 157 elsif ($_[0] =~ /<.*?>/s) { $data = $_[0] ;} 158 else { 159 open ($fh,$_[0]) or croak( $! ); binmode($fh) ; $open = 1 ; 160 } 161 162 if ($fh) { 163 no warnings ; 164 1 while( read($fh, $data , 1024*8 , length($data) ) ) ; 165 close($fh) if $open ; 166 } 167 } 168 169 if ($data !~ /<.*?>/s) { return( {} ) ;} 170 171 if (!$module || !$PARSERS{$module}) { 172 if ( !$NO_XML_PARSER && $INC{'XML/Parser.pm'} && $PARSERS{XML_Parser}) { $module = 'XML_Parser' ;} 173 elsif ($PARSERS{XML_Smart_Parser}) { $module = 'XML_Smart_Parser' ;} 174 } 175 176 my $xml ; 177 if ($module eq 'XML_Parser') { $xml = XML::Parser->new() ;} 178 elsif ($module eq 'XML_Smart_Parser') { $xml = XML::Smart::Parser->new() ;} 179 elsif ($module eq 'XML_Smart_HTMLParser') { $xml = XML::Smart::HTMLParser->new() ;} 180 else { croak("Can't find a parser for XML!") ;} 181 182 shift(@_) ; 183 if ( $_[0] && ( $_[0] =~ /^\s*(?:XML_\w+|html?|re\w+|smart)\s*$/i ) ) { shift(@_) ;} 184 185 _unset_sig_warn() ; 186 my ( %args ) = @_ ; 187 _reset_sig_warn() ; 188 189 if ( $args{lowtag} ) { $xml->{SMART}{tag} = 1 ;} 190 if ( $args{upertag} ) { $xml->{SMART}{tag} = 2 ;} 191 if ( $args{lowarg} ) { $xml->{SMART}{arg} = 1 ;} 192 if ( $args{uperarg} ) { $xml->{SMART}{arg} = 2 ;} 193 if ( $args{arg_single} ) { $xml->{SMART}{arg_single} = 1 ;} 194 195 if ( $args{no_order} ) { $xml->{SMART}{no_order} = 1 ;} 196 if ( $args{no_nodes} ) { $xml->{SMART}{no_nodes} = 1 ;} 197 198 if ( $args{use_spaces} ) { $xml->{SMART}{use_spaces} = 1 ;} 199 200 $xml->{SMART}{on_start} = $args{on_start} if ref($args{on_start}) eq 'CODE' ; 201 $xml->{SMART}{on_char} = $args{on_char} if ref($args{on_char}) eq 'CODE' ; 202 $xml->{SMART}{on_end} = $args{on_end} if ref($args{on_end}) eq 'CODE' ; 203 204 $xml->setHandlers( 205 Init => \&_Init , 206 Start => \&_Start , 207 Char => \&_Char , 208 End => \&_End , 209 Final => \&_Final , 210 ) ; 211 212 my $tree ; 213 eval { 214 $tree = $xml->parse($data); 215 }; croak( $@ ) if( $@ ); 216 return( $tree ) ; 217} 218 219 220 221 222################################################## 223## UNUSED - DEPRECATED. ## 224################################################## 225 226sub _clean_data_with_lt { 227 228 my $data = shift ; 229 230 my @data = split( //, $data ) ; 231 my $data_len = @data ; 232 233 234 # State Machine Definition: 235 236 my %state_machine = 237 ( 238 'in_cdata_block' => 0 , 239 'seen_some_tag' => 0 , 240 'need_to_cdata_this' => 0 , 241 'prev_lt' => -1 , 242 'last_tag_start' => -1 , 243 'last_tag_close' => -1 , 244 'tag_balance' => 0 , 245 ); 246 247 248 CHAR: for( my $index = 0; $index < $data_len; $index++ ) { 249 250 { 251 no warnings ; 252 next CHAR unless( $data[ $index ] eq '<' or $data[ $index ] eq '>' ) ; 253 } 254 255 if( $data[ $index ] eq '<' ) { 256 257 next CHAR if( $state_machine{ 'in_cdata_block' } ) ; 258 259 { 260 # Check for possibility of this being a cdata block 261 my $possible_cdata_block = join( '', @data[ $index .. ( $index + 8 ) ] ) ; 262 if( $possible_cdata_block eq '<![CDATA[' ) { 263 $state_machine{ 'in_cdata_block' } = 1 ; 264 next CHAR ; 265 } 266 267 } 268 269 $state_machine{ 'tag_balance' }++ ; 270 $state_machine{ 'prev_lt' } = $index ; 271 272 next CHAR if( $state_machine{ 'need_to_cdata_this' } ) ; 273 274 unless( $state_machine{ 'seen_some_tag' } ) { 275 $state_machine{ 'seen_some_tag' } = 1 ; 276 $state_machine{ 'last_tag_start' } = $index ; 277 next CHAR ; 278 } 279 280 if( $state_machine{ 'tag_balance' } == 1 ) { 281 $state_machine{ 'last_tag_start' } = $index ; 282 next CHAR ; 283 } 284 285 $state_machine{ 'need_to_cdata_this' } = 1 ; 286 287 ## Seen a < and 288 # 1. We are not in a CDATA block 289 # 2. This is not the start of a CDATA block 290 291 292 } elsif( $data[ $index ] eq '>' ) { 293 294 295 if( $state_machine{ 'in_cdata_block' } ) { 296 297 my $possible_cdata_close = join( '', @data[ ( $index - 2 ) .. $index ] ) ; 298 if( $possible_cdata_close eq ']]>' ) { 299 $state_machine{ 'in_cdata_block' } = 0 ; 300 $state_machine{ 'tag_balance' } = 0 ; 301 next CHAR ; 302 } 303 304 next CHAR ; 305 } 306 307 unless( $state_machine{ 'seen_some_tag' } ) { 308 croak " > found before < - Input XML seems to have errors!\n"; 309 } 310 311 312 $state_machine{ 'tag_balance' }-- ; 313 314 unless( $state_machine{ 'tag_balance' } ) { 315 $state_machine{ 'last_tag_close' } = $index ; 316 next CHAR ; 317 } 318 319 320 ## Need to add CDATA now. 321 322 my $last_tag_close = $state_machine{ 'last_tag_close' } ; 323 my $prev_lt = $state_machine{ 'prev_lt' } ; 324 $data[ $last_tag_close ] = '><![CDATA[' ; 325 $data[ $prev_lt ] = ']]><' ; 326 327 $state_machine{ 'last_tag_close' } = $index ; 328 $state_machine{ 'need_to_cdata_this' } = 0 ; 329 330 $state_machine{ 'tag_balance' } = 0 ; 331 332 } 333 334 } 335 336 $data = join( '', @data ) ; 337 338 return $data; 339 340} 341 342 343########### 344# GET_URL # 345########### 346 347 348sub get_url { 349 350 my ( $url ) = @_ ; 351 my $data ; 352 353 require LWP ; 354 require LWP::UserAgent ; 355 356 my $ua = LWP::UserAgent->new(); 357 358 my $agent = $ua->agent() ; 359 $agent = "XML::Smart/$XML::Smart::VERSION $agent" ; 360 $ua->agent($agent) ; 361 362 my $req = HTTP::Request->new(GET => $url) ; 363 my $res = $ua->request($req) ; 364 365 if ($res->is_success) { return $res->content ;} 366 else { return undef ;} 367} 368 369########## 370# MODULE # 371########## 372 373sub module { 374 foreach my $Key ( keys %PARSERS ) { 375 if ($PARSERS{$Key}) { 376 my $module = $Key ; 377 $module =~ s/_/::/g ; 378 return( $module ) ; 379 } 380 } 381 return('') ; 382} 383 384######### 385# _INIT # 386######### 387 388sub _Init { 389 my $this = shift ; 390 $this->{PARSING}{tree} = {} ; 391 $this->{PARSING}{p} = $this->{PARSING}{tree} ; 392 393 return ; 394} 395 396########## 397# _START # 398########## 399 400sub _Start { 401 my $this = shift ; 402 403 if ( $this->{LAST_CALL} && ( $this->{LAST_CALL} eq 'char' ) ) { 404 _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ; 405 } 406 407 ##print "START>> @_\n" ; 408 409 $this->{LAST_CALL} = 'start' ; 410 411 _unset_sig_warn(); 412 my ( $tag , %args ) = @_ ; 413 _reset_sig_warn(); 414 415 if ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;} 416 elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;} 417 418 $this->{PARSING}{p}{'/nodes'}{$tag} = 1 if !$this->{SMART}{no_nodes} ; 419 420 push( @{$this->{PARSING}{p}{'/order'}} , $tag) if !$this->{SMART}{no_order} ; 421 422 if ( $this->{SMART}{arg} ) { 423 my $type = $this->{SMART}{arg} ; 424 my %argsok ; 425 foreach my $Key ( keys %args ) { 426 my $k ; 427 if ($type == 1) { $k = lc($Key) ;} 428 elsif ($type == 2) { $k = uc($Key) ;} 429 430 if (exists $argsok{$k}) { 431 if ( ref $argsok{$k} ne 'ARRAY' ) { 432 my $key = $argsok{$k} ; 433 $argsok{$k} = [$key] ; 434 } 435 push(@{$argsok{$k}} , $args{$Key}) ; 436 } 437 else { $argsok{$k} = $args{$Key} ;} 438 } 439 440 %args = %argsok ; 441 } 442 443 if ( $this->{SMART}{arg_single} ) { 444 foreach my $Key ( keys %args ) { 445 $args{$Key} = 1 if !defined $args{$Key} ; 446 } 447 } 448 449 ## Args order: 450 if ( !$this->{SMART}{no_order} ) { 451 my @order ; 452 for(my $i = 1 ; $i < $#_ ; $i+=2) { push( @order , $_[$i] ) ;} 453 454 if ( $this->{SMART}{arg} ) { 455 my $type = $this->{SMART}{arg} ; 456 foreach my $order_i ( @order ) { 457 if ($type == 1) { $order_i = lc($order_i) ;} 458 elsif ($type == 2) { $order_i = uc($order_i) ;} 459 } 460 } 461 462 $args{'/order'} = \@order if @order ; 463 } 464 465 $args{'/tag'} = $tag ; 466 $args{'/back'} = $this->{PARSING}{p} ; 467 468 if ($this->{NOENTITY}) { 469 foreach my $Key ( keys %args ) { &_parse_basic_entity( $args{$Key} ) ;} 470 } 471 472 if ( defined $this->{PARSING}{p}{$tag} ) { 473 if ( ref($this->{PARSING}{p}{$tag}) ne 'ARRAY' ) { 474 my $prev = $this->{PARSING}{p}{$tag} ; 475 $this->{PARSING}{p}{$tag} = [$prev] ; 476 } 477 push(@{$this->{PARSING}{p}{$tag}} , \%args) ; 478 479 my $i = @{$this->{PARSING}{p}{$tag}} ; $i-- ; 480 $args{'/i'} = $i ; 481 482 $this->{PARSING}{p} = \%args ; 483 } 484 else { 485 $this->{PARSING}{p}{$tag} = \%args ; 486 ## Change the pointer: 487 $this->{PARSING}{p} = \%args ; 488 } 489 490 if ( $this->{SMART}{on_start} ) { 491 my $sub = $this->{SMART}{on_start} ; 492 &$sub($tag , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , undef , $this ) ; 493 } 494 495 return ; 496} 497 498######### 499# _CHAR # 500######### 501# 502# XML::Parser parse each line as a different call to _Char(). 503# For XML::Smart multiple calls to _Char() occurs only when the content 504# have other nodes inside. 505# 506 507sub _Char { ##print "CHAR>>\n" ; 508 my $this = shift ; 509 $this->{CONTENT_BUFFER} .= $_[0] ; 510 $this->{LAST_CALL} = 'char' ; 511 return ; 512} 513 514sub _Char_process { 515 my $this = shift ; 516 ##print "CONT>> ##@_##\n" ; 517 518 my $content = $_[0] ; 519 520 if ( !$this->{SMART}{use_spaces} && $content !~ /\S+/s ) { return ;} 521 522 ###### 523 524 if (! defined $this->{PARSING}{p}{'dt:dt'} && defined $this->{PARSING}{p}{'DT:DT'}) { 525 $this->{PARSING}{p}{'dt:dt'} = delete $this->{PARSING}{p}{'DT:DT'} ; 526 } 527 528 if ( $this->{PARSING}{p}{'dt:dt'} && ( $this->{PARSING}{p}{'dt:dt'} =~ /binary\.base64/si ) ) { 529 require XML::Smart::Base64 ; 530 $content = &XML::Smart::Base64::decode_base64($content) ; 531 delete $this->{PARSING}{p}{'dt:dt'} ; 532 533 if ( $this->{PARSING}{p}{'/nodes'} ) { 534 delete $this->{PARSING}{p}{'/nodes'}{'dt:dt'} ; 535 my $nkeys = keys %{$this->{PARSING}{p}{'/nodes'}} ; 536 if ($nkeys < 1) { delete $this->{PARSING}{p}{'/nodes'} ;} 537 } 538 539 if ( $this->{PARSING}{p}{'/order'} ) { 540 my @order = @{$this->{PARSING}{p}{'/order'}} ; 541 my @order_ok ; 542 foreach my $order_i ( @order ) { push(@order_ok , $order_i) if $order_i ne 'dt:dt' ;} 543 if (@order_ok) { $this->{PARSING}{p}{'/order'} = \@order_ok ;} 544 else { delete $this->{PARSING}{p}{'/order'} ;} 545 } 546 } 547 elsif ($this->{NOENTITY}) { &_parse_basic_entity($content) ;} 548 549 ###### 550 551 if ( !exists $this->{PARSING}{p}{CONTENT} ) { 552 $this->{PARSING}{p}{CONTENT} = $content ; 553 push(@{$this->{PARSING}{p}{'/order'}} , 'CONTENT') if !$this->{SMART}{no_order} ; 554 } 555 else { 556 if ( !tied $this->{PARSING}{p}{CONTENT} ) { 557 my $cont = $this->{PARSING}{p}{CONTENT} ; 558 $this->{PARSING}{p}{CONTENT} = '' ; 559 my $tied = tie( $this->{PARSING}{p}{CONTENT} => 'XML::Smart::TieScalar' , $this->{PARSING}{p}) ; 560 push(@{$this->{TIED_CONTENTS}} , $tied) ; 561 562 $this->{PARSING}{p}{'/.CONTENT/x'} = 0 ; 563 $this->{PARSING}{p}{"/.CONTENT/0"} = $cont ; 564 565 my $cont_pos = 0 ; 566 for my $key ( @{$this->{PARSING}{p}{'/order'}} ) { 567 last if ($key eq 'CONTENT') ; 568 ++$cont_pos ; 569 } 570 571 splice( @{$this->{PARSING}{p}{'/order'}} , $cont_pos,0, "/.CONTENT/0") if !$this->{SMART}{no_order} ; 572 } 573 574 my $x = ++$this->{PARSING}{p}{'/.CONTENT/x'} ; 575 $this->{PARSING}{p}{"/.CONTENT/$x"} = $content ; 576 push( @{$this->{PARSING}{p}{'/order'}} , "/.CONTENT/$x") if !$this->{SMART}{no_order} ; 577 } 578 579 if ( $this->{SMART}{on_char} ) { 580 my $sub = $this->{SMART}{on_char} ; 581 &$sub($this->{PARSING}{p}{'/tag'} , $this->{PARSING}{p} , $this->{PARSING}{p}{'/back'} , \$this->{PARSING}{p}{CONTENT} , $this ) ; 582 } 583 584 return ; 585} 586 587######## 588# _END # 589######## 590 591sub _End { ##print "END>> @_[1] >> $_[0]->{PARSING}{p}{'/tag'}\n" ; 592 my $this = shift ; 593 594 if ( $this->{LAST_CALL} eq 'char' ) { _Char_process( $this , delete $this->{CONTENT_BUFFER} ) ;} 595 $this->{LAST_CALL} = 'end' ; 596 597 my $tag = shift ; 598 599 if ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 1 ) ) { $tag = lc($tag) ;} 600 elsif ( $this->{SMART}{tag} && ( $this->{SMART}{tag} == 2 ) ) { $tag = uc($tag) ;} 601 602 if ( $this->{PARSING}{p}{'/tag'} ne $tag ) { return ;} 603 604 delete $this->{PARSING}{p}{'/tag'} ; 605 606 my $back = delete $this->{PARSING}{p}{'/back'} ; 607 my $i = delete $this->{PARSING}{p}{'/i'} || 0 ; 608 609 my $nkeys = keys %{$this->{PARSING}{p}} ; 610 611 if ( $nkeys == 1 && exists $this->{PARSING}{p}{CONTENT} ) { 612 if (ref($back->{$tag}) eq 'ARRAY') { $back->{$tag}[$i] = $this->{PARSING}{p}{CONTENT} ;} 613 else { $back->{$tag} = $this->{PARSING}{p}{CONTENT} ;} 614 } 615 616 if ( $this->{PARSING}{p}{'/nodes'} && !%{$this->{PARSING}{p}{'/nodes'}} ) { delete $this->{PARSING}{p}{'/nodes'} ;} 617 if ( $this->{PARSING}{p}{'/order'} && $#{$this->{PARSING}{p}{'/order'}} <= 0 ) { delete $this->{PARSING}{p}{'/order'} ;} 618 619 delete $this->{PARSING}{p}{'/.CONTENT/x'} ; 620 621 if ( $this->{SMART}{on_end} ) { 622 my $sub = $this->{SMART}{on_end} ; 623 &$sub($tag , $this->{PARSING}{p} , $back , undef , $this) ; 624 } 625 626 $this->{PARSING}{p} = $back ; 627 628 return ; 629} 630 631########## 632# _FINAL # 633########## 634 635sub _Final { 636 my $this = shift ; 637 my $tree = $this->{PARSING}{tree} ; 638 639 foreach my $tied_cont ( @{$this->{TIED_CONTENTS}} ) { 640 $tied_cont->_cache_keys ; 641 } 642 643 delete $this->{TIED_CONTENTS} ; 644 delete $this->{LAST_CALL} ; 645 646 delete($this->{PARSING}) ; 647 return($tree) ; 648} 649 650####### 651# END # 652####### 653 6541; 655 656 657__END__ 658 659 660 661 662 663 664