1package XML::Simple; 2$XML::Simple::VERSION = '2.25'; 3=head1 NAME 4 5XML::Simple - An API for simple XML files 6 7=head1 SYNOPSIS 8 9PLEASE DO NOT USE THIS MODULE IN NEW CODE. If you ignore this 10warning and use it anyway, the C<qw(:strict)> mode will save you a little pain. 11 12 use XML::Simple qw(:strict); 13 14 my $ref = XMLin([<xml file or string>] [, <options>]); 15 16 my $xml = XMLout($hashref [, <options>]); 17 18Or the object oriented way: 19 20 require XML::Simple qw(:strict); 21 22 my $xs = XML::Simple->new([<options>]); 23 24 my $ref = $xs->XMLin([<xml file or string>] [, <options>]); 25 26 my $xml = $xs->XMLout($hashref [, <options>]); 27 28(or see L<"SAX SUPPORT"> for 'the SAX way'). 29 30Note, in these examples, the square brackets are used to denote optional items 31not to imply items should be supplied in arrayrefs. 32 33=cut 34 35# See after __END__ for more POD documentation 36 37 38# Load essentials here, other modules loaded on demand later 39 40use strict; 41use warnings; 42use warnings::register; 43use Carp; 44use Scalar::Util qw(); 45require Exporter; 46 47 48############################################################################## 49# Define some constants 50# 51 52use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER); 53 54@ISA = qw(Exporter); 55@EXPORT = qw(XMLin XMLout); 56@EXPORT_OK = qw(xml_in xml_out); 57 58my %StrictMode = (); 59 60my @KnownOptIn = qw(keyattr keeproot forcecontent contentkey noattr 61 searchpath forcearray cache suppressempty parseropts 62 grouptags nsexpand datahandler varattr variables 63 normalisespace normalizespace valueattr strictmode); 64 65my @KnownOptOut = qw(keyattr keeproot contentkey noattr 66 rootname xmldecl outputfile noescape suppressempty 67 grouptags nsexpand handler noindent attrindent nosort 68 valueattr numericescape strictmode); 69 70my @DefKeyAttr = qw(name key id); 71my $DefRootName = qq(opt); 72my $DefContentKey = qq(content); 73my $DefXmlDecl = qq(<?xml version='1.0' standalone='yes'?>); 74 75my $xmlns_ns = 'http://www.w3.org/2000/xmlns/'; 76my $bad_def_ns_jcn = '{' . $xmlns_ns . '}'; # LibXML::SAX workaround 77 78 79############################################################################## 80# Globals for use by caching routines 81# 82 83my %MemShareCache = (); 84my %MemCopyCache = (); 85 86 87############################################################################## 88# Wrapper for Exporter - handles ':strict' 89# 90 91sub import { 92 # Handle the :strict tag 93 94 my($calling_package) = caller(); 95 _strict_mode_for_caller(1) if grep(/^:strict$/, @_); 96 97 # Pass everything else to Exporter.pm 98 99 @_ = grep(!/^:strict$/, @_); 100 goto &Exporter::import; 101} 102 103 104############################################################################## 105# Constructor for optional object interface. 106# 107 108sub new { 109 my $class = shift; 110 111 if(@_ % 2) { 112 croak "Default options must be name=>value pairs (odd number supplied)"; 113 } 114 115 my %known_opt; 116 @known_opt{@KnownOptIn, @KnownOptOut} = (); 117 118 my %raw_opt = @_; 119 $raw_opt{strictmode} = _strict_mode_for_caller() 120 unless exists $raw_opt{strictmode}; 121 my %def_opt; 122 while(my($key, $val) = each %raw_opt) { 123 my $lkey = lc($key); 124 $lkey =~ s/_//g; 125 croak "Unrecognised option: $key" unless(exists($known_opt{$lkey})); 126 $def_opt{$lkey} = $val; 127 } 128 my $self = { def_opt => \%def_opt }; 129 130 return(bless($self, $class)); 131} 132 133 134############################################################################## 135# Sub: _strict_mode_for_caller() 136# 137# Gets or sets the XML::Simple :strict mode flag for the calling namespace. 138# Walks back through call stack to find the calling namespace and sets the 139# :strict mode flag for that namespace if an argument was supplied and returns 140# the flag value if not. 141# 142 143sub _strict_mode_for_caller { 144 my $set_mode = @_; 145 my $frame = 1; 146 while(my($package) = caller($frame++)) { 147 next if $package eq 'XML::Simple'; 148 $StrictMode{$package} = 1 if $set_mode; 149 return $StrictMode{$package}; 150 } 151 return(0); 152} 153 154 155############################################################################## 156# Sub: _get_object() 157# 158# Helper routine called from XMLin() and XMLout() to create an object if none 159# was provided. Note, this routine does mess with the caller's @_ array. 160# 161 162sub _get_object { 163 my $self; 164 if($_[0] and UNIVERSAL::isa($_[0], 'XML::Simple')) { 165 $self = shift; 166 } 167 else { 168 $self = XML::Simple->new(); 169 } 170 171 return $self; 172} 173 174 175############################################################################## 176# Sub/Method: XMLin() 177# 178# Exported routine for slurping XML into a hashref - see pod for info. 179# 180# May be called as object method or as a plain function. 181# 182# Expects one arg for the source XML, optionally followed by a number of 183# name => value option pairs. 184# 185 186sub XMLin { 187 my $self = &_get_object; # note, @_ is passed implicitly 188 189 my $target = shift; 190 191 192 # Work out whether to parse a string, a file or a filehandle 193 194 if(not defined $target) { 195 return $self->parse_file(undef, @_); 196 } 197 198 elsif($target eq '-') { 199 local($/) = undef; 200 $target = <STDIN>; 201 return $self->parse_string(\$target, @_); 202 } 203 204 elsif(my $type = ref($target)) { 205 if($type eq 'SCALAR') { 206 return $self->parse_string($target, @_); 207 } 208 else { 209 return $self->parse_fh($target, @_); 210 } 211 } 212 213 elsif($target =~ m{<.*?>}s) { 214 return $self->parse_string(\$target, @_); 215 } 216 217 else { 218 return $self->parse_file($target, @_); 219 } 220} 221 222 223############################################################################## 224# Sub/Method: parse_file() 225# 226# Same as XMLin, but only parses from a named file. 227# 228 229sub parse_file { 230 my $self = &_get_object; # note, @_ is passed implicitly 231 232 my $filename = shift; 233 234 $self->handle_options('in', @_); 235 236 $filename = $self->default_config_file if not defined $filename; 237 238 $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}}); 239 240 # Check cache for previous parse 241 242 if($self->{opt}->{cache}) { 243 foreach my $scheme (@{$self->{opt}->{cache}}) { 244 my $method = 'cache_read_' . $scheme; 245 my $opt = $self->$method($filename); 246 return($opt) if($opt); 247 } 248 } 249 250 my $ref = $self->build_simple_tree($filename, undef); 251 252 if($self->{opt}->{cache}) { 253 my $method = 'cache_write_' . $self->{opt}->{cache}->[0]; 254 $self->$method($ref, $filename); 255 } 256 257 return $ref; 258} 259 260 261############################################################################## 262# Sub/Method: parse_fh() 263# 264# Same as XMLin, but only parses from a filehandle. 265# 266 267sub parse_fh { 268 my $self = &_get_object; # note, @_ is passed implicitly 269 270 my $fh = shift; 271 croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') . 272 " as a filehandle" unless ref $fh; 273 274 $self->handle_options('in', @_); 275 276 return $self->build_simple_tree(undef, $fh); 277} 278 279 280############################################################################## 281# Sub/Method: parse_string() 282# 283# Same as XMLin, but only parses from a string or a reference to a string. 284# 285 286sub parse_string { 287 my $self = &_get_object; # note, @_ is passed implicitly 288 289 my $string = shift; 290 291 $self->handle_options('in', @_); 292 293 return $self->build_simple_tree(undef, ref $string ? $string : \$string); 294} 295 296 297############################################################################## 298# Method: default_config_file() 299# 300# Returns the name of the XML file to parse if no filename (or XML string) 301# was provided. 302# 303 304sub default_config_file { 305 my $self = shift; 306 307 require File::Basename; 308 309 my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+'); 310 311 # Add script directory to searchpath 312 313 if($script_dir) { 314 unshift(@{$self->{opt}->{searchpath}}, $script_dir); 315 } 316 317 return $basename . '.xml'; 318} 319 320 321############################################################################## 322# Method: build_simple_tree() 323# 324# Builds a 'tree' data structure as provided by XML::Parser and then 325# 'simplifies' it as specified by the various options in effect. 326# 327 328sub build_simple_tree { 329 my $self = shift; 330 331 my $tree = eval { 332 $self->build_tree(@_); 333 }; 334 Carp::croak("$@XML::Simple called") if $@; 335 336 return $self->{opt}->{keeproot} 337 ? $self->collapse({}, @$tree) 338 : $self->collapse(@{$tree->[1]}); 339} 340 341 342############################################################################## 343# Method: build_tree() 344# 345# This routine will be called if there is no suitable pre-parsed tree in a 346# cache. It parses the XML and returns an XML::Parser 'Tree' style data 347# structure (summarised in the comments for the collapse() routine below). 348# 349# XML::Simple requires the services of another module that knows how to parse 350# XML. If XML::SAX is installed, the default SAX parser will be used, 351# otherwise XML::Parser will be used. 352# 353# This routine expects to be passed a filename as argument 1 or a 'string' as 354# argument 2. The 'string' might be a string of XML (passed by reference to 355# save memory) or it might be a reference to an IO::Handle. (This 356# non-intuitive mess results in part from the way XML::Parser works but that's 357# really no excuse). 358# 359 360sub build_tree { 361 my $self = shift; 362 my $filename = shift; 363 my $string = shift; 364 365 366 my $preferred_parser = $PREFERRED_PARSER; 367 unless(defined($preferred_parser)) { 368 $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || ''; 369 } 370 if($preferred_parser eq 'XML::Parser') { 371 return($self->build_tree_xml_parser($filename, $string)); 372 } 373 374 eval { require XML::SAX; }; # We didn't need it until now 375 if($@) { # No XML::SAX - fall back to XML::Parser 376 if($preferred_parser) { # unless a SAX parser was expressly requested 377 croak "XMLin() could not load XML::SAX"; 378 } 379 return($self->build_tree_xml_parser($filename, $string)); 380 } 381 382 $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser); 383 384 my $sp = XML::SAX::ParserFactory->parser(Handler => $self); 385 386 $self->{nocollapse} = 1; 387 my($tree); 388 if($filename) { 389 $tree = $sp->parse_uri($filename); 390 } 391 else { 392 if(ref($string) && ref($string) ne 'SCALAR') { 393 $tree = $sp->parse_file($string); 394 } 395 else { 396 $tree = $sp->parse_string($$string); 397 } 398 } 399 400 return($tree); 401} 402 403 404############################################################################## 405# Method: build_tree_xml_parser() 406# 407# This routine will be called if XML::SAX is not installed, or if XML::Parser 408# was specifically requested. It takes the same arguments as build_tree() and 409# returns the same data structure (XML::Parser 'Tree' style). 410# 411 412sub build_tree_xml_parser { 413 my $self = shift; 414 my $filename = shift; 415 my $string = shift; 416 417 418 eval { 419 local($^W) = 0; # Suppress warning from Expat.pm re File::Spec::load() 420 require XML::Parser; # We didn't need it until now 421 }; 422 if($@) { 423 croak "XMLin() requires either XML::SAX or XML::Parser"; 424 } 425 426 if($self->{opt}->{nsexpand}) { 427 carp "'nsexpand' option requires XML::SAX"; 428 } 429 430 my $xp = $self->new_xml_parser(); 431 432 my($tree); 433 if($filename) { 434 # $tree = $xp->parsefile($filename); # Changed due to prob w/mod_perl 435 open(my $xfh, '<', $filename) || croak qq($filename - $!); 436 $tree = $xp->parse($xfh); 437 } 438 else { 439 $tree = $xp->parse($$string); 440 } 441 442 return($tree); 443} 444 445 446############################################################################## 447# Method: new_xml_parser() 448# 449# Simply calls the XML::Parser constructor. Override this method to customise 450# the behaviour of the parser. 451# 452 453sub new_xml_parser { 454 my($self) = @_; 455 456 my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}}); 457 $xp->setHandlers(ExternEnt => sub {return $_[2]}); 458 459 return $xp; 460} 461 462 463############################################################################## 464# Method: cache_write_storable() 465# 466# Wrapper routine for invoking Storable::nstore() to cache a parsed data 467# structure. 468# 469 470sub cache_write_storable { 471 my($self, $data, $filename) = @_; 472 473 my $cachefile = $self->storable_filename($filename); 474 475 require Storable; # We didn't need it until now 476 477 if ('VMS' eq $^O) { 478 Storable::nstore($data, $cachefile); 479 } 480 else { 481 # If the following line fails for you, your Storable.pm is old - upgrade 482 Storable::lock_nstore($data, $cachefile); 483 } 484 485} 486 487 488############################################################################## 489# Method: cache_read_storable() 490# 491# Wrapper routine for invoking Storable::retrieve() to read a cached parsed 492# data structure. Only returns cached data if the cache file exists and is 493# newer than the source XML file. 494# 495 496sub cache_read_storable { 497 my($self, $filename) = @_; 498 499 my $cachefile = $self->storable_filename($filename); 500 501 return unless(-r $cachefile); 502 return unless((stat($cachefile))[9] > (stat($filename))[9]); 503 504 require Storable; # We didn't need it until now 505 506 if ('VMS' eq $^O) { 507 return(Storable::retrieve($cachefile)); 508 } 509 else { 510 return(Storable::lock_retrieve($cachefile)); 511 } 512 513} 514 515 516############################################################################## 517# Method: storable_filename() 518# 519# Translates the supplied source XML filename into a filename for the storable 520# cached data. A '.stor' suffix is added after stripping an optional '.xml' 521# suffix. 522# 523 524sub storable_filename { 525 my($self, $cachefile) = @_; 526 527 $cachefile =~ s{(\.xml)?$}{.stor}; 528 return $cachefile; 529} 530 531 532############################################################################## 533# Method: cache_write_memshare() 534# 535# Takes the supplied data structure reference and stores it away in a global 536# hash structure. 537# 538 539sub cache_write_memshare { 540 my($self, $data, $filename) = @_; 541 542 $MemShareCache{$filename} = [time(), $data]; 543} 544 545 546############################################################################## 547# Method: cache_read_memshare() 548# 549# Takes a filename and looks in a global hash for a cached parsed version. 550# 551 552sub cache_read_memshare { 553 my($self, $filename) = @_; 554 555 return unless($MemShareCache{$filename}); 556 return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]); 557 558 return($MemShareCache{$filename}->[1]); 559 560} 561 562 563############################################################################## 564# Method: cache_write_memcopy() 565# 566# Takes the supplied data structure and stores a copy of it in a global hash 567# structure. 568# 569 570sub cache_write_memcopy { 571 my($self, $data, $filename) = @_; 572 573 require Storable; # We didn't need it until now 574 575 $MemCopyCache{$filename} = [time(), Storable::dclone($data)]; 576} 577 578 579############################################################################## 580# Method: cache_read_memcopy() 581# 582# Takes a filename and looks in a global hash for a cached parsed version. 583# Returns a reference to a copy of that data structure. 584# 585 586sub cache_read_memcopy { 587 my($self, $filename) = @_; 588 589 return unless($MemCopyCache{$filename}); 590 return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]); 591 592 return(Storable::dclone($MemCopyCache{$filename}->[1])); 593 594} 595 596 597############################################################################## 598# Sub/Method: XMLout() 599# 600# Exported routine for 'unslurping' a data structure out to XML. 601# 602# Expects a reference to a data structure and an optional list of option 603# name => value pairs. 604# 605 606sub XMLout { 607 my $self = &_get_object; # note, @_ is passed implicitly 608 609 croak "XMLout() requires at least one argument" unless(@_); 610 my $ref = shift; 611 612 $self->handle_options('out', @_); 613 614 615 # If namespace expansion is set, XML::NamespaceSupport is required 616 617 if($self->{opt}->{nsexpand}) { 618 require XML::NamespaceSupport; 619 $self->{nsup} = XML::NamespaceSupport->new(); 620 $self->{ns_prefix} = 'aaa'; 621 } 622 623 624 # Wrap top level arrayref in a hash 625 626 if(UNIVERSAL::isa($ref, 'ARRAY')) { 627 $ref = { anon => $ref }; 628 } 629 630 631 # Extract rootname from top level hash if keeproot enabled 632 633 if($self->{opt}->{keeproot}) { 634 my(@keys) = keys(%$ref); 635 if(@keys == 1) { 636 $ref = $ref->{$keys[0]}; 637 $self->{opt}->{rootname} = $keys[0]; 638 } 639 } 640 641 # Ensure there are no top level attributes if we're not adding root elements 642 643 elsif($self->{opt}->{rootname} eq '') { 644 if(UNIVERSAL::isa($ref, 'HASH')) { 645 my $refsave = $ref; 646 $ref = {}; 647 foreach (keys(%$refsave)) { 648 if(ref($refsave->{$_})) { 649 $ref->{$_} = $refsave->{$_}; 650 } 651 else { 652 $ref->{$_} = [ $refsave->{$_} ]; 653 } 654 } 655 } 656 } 657 658 659 # Encode the hashref and write to file if necessary 660 661 $self->{_ancestors} = {}; 662 my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, ''); 663 delete $self->{_ancestors}; 664 665 if($self->{opt}->{xmldecl}) { 666 $xml = $self->{opt}->{xmldecl} . "\n" . $xml; 667 } 668 669 if($self->{opt}->{outputfile}) { 670 if(ref($self->{opt}->{outputfile})) { 671 my $fh = $self->{opt}->{outputfile}; 672 if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) { 673 eval { require IO::Handle; }; 674 croak $@ if $@; 675 } 676 return($fh->print($xml)); 677 } 678 else { 679 open(my $out, '>', "$self->{opt}->{outputfile}") || 680 croak "open($self->{opt}->{outputfile}): $!"; 681 binmode($out, ':utf8') if($] >= 5.008); 682 print $out $xml or croak "print: $!"; 683 close $out or croak "close: $!"; 684 } 685 } 686 elsif($self->{opt}->{handler}) { 687 require XML::SAX; 688 my $sp = XML::SAX::ParserFactory->parser( 689 Handler => $self->{opt}->{handler} 690 ); 691 return($sp->parse_string($xml)); 692 } 693 else { 694 return($xml); 695 } 696} 697 698 699############################################################################## 700# Method: handle_options() 701# 702# Helper routine for both XMLin() and XMLout(). Both routines handle their 703# first argument and assume all other args are options handled by this routine. 704# Saves a hash of options in $self->{opt}. 705# 706# If default options were passed to the constructor, they will be retrieved 707# here and merged with options supplied to the method call. 708# 709# First argument should be the string 'in' or the string 'out'. 710# 711# Remaining arguments should be name=>value pairs. Sets up default values 712# for options not supplied. Unrecognised options are a fatal error. 713# 714 715sub handle_options { 716 my $self = shift; 717 my $dirn = shift; 718 719 720 # Determine valid options based on context 721 722 my %known_opt; 723 if($dirn eq 'in') { 724 @known_opt{@KnownOptIn} = @KnownOptIn; 725 } 726 else { 727 @known_opt{@KnownOptOut} = @KnownOptOut; 728 } 729 730 731 # Store supplied options in hashref and weed out invalid ones 732 733 if(@_ % 2) { 734 croak "Options must be name=>value pairs (odd number supplied)"; 735 } 736 my %raw_opt = @_; 737 my $opt = {}; 738 $self->{opt} = $opt; 739 740 while(my($key, $val) = each %raw_opt) { 741 my $lkey = lc($key); 742 $lkey =~ s/_//g; 743 croak "Unrecognised option: $key" unless($known_opt{$lkey}); 744 $opt->{$lkey} = $val; 745 } 746 747 748 # Merge in options passed to constructor 749 750 foreach (keys(%known_opt)) { 751 unless(exists($opt->{$_})) { 752 if(exists($self->{def_opt}->{$_})) { 753 $opt->{$_} = $self->{def_opt}->{$_}; 754 } 755 } 756 } 757 758 759 # Set sensible defaults if not supplied 760 761 if(exists($opt->{rootname})) { 762 unless(defined($opt->{rootname})) { 763 $opt->{rootname} = ''; 764 } 765 } 766 else { 767 $opt->{rootname} = $DefRootName; 768 } 769 770 if($opt->{xmldecl} and $opt->{xmldecl} eq '1') { 771 $opt->{xmldecl} = $DefXmlDecl; 772 } 773 774 if(exists($opt->{contentkey})) { 775 if($opt->{contentkey} =~ m{^-(.*)$}) { 776 $opt->{contentkey} = $1; 777 $opt->{collapseagain} = 1; 778 } 779 } 780 else { 781 $opt->{contentkey} = $DefContentKey; 782 } 783 784 unless(exists($opt->{normalisespace})) { 785 $opt->{normalisespace} = $opt->{normalizespace}; 786 } 787 $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace})); 788 789 # Cleanups for values assumed to be arrays later 790 791 if($opt->{searchpath}) { 792 unless(ref($opt->{searchpath})) { 793 $opt->{searchpath} = [ $opt->{searchpath} ]; 794 } 795 } 796 else { 797 $opt->{searchpath} = [ ]; 798 } 799 800 if($opt->{cache} and !ref($opt->{cache})) { 801 $opt->{cache} = [ $opt->{cache} ]; 802 } 803 if($opt->{cache}) { 804 $_ = lc($_) foreach (@{$opt->{cache}}); 805 foreach my $scheme (@{$opt->{cache}}) { 806 my $method = 'cache_read_' . $scheme; 807 croak "Unsupported caching scheme: $scheme" 808 unless($self->can($method)); 809 } 810 } 811 812 if(exists($opt->{parseropts})) { 813 if(warnings::enabled()) { 814 carp "Warning: " . 815 "'ParserOpts' is deprecated, contact the author if you need it"; 816 } 817 } 818 else { 819 $opt->{parseropts} = [ ]; 820 } 821 822 823 # Special cleanup for {forcearray} which could be regex, arrayref or boolean 824 # or left to default to 0 825 826 if(exists($opt->{forcearray})) { 827 if(ref($opt->{forcearray}) eq 'Regexp') { 828 $opt->{forcearray} = [ $opt->{forcearray} ]; 829 } 830 831 if(ref($opt->{forcearray}) eq 'ARRAY') { 832 my @force_list = @{$opt->{forcearray}}; 833 if(@force_list) { 834 $opt->{forcearray} = {}; 835 foreach my $tag (@force_list) { 836 if(ref($tag) eq 'Regexp') { 837 push @{$opt->{forcearray}->{_regex}}, $tag; 838 } 839 else { 840 $opt->{forcearray}->{$tag} = 1; 841 } 842 } 843 } 844 else { 845 $opt->{forcearray} = 0; 846 } 847 } 848 else { 849 $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 ); 850 } 851 } 852 else { 853 if($opt->{strictmode} and $dirn eq 'in') { 854 croak "No value specified for 'ForceArray' option in call to XML$dirn()"; 855 } 856 $opt->{forcearray} = 0; 857 } 858 859 860 # Special cleanup for {keyattr} which could be arrayref or hashref or left 861 # to default to arrayref 862 863 if(exists($opt->{keyattr})) { 864 if(ref($opt->{keyattr})) { 865 if(ref($opt->{keyattr}) eq 'HASH') { 866 867 # Make a copy so we can mess with it 868 869 $opt->{keyattr} = { %{$opt->{keyattr}} }; 870 871 872 # Convert keyattr => { elem => '+attr' } 873 # to keyattr => { elem => [ 'attr', '+' ] } 874 875 foreach my $el (keys(%{$opt->{keyattr}})) { 876 if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) { 877 $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ]; 878 if($opt->{strictmode} and $dirn eq 'in') { 879 next if($opt->{forcearray} == 1); 880 next if(ref($opt->{forcearray}) eq 'HASH' 881 and $opt->{forcearray}->{$el}); 882 croak "<$el> set in KeyAttr but not in ForceArray"; 883 } 884 } 885 else { 886 delete($opt->{keyattr}->{$el}); # Never reached (famous last words?) 887 } 888 } 889 } 890 else { 891 if(@{$opt->{keyattr}} == 0) { 892 delete($opt->{keyattr}); 893 } 894 } 895 } 896 else { 897 $opt->{keyattr} = [ $opt->{keyattr} ]; 898 } 899 } 900 else { 901 if($opt->{strictmode}) { 902 croak "No value specified for 'KeyAttr' option in call to XML$dirn()"; 903 } 904 $opt->{keyattr} = [ @DefKeyAttr ]; 905 } 906 907 908 # Special cleanup for {valueattr} which could be arrayref or hashref 909 910 if(exists($opt->{valueattr})) { 911 if(ref($opt->{valueattr}) eq 'ARRAY') { 912 $opt->{valueattrlist} = {}; 913 $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} }); 914 } 915 } 916 917 # make sure there's nothing weird in {grouptags} 918 919 if($opt->{grouptags}) { 920 croak "Illegal value for 'GroupTags' option - expected a hashref" 921 unless UNIVERSAL::isa($opt->{grouptags}, 'HASH'); 922 923 while(my($key, $val) = each %{$opt->{grouptags}}) { 924 next if $key ne $val; 925 croak "Bad value in GroupTags: '$key' => '$val'"; 926 } 927 } 928 929 930 # Check the {variables} option is valid and initialise variables hash 931 932 if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) { 933 croak "Illegal value for 'Variables' option - expected a hashref"; 934 } 935 936 if($opt->{variables}) { 937 $self->{_var_values} = { %{$opt->{variables}} }; 938 } 939 elsif($opt->{varattr}) { 940 $self->{_var_values} = {}; 941 } 942 943} 944 945 946############################################################################## 947# Method: find_xml_file() 948# 949# Helper routine for XMLin(). 950# Takes a filename, and a list of directories, attempts to locate the file in 951# the directories listed. 952# Returns a full pathname on success; croaks on failure. 953# 954 955sub find_xml_file { 956 my $self = shift; 957 my $file = shift; 958 my @search_path = @_; 959 960 961 require File::Basename; 962 require File::Spec; 963 964 my($filename, $filedir) = File::Basename::fileparse($file); 965 966 if($filename ne $file) { # Ignore searchpath if dir component 967 return($file) if(-e $file); 968 } 969 else { 970 my($path); 971 foreach $path (@search_path) { 972 my $fullpath = File::Spec->catfile($path, $file); 973 return($fullpath) if(-e $fullpath); 974 } 975 } 976 977 # If user did not supply a search path, default to current directory 978 979 if(!@search_path) { 980 return($file) if(-e $file); 981 croak "File does not exist: $file"; 982 } 983 984 croak "Could not find $file in ", join(':', @search_path); 985} 986 987 988############################################################################## 989# Method: collapse() 990# 991# Helper routine for XMLin(). This routine really comprises the 'smarts' (or 992# value add) of this module. 993# 994# Takes the parse tree that XML::Parser produced from the supplied XML and 995# recurses through it 'collapsing' unnecessary levels of indirection (nested 996# arrays etc) to produce a data structure that is easier to work with. 997# 998# Elements in the original parser tree are represented as an element name 999# followed by an arrayref. The first element of the array is a hashref 1000# containing the attributes. The rest of the array contains a list of any 1001# nested elements as name+arrayref pairs: 1002# 1003# <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ] 1004# 1005# The special element name '0' (zero) flags text content. 1006# 1007# This routine cuts down the noise by discarding any text content consisting of 1008# only whitespace and then moves the nested elements into the attribute hash 1009# using the name of the nested element as the hash key and the collapsed 1010# version of the nested element as the value. Multiple nested elements with 1011# the same name will initially be represented as an arrayref, but this may be 1012# 'folded' into a hashref depending on the value of the keyattr option. 1013# 1014 1015sub collapse { 1016 my $self = shift; 1017 1018 1019 # Start with the hash of attributes 1020 1021 my $attr = shift; 1022 if($self->{opt}->{noattr}) { # Discard if 'noattr' set 1023 $attr = $self->new_hashref; 1024 } 1025 elsif($self->{opt}->{normalisespace} == 2) { 1026 while(my($key, $value) = each %$attr) { 1027 $attr->{$key} = $self->normalise_space($value) 1028 } 1029 } 1030 1031 1032 # Do variable substitutions 1033 1034 if(my $var = $self->{_var_values}) { 1035 while(my($key, $val) = each(%$attr)) { 1036 $val =~ s^\$\{([\w.]+)\}^ $self->get_var($1) ^ge; 1037 $attr->{$key} = $val; 1038 } 1039 } 1040 1041 1042 # Roll up 'value' attributes (but only if no nested elements) 1043 1044 if(!@_ and keys %$attr == 1) { 1045 my($k) = keys %$attr; 1046 if($self->{opt}->{valueattrlist} and $self->{opt}->{valueattrlist}->{$k}) { 1047 return $attr->{$k}; 1048 } 1049 } 1050 1051 1052 # Add any nested elements 1053 1054 my($key, $val); 1055 while(@_) { 1056 $key = shift; 1057 $val = shift; 1058 $val = '' if not defined $val; 1059 1060 if(ref($val)) { 1061 $val = $self->collapse(@$val); 1062 next if(!defined($val) and $self->{opt}->{suppressempty}); 1063 } 1064 elsif($key eq '0') { 1065 next if($val =~ m{^\s*$}s); # Skip all whitespace content 1066 1067 $val = $self->normalise_space($val) 1068 if($self->{opt}->{normalisespace} == 2); 1069 1070 # do variable substitutions 1071 1072 if(my $var = $self->{_var_values}) { 1073 $val =~ s^\$\{(\w+)\}^ $self->get_var($1) ^ge; 1074 } 1075 1076 1077 # look for variable definitions 1078 1079 if(my $var = $self->{opt}->{varattr}) { 1080 if(exists $attr->{$var}) { 1081 $self->set_var($attr->{$var}, $val); 1082 } 1083 } 1084 1085 1086 # Collapse text content in element with no attributes to a string 1087 1088 if(!%$attr and !@_) { 1089 return($self->{opt}->{forcecontent} ? 1090 { $self->{opt}->{contentkey} => $val } : $val 1091 ); 1092 } 1093 $key = $self->{opt}->{contentkey}; 1094 } 1095 1096 1097 # Combine duplicate attributes into arrayref if required 1098 1099 if(exists($attr->{$key})) { 1100 if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) { 1101 push(@{$attr->{$key}}, $val); 1102 } 1103 else { 1104 $attr->{$key} = [ $attr->{$key}, $val ]; 1105 } 1106 } 1107 elsif(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { 1108 $attr->{$key} = [ $val ]; 1109 } 1110 else { 1111 if( $key ne $self->{opt}->{contentkey} 1112 and ( 1113 ($self->{opt}->{forcearray} == 1) 1114 or ( 1115 (ref($self->{opt}->{forcearray}) eq 'HASH') 1116 and ( 1117 $self->{opt}->{forcearray}->{$key} 1118 or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}}) 1119 ) 1120 ) 1121 ) 1122 ) { 1123 $attr->{$key} = [ $val ]; 1124 } 1125 else { 1126 $attr->{$key} = $val; 1127 } 1128 } 1129 1130 } 1131 1132 1133 # Turn arrayrefs into hashrefs if key fields present 1134 1135 if($self->{opt}->{keyattr}) { 1136 while(($key,$val) = each %$attr) { 1137 if(defined($val) and UNIVERSAL::isa($val, 'ARRAY')) { 1138 $attr->{$key} = $self->array_to_hash($key, $val); 1139 } 1140 } 1141 } 1142 1143 1144 # disintermediate grouped tags 1145 1146 if($self->{opt}->{grouptags}) { 1147 while(my($key, $val) = each(%$attr)) { 1148 next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); 1149 next unless(exists($self->{opt}->{grouptags}->{$key})); 1150 1151 my($child_key, $child_val) = %$val; 1152 1153 if($self->{opt}->{grouptags}->{$key} eq $child_key) { 1154 $attr->{$key}= $child_val; 1155 } 1156 } 1157 } 1158 1159 1160 # Fold hashes containing a single anonymous array up into just the array 1161 1162 my $count = scalar keys %$attr; 1163 if($count == 1 1164 and exists $attr->{anon} 1165 and UNIVERSAL::isa($attr->{anon}, 'ARRAY') 1166 ) { 1167 return($attr->{anon}); 1168 } 1169 1170 1171 # Do the right thing if hash is empty, otherwise just return it 1172 1173 if(!%$attr and exists($self->{opt}->{suppressempty})) { 1174 if(defined($self->{opt}->{suppressempty}) and 1175 $self->{opt}->{suppressempty} eq '') { 1176 return(''); 1177 } 1178 return(undef); 1179 } 1180 1181 1182 # Roll up named elements with named nested 'value' attributes 1183 1184 if($self->{opt}->{valueattr}) { 1185 while(my($key, $val) = each(%$attr)) { 1186 next unless($self->{opt}->{valueattr}->{$key}); 1187 next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1)); 1188 my($k) = keys %$val; 1189 next unless($k eq $self->{opt}->{valueattr}->{$key}); 1190 $attr->{$key} = $val->{$k}; 1191 } 1192 } 1193 1194 return($attr) 1195 1196} 1197 1198 1199############################################################################## 1200# Method: set_var() 1201# 1202# Called when a variable definition is encountered in the XML. (A variable 1203# definition looks like <element attrname="name">value</element> where attrname 1204# matches the varattr setting). 1205# 1206 1207sub set_var { 1208 my($self, $name, $value) = @_; 1209 1210 $self->{_var_values}->{$name} = $value; 1211} 1212 1213 1214############################################################################## 1215# Method: get_var() 1216# 1217# Called during variable substitution to get the value for the named variable. 1218# 1219 1220sub get_var { 1221 my($self, $name) = @_; 1222 1223 my $value = $self->{_var_values}->{$name}; 1224 return $value if(defined($value)); 1225 1226 return '${' . $name . '}'; 1227} 1228 1229 1230############################################################################## 1231# Method: normalise_space() 1232# 1233# Strips leading and trailing whitespace and collapses sequences of whitespace 1234# characters to a single space. 1235# 1236 1237sub normalise_space { 1238 my($self, $text) = @_; 1239 1240 $text =~ s/^\s+//s; 1241 $text =~ s/\s+$//s; 1242 $text =~ s/\s\s+/ /sg; 1243 1244 return $text; 1245} 1246 1247 1248############################################################################## 1249# Method: array_to_hash() 1250# 1251# Helper routine for collapse(). 1252# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a 1253# reference to the hash on success or the original array if folding is 1254# not possible. Behaviour is controlled by 'keyattr' option. 1255# 1256 1257sub array_to_hash { 1258 my $self = shift; 1259 my $name = shift; 1260 my $arrayref = shift; 1261 1262 my $hashref = $self->new_hashref; 1263 1264 my($i, $key, $val, $flag); 1265 1266 1267 # Handle keyattr => { .... } 1268 1269 if(ref($self->{opt}->{keyattr}) eq 'HASH') { 1270 return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name})); 1271 ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}}; 1272 for($i = 0; $i < @$arrayref; $i++) { 1273 if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and 1274 exists($arrayref->[$i]->{$key}) 1275 ) { 1276 $val = $arrayref->[$i]->{$key}; 1277 if(ref($val)) { 1278 $self->die_or_warn("<$name> element has non-scalar '$key' key attribute"); 1279 return($arrayref); 1280 } 1281 $val = $self->normalise_space($val) 1282 if($self->{opt}->{normalisespace} == 1); 1283 $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") 1284 if(exists($hashref->{$val})); 1285 $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} ); 1286 $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-'); 1287 delete $hashref->{$val}->{$key} unless($flag eq '+'); 1288 } 1289 else { 1290 $self->die_or_warn("<$name> element has no '$key' key attribute"); 1291 return($arrayref); 1292 } 1293 } 1294 } 1295 1296 1297 # Or assume keyattr => [ .... ] 1298 1299 else { 1300 my $default_keys = 1301 join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}}); 1302 1303 ELEMENT: for($i = 0; $i < @$arrayref; $i++) { 1304 return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH')); 1305 1306 foreach $key (@{$self->{opt}->{keyattr}}) { 1307 if(defined($arrayref->[$i]->{$key})) { 1308 $val = $arrayref->[$i]->{$key}; 1309 if(ref($val)) { 1310 $self->die_or_warn("<$name> element has non-scalar '$key' key attribute") 1311 if not $default_keys; 1312 return($arrayref); 1313 } 1314 $val = $self->normalise_space($val) 1315 if($self->{opt}->{normalisespace} == 1); 1316 $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val") 1317 if(exists($hashref->{$val})); 1318 $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} ); 1319 delete $hashref->{$val}->{$key}; 1320 next ELEMENT; 1321 } 1322 } 1323 1324 return($arrayref); # No keyfield matched 1325 } 1326 } 1327 1328 # collapse any hashes which now only have a 'content' key 1329 1330 if($self->{opt}->{collapseagain}) { 1331 $hashref = $self->collapse_content($hashref); 1332 } 1333 1334 return($hashref); 1335} 1336 1337 1338############################################################################## 1339# Method: die_or_warn() 1340# 1341# Takes a diagnostic message and does one of three things: 1342# 1. dies if strict mode is enabled 1343# 2. warns if warnings are enabled but strict mode is not 1344# 3. ignores message and returns silently if neither strict mode nor warnings 1345# are enabled 1346# 1347 1348sub die_or_warn { 1349 my $self = shift; 1350 my $msg = shift; 1351 1352 croak $msg if($self->{opt}->{strictmode}); 1353 if(warnings::enabled()) { 1354 carp "Warning: $msg"; 1355 } 1356} 1357 1358 1359############################################################################## 1360# Method: new_hashref() 1361# 1362# This is a hook routine for overriding in a sub-class. Some people believe 1363# that using Tie::IxHash here will solve order-loss problems. 1364# 1365 1366sub new_hashref { 1367 my $self = shift; 1368 1369 return { @_ }; 1370} 1371 1372 1373############################################################################## 1374# Method: collapse_content() 1375# 1376# Helper routine for array_to_hash 1377# 1378# Arguments expected are: 1379# - an XML::Simple object 1380# - a hashref 1381# the hashref is a former array, turned into a hash by array_to_hash because 1382# of the presence of key attributes 1383# at this point collapse_content avoids over-complicated structures like 1384# dir => { libexecdir => { content => '$exec_prefix/libexec' }, 1385# localstatedir => { content => '$prefix' }, 1386# } 1387# into 1388# dir => { libexecdir => '$exec_prefix/libexec', 1389# localstatedir => '$prefix', 1390# } 1391 1392sub collapse_content { 1393 my $self = shift; 1394 my $hashref = shift; 1395 1396 my $contentkey = $self->{opt}->{contentkey}; 1397 1398 # first go through the values,checking that they are fit to collapse 1399 foreach my $val (values %$hashref) { 1400 return $hashref unless ( (ref($val) eq 'HASH') 1401 and (keys %$val == 1) 1402 and (exists $val->{$contentkey}) 1403 ); 1404 } 1405 1406 # now collapse them 1407 foreach my $key (keys %$hashref) { 1408 $hashref->{$key}= $hashref->{$key}->{$contentkey}; 1409 } 1410 1411 return $hashref; 1412} 1413 1414 1415############################################################################## 1416# Method: value_to_xml() 1417# 1418# Helper routine for XMLout() - recurses through a data structure building up 1419# and returning an XML representation of that structure as a string. 1420# 1421# Arguments expected are: 1422# - the data structure to be encoded (usually a reference) 1423# - the XML tag name to use for this item 1424# - a string of spaces for use as the current indent level 1425# 1426 1427sub value_to_xml { 1428 my $self = shift;; 1429 1430 1431 # Grab the other arguments 1432 1433 my($ref, $name, $indent) = @_; 1434 1435 my $named = (defined($name) and $name ne '' ? 1 : 0); 1436 1437 my $nl = "\n"; 1438 1439 my $is_root = $indent eq '' ? 1 : 0; # Warning, dirty hack! 1440 if($self->{opt}->{noindent}) { 1441 $indent = ''; 1442 $nl = ''; 1443 } 1444 1445 1446 # Convert to XML 1447 1448 my $refaddr = Scalar::Util::refaddr($ref); 1449 if($refaddr) { 1450 croak "circular data structures not supported" 1451 if $self->{_ancestors}->{$refaddr}; 1452 $self->{_ancestors}->{$refaddr} = $ref; # keep ref alive until we delete it 1453 } 1454 else { 1455 if($named) { 1456 return(join('', 1457 $indent, '<', $name, '>', 1458 ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)), 1459 '</', $name, ">", $nl 1460 )); 1461 } 1462 else { 1463 return("$ref$nl"); 1464 } 1465 } 1466 1467 1468 # Unfold hash to array if possible 1469 1470 if(UNIVERSAL::isa($ref, 'HASH') # It is a hash 1471 and keys %$ref # and it's not empty 1472 and $self->{opt}->{keyattr} # and folding is enabled 1473 and !$is_root # and its not the root element 1474 ) { 1475 $ref = $self->hash_to_array($name, $ref); 1476 } 1477 1478 1479 my @result = (); 1480 my($key, $value); 1481 1482 1483 # Handle hashrefs 1484 1485 if(UNIVERSAL::isa($ref, 'HASH')) { 1486 1487 # Reintermediate grouped values if applicable 1488 1489 if($self->{opt}->{grouptags}) { 1490 $ref = $self->copy_hash($ref); 1491 while(my($key, $val) = each %$ref) { 1492 if($self->{opt}->{grouptags}->{$key}) { 1493 $ref->{$key} = $self->new_hashref( 1494 $self->{opt}->{grouptags}->{$key} => $val 1495 ); 1496 } 1497 } 1498 } 1499 1500 1501 # Scan for namespace declaration attributes 1502 1503 my $nsdecls = ''; 1504 my $default_ns_uri; 1505 if($self->{nsup}) { 1506 $ref = $self->copy_hash($ref); 1507 $self->{nsup}->push_context(); 1508 1509 # Look for default namespace declaration first 1510 1511 if(exists($ref->{xmlns})) { 1512 $self->{nsup}->declare_prefix('', $ref->{xmlns}); 1513 $nsdecls .= qq( xmlns="$ref->{xmlns}"); 1514 delete($ref->{xmlns}); 1515 } 1516 $default_ns_uri = $self->{nsup}->get_uri(''); 1517 1518 1519 # Then check all the other keys 1520 1521 foreach my $qname (keys(%$ref)) { 1522 my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); 1523 if($uri) { 1524 if($uri eq $xmlns_ns) { 1525 $self->{nsup}->declare_prefix($lname, $ref->{$qname}); 1526 $nsdecls .= qq( xmlns:$lname="$ref->{$qname}"); 1527 delete($ref->{$qname}); 1528 } 1529 } 1530 } 1531 1532 # Translate any remaining Clarkian names 1533 1534 foreach my $qname (keys(%$ref)) { 1535 my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname); 1536 if($uri) { 1537 if($default_ns_uri and $uri eq $default_ns_uri) { 1538 $ref->{$lname} = $ref->{$qname}; 1539 delete($ref->{$qname}); 1540 } 1541 else { 1542 my $prefix = $self->{nsup}->get_prefix($uri); 1543 unless($prefix) { 1544 # $self->{nsup}->declare_prefix(undef, $uri); 1545 # $prefix = $self->{nsup}->get_prefix($uri); 1546 $prefix = $self->{ns_prefix}++; 1547 $self->{nsup}->declare_prefix($prefix, $uri); 1548 $nsdecls .= qq( xmlns:$prefix="$uri"); 1549 } 1550 $ref->{"$prefix:$lname"} = $ref->{$qname}; 1551 delete($ref->{$qname}); 1552 } 1553 } 1554 } 1555 } 1556 1557 1558 my @nested = (); 1559 my $text_content = undef; 1560 if($named) { 1561 push @result, $indent, '<', $name, $nsdecls; 1562 } 1563 1564 if(keys %$ref) { 1565 my $first_arg = 1; 1566 foreach my $key ($self->sorted_keys($name, $ref)) { 1567 my $value = $ref->{$key}; 1568 next if(substr($key, 0, 1) eq '-'); 1569 if(!defined($value)) { 1570 next if $self->{opt}->{suppressempty}; 1571 unless(exists($self->{opt}->{suppressempty}) 1572 and !defined($self->{opt}->{suppressempty}) 1573 ) { 1574 carp 'Use of uninitialized value' if warnings::enabled(); 1575 } 1576 if($key eq $self->{opt}->{contentkey}) { 1577 $text_content = ''; 1578 } 1579 else { 1580 $value = exists($self->{opt}->{suppressempty}) ? {} : ''; 1581 } 1582 } 1583 1584 if(!ref($value) 1585 and $self->{opt}->{valueattr} 1586 and $self->{opt}->{valueattr}->{$key} 1587 ) { 1588 $value = $self->new_hashref( 1589 $self->{opt}->{valueattr}->{$key} => $value 1590 ); 1591 } 1592 1593 if(ref($value) or $self->{opt}->{noattr}) { 1594 push @nested, 1595 $self->value_to_xml($value, $key, "$indent "); 1596 } 1597 else { 1598 if($key eq $self->{opt}->{contentkey}) { 1599 $value = $self->escape_value($value) unless($self->{opt}->{noescape}); 1600 $text_content = $value; 1601 } 1602 else { 1603 $value = $self->escape_attr($value) unless($self->{opt}->{noescape}); 1604 push @result, "\n$indent " . ' ' x length($name) 1605 if($self->{opt}->{attrindent} and !$first_arg); 1606 push @result, ' ', $key, '="', $value , '"'; 1607 $first_arg = 0; 1608 } 1609 } 1610 } 1611 } 1612 else { 1613 $text_content = ''; 1614 } 1615 1616 if(@nested or defined($text_content)) { 1617 if($named) { 1618 push @result, ">"; 1619 if(defined($text_content)) { 1620 push @result, $text_content; 1621 $nested[0] =~ s/^\s+// if(@nested); 1622 } 1623 else { 1624 push @result, $nl; 1625 } 1626 if(@nested) { 1627 push @result, @nested, $indent; 1628 } 1629 push @result, '</', $name, ">", $nl; 1630 } 1631 else { 1632 push @result, @nested; # Special case if no root elements 1633 } 1634 } 1635 else { 1636 push @result, " />", $nl; 1637 } 1638 $self->{nsup}->pop_context() if($self->{nsup}); 1639 } 1640 1641 1642 # Handle arrayrefs 1643 1644 elsif(UNIVERSAL::isa($ref, 'ARRAY')) { 1645 foreach $value (@$ref) { 1646 next if !defined($value) and $self->{opt}->{suppressempty}; 1647 if(!ref($value)) { 1648 push @result, 1649 $indent, '<', $name, '>', 1650 ($self->{opt}->{noescape} ? $value : $self->escape_value($value)), 1651 '</', $name, ">$nl"; 1652 } 1653 elsif(UNIVERSAL::isa($value, 'HASH')) { 1654 push @result, $self->value_to_xml($value, $name, $indent); 1655 } 1656 else { 1657 push @result, 1658 $indent, '<', $name, ">$nl", 1659 $self->value_to_xml($value, 'anon', "$indent "), 1660 $indent, '</', $name, ">$nl"; 1661 } 1662 } 1663 } 1664 1665 else { 1666 croak "Can't encode a value of type: " . ref($ref); 1667 } 1668 1669 1670 delete $self->{_ancestors}->{$refaddr}; 1671 1672 return(join('', @result)); 1673} 1674 1675 1676############################################################################## 1677# Method: sorted_keys() 1678# 1679# Returns the keys of the referenced hash sorted into alphabetical order, but 1680# with the 'key' key (as in KeyAttr) first, if there is one. 1681# 1682 1683sub sorted_keys { 1684 my($self, $name, $ref) = @_; 1685 1686 return keys %$ref if $self->{opt}->{nosort}; 1687 1688 my %hash = %$ref; 1689 my $keyattr = $self->{opt}->{keyattr}; 1690 1691 my @key; 1692 1693 if(ref $keyattr eq 'HASH') { 1694 if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) { 1695 push @key, $keyattr->{$name}->[0]; 1696 delete $hash{$keyattr->{$name}->[0]}; 1697 } 1698 } 1699 elsif(ref $keyattr eq 'ARRAY') { 1700 foreach (@{$keyattr}) { 1701 if(exists $hash{$_}) { 1702 push @key, $_; 1703 delete $hash{$_}; 1704 last; 1705 } 1706 } 1707 } 1708 1709 return(@key, sort keys %hash); 1710} 1711 1712############################################################################## 1713# Method: escape_value() 1714# 1715# Helper routine for automatically escaping values for XMLout(). 1716# Expects a scalar data value. Returns escaped version. 1717# 1718 1719sub escape_value { 1720 my($self, $data) = @_; 1721 1722 return '' unless(defined($data)); 1723 1724 $data =~ s/&/&/sg; 1725 $data =~ s/</</sg; 1726 $data =~ s/>/>/sg; 1727 $data =~ s/"/"/sg; 1728 1729 my $level = $self->{opt}->{numericescape} or return $data; 1730 1731 return $self->numeric_escape($data, $level); 1732} 1733 1734sub numeric_escape { 1735 my($self, $data, $level) = @_; 1736 1737 if($self->{opt}->{numericescape} eq '2') { 1738 $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse; 1739 } 1740 else { 1741 $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse; 1742 } 1743 1744 return $data; 1745} 1746 1747############################################################################## 1748# Method: escape_attr() 1749# 1750# Helper routine for escaping attribute values. Defaults to escape_value(), 1751# but may be overridden by a subclass to customise behaviour. 1752# 1753 1754sub escape_attr { 1755 my $self = shift; 1756 1757 return $self->escape_value(@_); 1758} 1759 1760 1761############################################################################## 1762# Method: hash_to_array() 1763# 1764# Helper routine for value_to_xml(). 1765# Attempts to 'unfold' a hash of hashes into an array of hashes. Returns a 1766# reference to the array on success or the original hash if unfolding is 1767# not possible. 1768# 1769 1770sub hash_to_array { 1771 my $self = shift; 1772 my $parent = shift; 1773 my $hashref = shift; 1774 1775 my $arrayref = []; 1776 1777 my($key, $value); 1778 1779 my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref; 1780 foreach $key (@keys) { 1781 $value = $hashref->{$key}; 1782 return($hashref) unless(UNIVERSAL::isa($value, 'HASH')); 1783 1784 if(ref($self->{opt}->{keyattr}) eq 'HASH') { 1785 return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent})); 1786 push @$arrayref, $self->copy_hash( 1787 $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key 1788 ); 1789 } 1790 else { 1791 push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value }); 1792 } 1793 } 1794 1795 return($arrayref); 1796} 1797 1798 1799############################################################################## 1800# Method: copy_hash() 1801# 1802# Helper routine for hash_to_array(). When unfolding a hash of hashes into 1803# an array of hashes, we need to copy the key from the outer hash into the 1804# inner hash. This routine makes a copy of the original hash so we don't 1805# destroy the original data structure. You might wish to override this 1806# method if you're using tied hashes and don't want them to get untied. 1807# 1808 1809sub copy_hash { 1810 my($self, $orig, @extra) = @_; 1811 1812 return { @extra, %$orig }; 1813} 1814 1815############################################################################## 1816# Methods required for building trees from SAX events 1817############################################################################## 1818 1819sub start_document { 1820 my $self = shift; 1821 1822 $self->handle_options('in') unless($self->{opt}); 1823 1824 $self->{lists} = []; 1825 $self->{curlist} = $self->{tree} = []; 1826} 1827 1828 1829sub start_element { 1830 my $self = shift; 1831 my $element = shift; 1832 1833 my $name = $element->{Name}; 1834 if($self->{opt}->{nsexpand}) { 1835 $name = $element->{LocalName} || ''; 1836 if($element->{NamespaceURI}) { 1837 $name = '{' . $element->{NamespaceURI} . '}' . $name; 1838 } 1839 } 1840 my $attributes = {}; 1841 if($element->{Attributes}) { # Might be undef 1842 foreach my $attr (values %{$element->{Attributes}}) { 1843 if($self->{opt}->{nsexpand}) { 1844 my $name = $attr->{LocalName} || ''; 1845 if($attr->{NamespaceURI}) { 1846 $name = '{' . $attr->{NamespaceURI} . '}' . $name 1847 } 1848 $name = 'xmlns' if($name eq $bad_def_ns_jcn); 1849 $attributes->{$name} = $attr->{Value}; 1850 } 1851 else { 1852 $attributes->{$attr->{Name}} = $attr->{Value}; 1853 } 1854 } 1855 } 1856 my $newlist = [ $attributes ]; 1857 push @{ $self->{lists} }, $self->{curlist}; 1858 push @{ $self->{curlist} }, $name => $newlist; 1859 $self->{curlist} = $newlist; 1860} 1861 1862 1863sub characters { 1864 my $self = shift; 1865 my $chars = shift; 1866 1867 my $text = $chars->{Data}; 1868 my $clist = $self->{curlist}; 1869 my $pos = $#$clist; 1870 1871 if ($pos > 0 and $clist->[$pos - 1] eq '0') { 1872 $clist->[$pos] .= $text; 1873 } 1874 else { 1875 push @$clist, 0 => $text; 1876 } 1877} 1878 1879 1880sub end_element { 1881 my $self = shift; 1882 1883 $self->{curlist} = pop @{ $self->{lists} }; 1884} 1885 1886 1887sub end_document { 1888 my $self = shift; 1889 1890 delete($self->{curlist}); 1891 delete($self->{lists}); 1892 1893 my $tree = $self->{tree}; 1894 delete($self->{tree}); 1895 1896 1897 # Return tree as-is to XMLin() 1898 1899 return($tree) if($self->{nocollapse}); 1900 1901 1902 # Or collapse it before returning it to SAX parser class 1903 1904 if($self->{opt}->{keeproot}) { 1905 $tree = $self->collapse({}, @$tree); 1906 } 1907 else { 1908 $tree = $self->collapse(@{$tree->[1]}); 1909 } 1910 1911 if($self->{opt}->{datahandler}) { 1912 return($self->{opt}->{datahandler}->($self, $tree)); 1913 } 1914 1915 return($tree); 1916} 1917 1918*xml_in = \&XMLin; 1919*xml_out = \&XMLout; 1920 19211; 1922 1923__END__ 1924 1925=head1 STATUS OF THIS MODULE 1926 1927The use of this module in new code is B<strongly discouraged>. Other modules 1928are available which provide more straightforward and consistent interfaces. In 1929particular, L<XML::LibXML> is highly recommended and you can refer to 1930L<Perl XML::LibXML by Example|http://grantm.github.io/perl-libxml-by-example/> 1931for a tutorial introduction. 1932 1933L<XML::Twig> is another excellent alternative. 1934 1935The major problems with this module are the large number of options (some of 1936which have unfortunate defaults) and the arbitrary ways in which these options 1937interact - often producing unexpected results. 1938 1939Patches with bug fixes and documentation fixes are welcome, but new features 1940are unlikely to be added. 1941 1942=head1 QUICK START 1943 1944Say you have a script called B<foo> and a file of configuration options 1945called B<foo.xml> containing the following: 1946 1947 <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug"> 1948 <server name="sahara" osname="solaris" osversion="2.6"> 1949 <address>10.0.0.101</address> 1950 <address>10.0.1.101</address> 1951 </server> 1952 <server name="gobi" osname="irix" osversion="6.5"> 1953 <address>10.0.0.102</address> 1954 </server> 1955 <server name="kalahari" osname="linux" osversion="2.0.34"> 1956 <address>10.0.0.103</address> 1957 <address>10.0.1.103</address> 1958 </server> 1959 </config> 1960 1961The following lines of code in B<foo>: 1962 1963 use XML::Simple qw(:strict); 1964 1965 my $config = XMLin(undef, KeyAttr => { server => 'name' }, ForceArray => [ 'server', 'address' ]); 1966 1967will 'slurp' the configuration options into the hashref $config (because no 1968filename or XML string was passed as the first argument to C<XMLin()> the name 1969and location of the XML file will be inferred from name and location of the 1970script). You can dump out the contents of the hashref using Data::Dumper: 1971 1972 use Data::Dumper; 1973 1974 print Dumper($config); 1975 1976which will produce something like this (formatting has been adjusted for 1977brevity): 1978 1979 { 1980 'logdir' => '/var/log/foo/', 1981 'debugfile' => '/tmp/foo.debug', 1982 'server' => { 1983 'sahara' => { 1984 'osversion' => '2.6', 1985 'osname' => 'solaris', 1986 'address' => [ '10.0.0.101', '10.0.1.101' ] 1987 }, 1988 'gobi' => { 1989 'osversion' => '6.5', 1990 'osname' => 'irix', 1991 'address' => [ '10.0.0.102' ] 1992 }, 1993 'kalahari' => { 1994 'osversion' => '2.0.34', 1995 'osname' => 'linux', 1996 'address' => [ '10.0.0.103', '10.0.1.103' ] 1997 } 1998 } 1999 } 2000 2001Your script could then access the name of the log directory like this: 2002 2003 print $config->{logdir}; 2004 2005similarly, the second address on the server 'kalahari' could be referenced as: 2006 2007 print $config->{server}->{kalahari}->{address}->[1]; 2008 2009Note: If the mapping between the output of Data::Dumper and the print 2010statements above is not obvious to you, then please refer to the 'references' 2011tutorial (AKA: "Mark's very short tutorial about references") at L<perlreftut>. 2012 2013In this example, the C<< ForceArray >> option was used to list elements that 2014might occur multiple times and should therefore be represented as arrayrefs 2015(even when only one element is present). 2016 2017The C<< KeyAttr >> option was used to indicate that each C<< <server> >> 2018element has a unique identifier in the C<< name >> attribute. This allows you 2019to index directly to a particular server record using the name as a hash key 2020(as shown above). 2021 2022For simple requirements, that's really all there is to it. If you want to 2023store your XML in a different directory or file, or pass it in as a string or 2024even pass it in via some derivative of an IO::Handle, you'll need to check out 2025L<"OPTIONS">. If you want to turn off or tweak the array folding feature (that 2026neat little transformation that produced $config->{server}) you'll find options 2027for that as well. 2028 2029If you want to generate XML (for example to write a modified version of 2030$config back out as XML), check out C<XMLout()>. 2031 2032If your needs are not so simple, this may not be the module for you. In that 2033case, you might want to read L<"WHERE TO FROM HERE?">. 2034 2035=head1 DESCRIPTION 2036 2037The XML::Simple module provides a simple API layer on top of an underlying XML 2038parsing module (either XML::Parser or one of the SAX2 parser modules). Two 2039functions are exported: C<XMLin()> and C<XMLout()>. Note: you can explicitly 2040request the lower case versions of the function names: C<xml_in()> and 2041C<xml_out()>. 2042 2043The simplest approach is to call these two functions directly, but an 2044optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below) 2045allows them to be called as methods of an B<XML::Simple> object. The object 2046interface can also be used at either end of a SAX pipeline. 2047 2048=head2 XMLin() 2049 2050Parses XML formatted data and returns a reference to a data structure which 2051contains the same information in a more readily accessible form. (Skip 2052down to L<"EXAMPLES"> below, for more sample code). 2053 2054C<XMLin()> accepts an optional XML specifier followed by zero or more 'name => 2055value' option pairs. The XML specifier can be one of the following: 2056 2057=over 4 2058 2059=item A filename 2060 2061If the filename contains no directory components C<XMLin()> will look for the 2062file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the 2063current directory if the SearchPath option is not defined. eg: 2064 2065 $ref = XMLin('/etc/params.xml'); 2066 2067Note, the filename '-' can be used to parse from STDIN. 2068 2069=item undef 2070 2071If there is no XML specifier, C<XMLin()> will check the script directory and 2072each of the SearchPath directories for a file with the same name as the script 2073but with the extension '.xml'. Note: if you wish to specify options, you 2074must specify the value 'undef'. eg: 2075 2076 $ref = XMLin(undef, ForceArray => 1); 2077 2078=item A string of XML 2079 2080A string containing XML (recognised by the presence of '<' and '>' characters) 2081will be parsed directly. eg: 2082 2083 $ref = XMLin('<opt username="bob" password="flurp" />'); 2084 2085=item An IO::Handle object 2086 2087An IO::Handle object will be read to EOF and its contents parsed. eg: 2088 2089 $fh = IO::File->new('/etc/params.xml'); 2090 $ref = XMLin($fh); 2091 2092=back 2093 2094=head2 XMLout() 2095 2096Takes a data structure (generally a hashref) and returns an XML encoding of 2097that structure. If the resulting XML is parsed using C<XMLin()>, it should 2098return a data structure equivalent to the original (see caveats below). 2099 2100The C<XMLout()> function can also be used to output the XML as SAX events 2101see the C<Handler> option and L<"SAX SUPPORT"> for more details). 2102 2103When translating hashes to XML, hash keys which have a leading '-' will be 2104silently skipped. This is the approved method for marking elements of a 2105data structure which should be ignored by C<XMLout>. (Note: If these items 2106were not skipped the key names would be emitted as element or attribute names 2107with a leading '-' which would not be valid XML). 2108 2109=head2 Caveats 2110 2111Some care is required in creating data structures which will be passed to 2112C<XMLout()>. Hash keys from the data structure will be encoded as either XML 2113element names or attribute names. Therefore, you should use hash key names 2114which conform to the relatively strict XML naming rules: 2115 2116Names in XML must begin with a letter. The remaining characters may be 2117letters, digits, hyphens (-), underscores (_) or full stops (.). It is also 2118allowable to include one colon (:) in an element name but this should only be 2119used when working with namespaces (B<XML::Simple> can only usefully work with 2120namespaces when teamed with a SAX Parser). 2121 2122You can use other punctuation characters in hash values (just not in hash 2123keys) however B<XML::Simple> does not support dumping binary data. 2124 2125If you break these rules, the current implementation of C<XMLout()> will 2126simply emit non-compliant XML which will be rejected if you try to read it 2127back in. (A later version of B<XML::Simple> might take a more proactive 2128approach). 2129 2130Note also that although you can nest hashes and arrays to arbitrary levels, 2131circular data structures are not supported and will cause C<XMLout()> to die. 2132 2133If you wish to 'round-trip' arbitrary data structures from Perl to XML and back 2134to Perl, then you should probably disable array folding (using the KeyAttr 2135option) both with C<XMLout()> and with C<XMLin()>. If you still don't get the 2136expected results, you may prefer to use L<XML::Dumper> which is designed for 2137exactly that purpose. 2138 2139Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs. 2140 2141 2142=head1 OPTIONS 2143 2144B<XML::Simple> supports a number of options (in fact as each release of 2145B<XML::Simple> adds more options, the module's claim to the name 'Simple' 2146becomes increasingly tenuous). If you find yourself repeatedly having to 2147specify the same options, you might like to investigate L<"OPTIONAL OO 2148INTERFACE"> below. 2149 2150If you can't be bothered reading the documentation, refer to 2151L<"STRICT MODE"> to automatically catch common mistakes. 2152 2153Because there are so many options, it's hard for new users to know which ones 2154are important, so here are the two you really need to know about: 2155 2156=over 4 2157 2158=item * 2159 2160check out C<ForceArray> because you'll almost certainly want to turn it on 2161 2162=item * 2163 2164make sure you know what the C<KeyAttr> option does and what its default value is 2165because it may surprise you otherwise (note in particular that 'KeyAttr' 2166affects both C<XMLin> and C<XMLout>) 2167 2168=back 2169 2170The option name headings below have a trailing 'comment' - a hash followed by 2171two pieces of metadata: 2172 2173=over 4 2174 2175=item * 2176 2177Options are marked with 'I<in>' if they are recognised by C<XMLin()> and 2178'I<out>' if they are recognised by C<XMLout()>. 2179 2180=item * 2181 2182Each option is also flagged to indicate whether it is: 2183 2184 'important' - don't use the module until you understand this one 2185 'handy' - you can skip this on the first time through 2186 'advanced' - you can skip this on the second time through 2187 'SAX only' - don't worry about this unless you're using SAX (or 2188 alternatively if you need this, you also need SAX) 2189 'seldom used' - you'll probably never use this unless you were the 2190 person that requested the feature 2191 2192=back 2193 2194The options are listed alphabetically: 2195 2196Note: option names are no longer case sensitive so you can use the mixed case 2197versions shown here; all lower case as required by versions 2.03 and earlier; 2198or you can add underscores between the words (eg: key_attr). 2199 2200 2201=head2 AttrIndent => 1 I<# out - handy> 2202 2203When you are using C<XMLout()>, enable this option to have attributes printed 2204one-per-line with sensible indentation rather than all on one line. 2205 2206=head2 Cache => [ cache schemes ] I<# in - advanced> 2207 2208Because loading the B<XML::Parser> module and parsing an XML file can consume a 2209significant number of CPU cycles, it is often desirable to cache the output of 2210C<XMLin()> for later reuse. 2211 2212When parsing from a named file, B<XML::Simple> supports a number of caching 2213schemes. The 'Cache' option may be used to specify one or more schemes (using 2214an anonymous array). Each scheme will be tried in turn in the hope of finding 2215a cached pre-parsed representation of the XML file. If no cached copy is 2216found, the file will be parsed and the first cache scheme in the list will be 2217used to save a copy of the results. The following cache schemes have been 2218implemented: 2219 2220=over 4 2221 2222=item storable 2223 2224Utilises B<Storable.pm> to read/write a cache file with the same name as the 2225XML file but with the extension .stor 2226 2227=item memshare 2228 2229When a file is first parsed, a copy of the resulting data structure is retained 2230in memory in the B<XML::Simple> module's namespace. Subsequent calls to parse 2231the same file will return a reference to this structure. This cached version 2232will persist only for the life of the Perl interpreter (which in the case of 2233mod_perl for example, may be some significant time). 2234 2235Because each caller receives a reference to the same data structure, a change 2236made by one caller will be visible to all. For this reason, the reference 2237returned should be treated as read-only. 2238 2239=item memcopy 2240 2241This scheme works identically to 'memshare' (above) except that each caller 2242receives a reference to a new data structure which is a copy of the cached 2243version. Copying the data structure will add a little processing overhead, 2244therefore this scheme should only be used where the caller intends to modify 2245the data structure (or wishes to protect itself from others who might). This 2246scheme uses B<Storable.pm> to perform the copy. 2247 2248=back 2249 2250Warning! The memory-based caching schemes compare the timestamp on the file to 2251the time when it was last parsed. If the file is stored on an NFS filesystem 2252(or other network share) and the clock on the file server is not exactly 2253synchronised with the clock where your script is run, updates to the source XML 2254file may appear to be ignored. 2255 2256=head2 ContentKey => 'keyname' I<# in+out - seldom used> 2257 2258When text content is parsed to a hash value, this option lets you specify a 2259name for the hash key to override the default 'content'. So for example: 2260 2261 XMLin('<opt one="1">Text</opt>', ContentKey => 'text') 2262 2263will parse to: 2264 2265 { 'one' => 1, 'text' => 'Text' } 2266 2267instead of: 2268 2269 { 'one' => 1, 'content' => 'Text' } 2270 2271C<XMLout()> will also honour the value of this option when converting a hashref 2272to XML. 2273 2274You can also prefix your selected key name with a '-' character to have 2275C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after 2276array folding. For example: 2277 2278 XMLin( 2279 '<opt><item name="one">First</item><item name="two">Second</item></opt>', 2280 KeyAttr => {item => 'name'}, 2281 ForceArray => [ 'item' ], 2282 ContentKey => '-content' 2283 ) 2284 2285will parse to: 2286 2287 { 2288 'item' => { 2289 'one' => 'First' 2290 'two' => 'Second' 2291 } 2292 } 2293 2294rather than this (without the '-'): 2295 2296 { 2297 'item' => { 2298 'one' => { 'content' => 'First' } 2299 'two' => { 'content' => 'Second' } 2300 } 2301 } 2302 2303=head2 DataHandler => code_ref I<# in - SAX only> 2304 2305When you use an B<XML::Simple> object as a SAX handler, it will return a 2306'simple tree' data structure in the same format as C<XMLin()> would return. If 2307this option is set (to a subroutine reference), then when the tree is built the 2308subroutine will be called and passed two arguments: a reference to the 2309B<XML::Simple> object and a reference to the data tree. The return value from 2310the subroutine will be returned to the SAX driver. (See L<"SAX SUPPORT"> for 2311more details). 2312 2313=head2 ForceArray => 1 I<# in - important> 2314 2315This option should be set to '1' to force nested elements to be represented 2316as arrays even when there is only one. Eg, with ForceArray enabled, this 2317XML: 2318 2319 <opt> 2320 <name>value</name> 2321 </opt> 2322 2323would parse to this: 2324 2325 { 2326 'name' => [ 2327 'value' 2328 ] 2329 } 2330 2331instead of this (the default): 2332 2333 { 2334 'name' => 'value' 2335 } 2336 2337This option is especially useful if the data structure is likely to be written 2338back out as XML and the default behaviour of rolling single nested elements up 2339into attributes is not desirable. 2340 2341If you are using the array folding feature, you should almost certainly enable 2342this option. If you do not, single nested elements will not be parsed to 2343arrays and therefore will not be candidates for folding to a hash. (Given that 2344the default value of 'KeyAttr' enables array folding, the default value of this 2345option should probably also have been enabled too - sorry). 2346 2347=head2 ForceArray => [ names ] I<# in - important> 2348 2349This alternative (and preferred) form of the 'ForceArray' option allows you to 2350specify a list of element names which should always be forced into an array 2351representation, rather than the 'all or nothing' approach above. 2352 2353It is also possible (since version 2.05) to include compiled regular 2354expressions in the list - any element names which match the pattern will be 2355forced to arrays. If the list contains only a single regex, then it is not 2356necessary to enclose it in an arrayref. Eg: 2357 2358 ForceArray => qr/_list$/ 2359 2360=head2 ForceContent => 1 I<# in - seldom used> 2361 2362When C<XMLin()> parses elements which have text content as well as attributes, 2363the text content must be represented as a hash value rather than a simple 2364scalar. This option allows you to force text content to always parse to 2365a hash value even when there are no attributes. So for example: 2366 2367 XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1) 2368 2369will parse to: 2370 2371 { 2372 'x' => { 'content' => 'text1' }, 2373 'y' => { 'a' => 2, 'content' => 'text2' } 2374 } 2375 2376instead of: 2377 2378 { 2379 'x' => 'text1', 2380 'y' => { 'a' => 2, 'content' => 'text2' } 2381 } 2382 2383=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy> 2384 2385You can use this option to eliminate extra levels of indirection in your Perl 2386data structure. For example this XML: 2387 2388 <opt> 2389 <searchpath> 2390 <dir>/usr/bin</dir> 2391 <dir>/usr/local/bin</dir> 2392 <dir>/usr/X11/bin</dir> 2393 </searchpath> 2394 </opt> 2395 2396Would normally be read into a structure like this: 2397 2398 { 2399 searchpath => { 2400 dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] 2401 } 2402 } 2403 2404But when read in with the appropriate value for 'GroupTags': 2405 2406 my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' }); 2407 2408It will return this simpler structure: 2409 2410 { 2411 searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ] 2412 } 2413 2414The grouping element (C<< <searchpath> >> in the example) must not contain any 2415attributes or elements other than the grouped element. 2416 2417You can specify multiple 'grouping element' to 'grouped element' mappings in 2418the same hashref. If this option is combined with C<KeyAttr>, the array 2419folding will occur first and then the grouped element names will be eliminated. 2420 2421C<XMLout> will also use the grouptag mappings to re-introduce the tags around 2422the grouped elements. Beware though that this will occur in all places that 2423the 'grouping tag' name occurs - you probably don't want to use the same name 2424for elements as well as attributes. 2425 2426=head2 Handler => object_ref I<# out - SAX only> 2427 2428Use the 'Handler' option to have C<XMLout()> generate SAX events rather than 2429returning a string of XML. For more details see L<"SAX SUPPORT"> below. 2430 2431Note: the current implementation of this option generates a string of XML 2432and uses a SAX parser to translate it into SAX events. The normal encoding 2433rules apply here - your data must be UTF8 encoded unless you specify an 2434alternative encoding via the 'XMLDecl' option; and by the time the data reaches 2435the handler object, it will be in UTF8 form regardless of the encoding you 2436supply. A future implementation of this option may generate the events 2437directly. 2438 2439=head2 KeepRoot => 1 I<# in+out - handy> 2440 2441In its attempt to return a data structure free of superfluous detail and 2442unnecessary levels of indirection, C<XMLin()> normally discards the root 2443element name. Setting the 'KeepRoot' option to '1' will cause the root element 2444name to be retained. So after executing this code: 2445 2446 $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1) 2447 2448You'll be able to reference the tempdir as 2449C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default 2450C<$config-E<gt>{tempdir}>. 2451 2452Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the 2453data structure already contains a root element name and it is not necessary to 2454add another. 2455 2456=head2 KeyAttr => [ list ] I<# in+out - important> 2457 2458This option controls the 'array folding' feature which translates nested 2459elements from an array to a hash. It also controls the 'unfolding' of hashes 2460to arrays. 2461 2462For example, this XML: 2463 2464 <opt> 2465 <user login="grep" fullname="Gary R Epstein" /> 2466 <user login="stty" fullname="Simon T Tyson" /> 2467 </opt> 2468 2469would, by default, parse to this: 2470 2471 { 2472 'user' => [ 2473 { 2474 'login' => 'grep', 2475 'fullname' => 'Gary R Epstein' 2476 }, 2477 { 2478 'login' => 'stty', 2479 'fullname' => 'Simon T Tyson' 2480 } 2481 ] 2482 } 2483 2484If the option 'KeyAttr => "login"' were used to specify that the 'login' 2485attribute is a key, the same XML would parse to: 2486 2487 { 2488 'user' => { 2489 'stty' => { 2490 'fullname' => 'Simon T Tyson' 2491 }, 2492 'grep' => { 2493 'fullname' => 'Gary R Epstein' 2494 } 2495 } 2496 } 2497 2498The key attribute names should be supplied in an arrayref if there is more 2499than one. C<XMLin()> will attempt to match attribute names in the order 2500supplied. C<XMLout()> will use the first attribute name supplied when 2501'unfolding' a hash into an array. 2502 2503Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id']. If you do 2504not want folding on input or unfolding on output you must set this option 2505to an empty list to disable the feature. 2506 2507Note 2: If you wish to use this option, you should also enable the 2508C<ForceArray> option. Without 'ForceArray', a single nested element will be 2509rolled up into a scalar rather than an array and therefore will not be folded 2510(since only arrays get folded). 2511 2512=head2 KeyAttr => { list } I<# in+out - important> 2513 2514This alternative (and preferred) method of specifying the key attributes 2515allows more fine grained control over which elements are folded and on which 2516attributes. For example the option 'KeyAttr => { package => 'id' } will cause 2517any package elements to be folded on the 'id' attribute. No other elements 2518which have an 'id' attribute will be folded at all. 2519 2520Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">) 2521if this syntax is used and an element which does not have the specified key 2522attribute is encountered (eg: a 'package' element without an 'id' attribute, to 2523use the example above). Warnings can be suppressed with the lexical 2524C<no warnings;> pragma or C<no warnings 'XML::Simple';>. 2525 2526Two further variations are made possible by prefixing a '+' or a '-' character 2527to the attribute name: 2528 2529The option 'KeyAttr => { user => "+login" }' will cause this XML: 2530 2531 <opt> 2532 <user login="grep" fullname="Gary R Epstein" /> 2533 <user login="stty" fullname="Simon T Tyson" /> 2534 </opt> 2535 2536to parse to this data structure: 2537 2538 { 2539 'user' => { 2540 'stty' => { 2541 'fullname' => 'Simon T Tyson', 2542 'login' => 'stty' 2543 }, 2544 'grep' => { 2545 'fullname' => 'Gary R Epstein', 2546 'login' => 'grep' 2547 } 2548 } 2549 } 2550 2551The '+' indicates that the value of the key attribute should be copied rather 2552than moved to the folded hash key. 2553 2554A '-' prefix would produce this result: 2555 2556 { 2557 'user' => { 2558 'stty' => { 2559 'fullname' => 'Simon T Tyson', 2560 '-login' => 'stty' 2561 }, 2562 'grep' => { 2563 'fullname' => 'Gary R Epstein', 2564 '-login' => 'grep' 2565 } 2566 } 2567 } 2568 2569As described earlier, C<XMLout> will ignore hash keys starting with a '-'. 2570 2571=head2 NoAttr => 1 I<# in+out - handy> 2572 2573When used with C<XMLout()>, the generated XML will contain no attributes. 2574All hash key/values will be represented as nested elements instead. 2575 2576When used with C<XMLin()>, any attributes in the XML will be ignored. 2577 2578=head2 NoEscape => 1 I<# out - seldom used> 2579 2580By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and 2581'"' to '<', '>', '&' and '"' respectively. Use this option to 2582suppress escaping (presumably because you've already escaped the data in some 2583more sophisticated manner). 2584 2585=head2 NoIndent => 1 I<# out - seldom used> 2586 2587Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode. 2588With this option enabled, the XML output will all be on one line (unless there 2589are newlines in the data) - this may be easier for downstream processing. 2590 2591=head2 NoSort => 1 I<# out - seldom used> 2592 2593Newer versions of XML::Simple sort elements and attributes alphabetically (*), 2594by default. Enable this option to suppress the sorting - possibly for 2595backwards compatibility. 2596 2597* Actually, sorting is alphabetical but 'key' attribute or element names (as in 2598'KeyAttr') sort first. Also, when a hash of hashes is 'unfolded', the elements 2599are sorted alphabetically by the value of the key field. 2600 2601=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy> 2602 2603This option controls how whitespace in text content is handled. Recognised 2604values for the option are: 2605 2606=over 4 2607 2608=item * 2609 26100 = (default) whitespace is passed through unaltered (except of course for the 2611normalisation of whitespace in attribute values which is mandated by the XML 2612recommendation) 2613 2614=item * 2615 26161 = whitespace is normalised in any value used as a hash key (normalising means 2617removing leading and trailing whitespace and collapsing sequences of whitespace 2618characters to a single space) 2619 2620=item * 2621 26222 = whitespace is normalised in all text content 2623 2624=back 2625 2626Note: you can spell this option with a 'z' if that is more natural for you. 2627 2628=head2 NSExpand => 1 I<# in+out handy - SAX only> 2629 2630This option controls namespace expansion - the translation of element and 2631attribute names of the form 'prefix:name' to '{uri}name'. For example the 2632element name 'xsl:template' might be expanded to: 2633'{http://www.w3.org/1999/XSL/Transform}template'. 2634 2635By default, C<XMLin()> will return element names and attribute names exactly as 2636they appear in the XML. Setting this option to 1 will cause all element and 2637attribute names to be expanded to include their namespace prefix. 2638 2639I<Note: You must be using a SAX parser for this option to work (ie: it does not 2640work with XML::Parser)>. 2641 2642This option also controls whether C<XMLout()> performs the reverse translation 2643from '{uri}name' back to 'prefix:name'. The default is no translation. If 2644your data contains expanded names, you should set this option to 1 otherwise 2645C<XMLout> will emit XML which is not well formed. 2646 2647I<Note: You must have the XML::NamespaceSupport module installed if you want 2648C<XMLout()> to translate URIs back to prefixes>. 2649 2650=head2 NumericEscape => 0 | 1 | 2 I<# out - handy> 2651 2652Use this option to have 'high' (non-ASCII) characters in your Perl data 2653structure converted to numeric entities (eg: €) in the XML output. Three 2654levels are possible: 2655 26560 - default: no numeric escaping (OK if you're writing out UTF8) 2657 26581 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output 2659 26602 - all characters above 0x7F are escaped (good for plain ASCII output) 2661 2662=head2 OutputFile => <file specifier> I<# out - handy> 2663 2664The default behaviour of C<XMLout()> is to return the XML as a string. If you 2665wish to write the XML to a file, simply supply the filename using the 2666'OutputFile' option. 2667 2668This option also accepts an IO handle object - especially useful in Perl 5.8.0 2669and later for output using an encoding other than UTF-8, eg: 2670 2671 open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!"; 2672 XMLout($ref, OutputFile => $fh); 2673 2674Note, XML::Simple does not require that the object you pass in to the 2675OutputFile option inherits from L<IO::Handle> - it simply assumes the object 2676supports a C<print> method. 2677 2678=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this> 2679 2680I<Note: This option is now officially deprecated. If you find it useful, email 2681the author with an example of what you use it for. Do not use this option to 2682set the ProtocolEncoding, that's just plain wrong - fix the XML>. 2683 2684This option allows you to pass parameters to the constructor of the underlying 2685XML::Parser object (which of course assumes you're not using SAX). 2686 2687=head2 RootName => 'string' I<# out - handy> 2688 2689By default, when C<XMLout()> generates XML, the root element will be named 2690'opt'. This option allows you to specify an alternative name. 2691 2692Specifying either undef or the empty string for the RootName option will 2693produce XML with no root elements. In most cases the resulting XML fragment 2694will not be 'well formed' and therefore could not be read back in by C<XMLin()>. 2695Nevertheless, the option has been found to be useful in certain circumstances. 2696 2697=head2 SearchPath => [ list ] I<# in - handy> 2698 2699If you pass C<XMLin()> a filename, but the filename include no directory 2700component, you can use this option to specify which directories should be 2701searched to locate the file. You might use this option to search first in the 2702user's home directory, then in a global directory such as /etc. 2703 2704If a filename is provided to C<XMLin()> but SearchPath is not defined, the 2705file is assumed to be in the current directory. 2706 2707If the first parameter to C<XMLin()> is undefined, the default SearchPath 2708will contain only the directory in which the script itself is located. 2709Otherwise the default SearchPath will be empty. 2710 2711=head2 StrictMode => 1 | 0 I<# in+out seldom used> 2712 2713This option allows you to turn L<STRICT MODE> on or off for a particular call, 2714regardless of whether it was enabled at the time XML::Simple was loaded. 2715 2716=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy> 2717 2718This option controls what C<XMLin()> should do with empty elements (no 2719attributes and no content). The default behaviour is to represent them as 2720empty hashes. Setting this option to a true value (eg: 1) will cause empty 2721elements to be skipped altogether. Setting the option to 'undef' or the empty 2722string will cause empty elements to be represented as the undefined value or 2723the empty string respectively. The latter two alternatives are a little 2724easier to test for in your code than a hash with no keys. 2725 2726The option also controls what C<XMLout()> does with undefined values. Setting 2727the option to undef causes undefined values to be output as empty elements 2728(rather than empty attributes), it also suppresses the generation of warnings 2729about undefined values. Setting the option to a true value (eg: 1) causes 2730undefined values to be skipped altogether on output. 2731 2732=head2 ValueAttr => [ names ] I<# in - handy> 2733 2734Use this option to deal elements which always have a single attribute and no 2735content. Eg: 2736 2737 <opt> 2738 <colour value="red" /> 2739 <size value="XXL" /> 2740 </opt> 2741 2742Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to: 2743 2744 { 2745 colour => 'red', 2746 size => 'XXL' 2747 } 2748 2749instead of this (the default): 2750 2751 { 2752 colour => { value => 'red' }, 2753 size => { value => 'XXL' } 2754 } 2755 2756Note: This form of the ValueAttr option is not compatible with C<XMLout()> - 2757since the attribute name is discarded at parse time, the original XML cannot be 2758reconstructed. 2759 2760=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy> 2761 2762This (preferred) form of the ValueAttr option requires you to specify both 2763the element and the attribute names. This is not only safer, it also allows 2764the original XML to be reconstructed by C<XMLout()>. 2765 2766Note: You probably don't want to use this option and the NoAttr option at the 2767same time. 2768 2769=head2 Variables => { name => value } I<# in - handy> 2770 2771This option allows variables in the XML to be expanded when the file is read. 2772(there is no facility for putting the variable names back if you regenerate 2773XML using C<XMLout>). 2774 2775A 'variable' is any text of the form C<${name}> which occurs in an attribute 2776value or in the text content of an element. If 'name' matches a key in the 2777supplied hashref, C<${name}> will be replaced with the corresponding value from 2778the hashref. If no matching key is found, the variable will not be replaced. 2779Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are 2780allowed). 2781 2782=head2 VarAttr => 'attr_name' I<# in - handy> 2783 2784In addition to the variables defined using C<Variables>, this option allows 2785variables to be defined in the XML. A variable definition consists of an 2786element with an attribute called 'attr_name' (the value of the C<VarAttr> 2787option). The value of the attribute will be used as the variable name and the 2788text content of the element will be used as the value. A variable defined in 2789this way will override a variable defined using the C<Variables> option. For 2790example: 2791 2792 XMLin( '<opt> 2793 <dir name="prefix">/usr/local/apache</dir> 2794 <dir name="exec_prefix">${prefix}</dir> 2795 <dir name="bindir">${exec_prefix}/bin</dir> 2796 </opt>', 2797 VarAttr => 'name', ContentKey => '-content' 2798 ); 2799 2800produces the following data structure: 2801 2802 { 2803 dir => { 2804 prefix => '/usr/local/apache', 2805 exec_prefix => '/usr/local/apache', 2806 bindir => '/usr/local/apache/bin', 2807 } 2808 } 2809 2810=head2 XMLDecl => 1 or XMLDecl => 'string' I<# out - handy> 2811 2812If you want the output from C<XMLout()> to start with the optional XML 2813declaration, simply set the option to '1'. The default XML declaration is: 2814 2815 <?xml version='1.0' standalone='yes'?> 2816 2817If you want some other string (for example to declare an encoding value), set 2818the value of this option to the complete string you require. 2819 2820 2821=head1 OPTIONAL OO INTERFACE 2822 2823The procedural interface is both simple and convenient however there are a 2824couple of reasons why you might prefer to use the object oriented (OO) 2825interface: 2826 2827=over 4 2828 2829=item * 2830 2831to define a set of default values which should be used on all subsequent calls 2832to C<XMLin()> or C<XMLout()> 2833 2834=item * 2835 2836to override methods in B<XML::Simple> to provide customised behaviour 2837 2838=back 2839 2840The default values for the options described above are unlikely to suit 2841everyone. The OO interface allows you to effectively override B<XML::Simple>'s 2842defaults with your preferred values. It works like this: 2843 2844First create an XML::Simple parser object with your preferred defaults: 2845 2846 my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1); 2847 2848then call C<XMLin()> or C<XMLout()> as a method of that object: 2849 2850 my $ref = $xs->XMLin($xml); 2851 my $xml = $xs->XMLout($ref); 2852 2853You can also specify options when you make the method calls and these values 2854will be merged with the values specified when the object was created. Values 2855specified in a method call take precedence. 2856 2857Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be 2858called as C<xml_in()> or C<xml_out()>. The method names are aliased so the 2859only difference is the aesthetics. 2860 2861=head2 Parsing Methods 2862 2863You can explicitly call one of the following methods rather than rely on the 2864C<xml_in()> method automatically determining whether the target to be parsed is 2865a string, a file or a filehandle: 2866 2867=over 4 2868 2869=item parse_string(text) 2870 2871Works exactly like the C<xml_in()> method but assumes the first argument is 2872a string of XML (or a reference to a scalar containing a string of XML). 2873 2874=item parse_file(filename) 2875 2876Works exactly like the C<xml_in()> method but assumes the first argument is 2877the name of a file containing XML. 2878 2879=item parse_fh(file_handle) 2880 2881Works exactly like the C<xml_in()> method but assumes the first argument is 2882a filehandle which can be read to get XML. 2883 2884=back 2885 2886=head2 Hook Methods 2887 2888You can make your own class which inherits from XML::Simple and overrides 2889certain behaviours. The following methods may provide useful 'hooks' upon 2890which to hang your modified behaviour. You may find other undocumented methods 2891by examining the source, but those may be subject to change in future releases. 2892 2893=over 4 2894 2895=item new_xml_parser() 2896 2897This method will be called when a new XML::Parser object must be constructed 2898(either because XML::SAX is not installed or XML::Parser is preferred). 2899 2900=item handle_options(direction, name => value ...) 2901 2902This method will be called when one of the parsing methods or the C<XMLout()> 2903method is called. The initial argument will be a string (either 'in' or 'out') 2904and the remaining arguments will be name value pairs. 2905 2906=item default_config_file() 2907 2908Calculates and returns the name of the file which should be parsed if no 2909filename is passed to C<XMLin()> (default: C<$0.xml>). 2910 2911=item build_simple_tree(filename, string) 2912 2913Called from C<XMLin()> or any of the parsing methods. Takes either a file name 2914as the first argument or C<undef> followed by a 'string' as the second 2915argument. Returns a simple tree data structure. You could override this 2916method to apply your own transformations before the data structure is returned 2917to the caller. 2918 2919=item new_hashref() 2920 2921When the 'simple tree' data structure is being built, this method will be 2922called to create any required anonymous hashrefs. 2923 2924=item sorted_keys(name, hashref) 2925 2926Called when C<XMLout()> is translating a hashref to XML. This routine returns 2927a list of hash keys in the order that the corresponding attributes/elements 2928should appear in the output. 2929 2930=item escape_value(string) 2931 2932Called from C<XMLout()>, takes a string and returns a copy of the string with 2933XML character escaping rules applied. 2934 2935=item escape_attr(string) 2936 2937Called from C<XMLout()>, to handle attribute values. By default, just calls 2938C<escape_value()>, but you can override this method if you want attributes 2939escaped differently than text content. 2940 2941=item numeric_escape(string) 2942 2943Called from C<escape_value()>, to handle non-ASCII characters (depending on the 2944value of the NumericEscape option). 2945 2946=item copy_hash(hashref, extra_key => value, ...) 2947 2948Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of 2949hashes. You might wish to override this method if you're using tied hashes and 2950don't want them to get untied. 2951 2952=back 2953 2954=head2 Cache Methods 2955 2956XML::Simple implements three caching schemes ('storable', 'memshare' and 2957'memcopy'). You can implement a custom caching scheme by implementing 2958two methods - one for reading from the cache and one for writing to it. 2959 2960For example, you might implement a new 'dbm' scheme that stores cached data 2961structures using the L<MLDBM> module. First, you would add a 2962C<cache_read_dbm()> method which accepted a filename for use as a lookup key 2963and returned a data structure on success, or undef on failure. Then, you would 2964implement a C<cache_read_dbm()> method which accepted a data structure and a 2965filename. 2966 2967You would use this caching scheme by specifying the option: 2968 2969 Cache => [ 'dbm' ] 2970 2971=head1 STRICT MODE 2972 2973If you import the B<XML::Simple> routines like this: 2974 2975 use XML::Simple qw(:strict); 2976 2977the following common mistakes will be detected and treated as fatal errors 2978 2979=over 4 2980 2981=item * 2982 2983Failing to explicitly set the C<KeyAttr> option - if you can't be bothered 2984reading about this option, turn it off with: KeyAttr => [ ] 2985 2986=item * 2987 2988Failing to explicitly set the C<ForceArray> option - if you can't be bothered 2989reading about this option, set it to the safest mode with: ForceArray => 1 2990 2991=item * 2992 2993Setting ForceArray to an array, but failing to list all the elements from the 2994KeyAttr hash. 2995 2996=item * 2997 2998Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains 2999one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested 3000element). Note: if strict mode is not set but C<use warnings;> is in force, 3001this condition triggers a warning. 3002 3003=item * 3004 3005Data error - as above, but non-unique values are present in the key attribute 3006(eg: more than one E<lt>partE<gt> element with the same partnum). This will 3007also trigger a warning if strict mode is not enabled. 3008 3009=item * 3010 3011Data error - as above, but value of key attribute (eg: partnum) is not a 3012scalar string (due to nested elements etc). This will also trigger a warning 3013if strict mode is not enabled. 3014 3015=back 3016 3017=head1 SAX SUPPORT 3018 3019From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API 3020for XML) - specifically SAX2. 3021 3022In a typical SAX application, an XML parser (or SAX 'driver') module generates 3023SAX events (start of element, character data, end of element, etc) as it parses 3024an XML document and a 'handler' module processes the events to extract the 3025required data. This simple model allows for some interesting and powerful 3026possibilities: 3027 3028=over 4 3029 3030=item * 3031 3032Applications written to the SAX API can extract data from huge XML documents 3033without the memory overheads of a DOM or tree API. 3034 3035=item * 3036 3037The SAX API allows for plug and play interchange of parser modules without 3038having to change your code to fit a new module's API. A number of SAX parsers 3039are available with capabilities ranging from extreme portability to blazing 3040performance. 3041 3042=item * 3043 3044A SAX 'filter' module can implement both a handler interface for receiving 3045data and a generator interface for passing modified data on to a downstream 3046handler. Filters can be chained together in 'pipelines'. 3047 3048=item * 3049 3050One filter module might split a data stream to direct data to two or more 3051downstream handlers. 3052 3053=item * 3054 3055Generating SAX events is not the exclusive preserve of XML parsing modules. 3056For example, a module might extract data from a relational database using DBI 3057and pass it on to a SAX pipeline for filtering and formatting. 3058 3059=back 3060 3061B<XML::Simple> can operate at either end of a SAX pipeline. For example, 3062you can take a data structure in the form of a hashref and pass it into a 3063SAX pipeline using the 'Handler' option on C<XMLout()>: 3064 3065 use XML::Simple; 3066 use Some::SAX::Filter; 3067 use XML::SAX::Writer; 3068 3069 my $ref = { 3070 .... # your data here 3071 }; 3072 3073 my $writer = XML::SAX::Writer->new(); 3074 my $filter = Some::SAX::Filter->new(Handler => $writer); 3075 my $simple = XML::Simple->new(Handler => $filter); 3076 $simple->XMLout($ref); 3077 3078You can also put B<XML::Simple> at the opposite end of the pipeline to take 3079advantage of the simple 'tree' data structure once the relevant data has been 3080isolated through filtering: 3081 3082 use XML::SAX; 3083 use Some::SAX::Filter; 3084 use XML::Simple; 3085 3086 my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']); 3087 my $filter = Some::SAX::Filter->new(Handler => $simple); 3088 my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); 3089 3090 my $ref = $parser->parse_uri('some_huge_file.xml'); 3091 3092 print $ref->{part}->{'555-1234'}; 3093 3094You can build a filter by using an XML::Simple object as a handler and setting 3095its DataHandler option to point to a routine which takes the resulting tree, 3096modifies it and sends it off as SAX events to a downstream handler: 3097 3098 my $writer = XML::SAX::Writer->new(); 3099 my $filter = XML::Simple->new( 3100 DataHandler => sub { 3101 my $simple = shift; 3102 my $data = shift; 3103 3104 # Modify $data here 3105 3106 $simple->XMLout($data, Handler => $writer); 3107 } 3108 ); 3109 my $parser = XML::SAX::ParserFactory->parser(Handler => $filter); 3110 3111 $parser->parse_uri($filename); 3112 3113I<Note: In this last example, the 'Handler' option was specified in the call to 3114C<XMLout()> but it could also have been specified in the constructor>. 3115 3116=head1 ENVIRONMENT 3117 3118If you don't care which parser module B<XML::Simple> uses then skip this 3119section entirely (it looks more complicated than it really is). 3120 3121B<XML::Simple> will default to using a B<SAX> parser if one is available or 3122B<XML::Parser> if SAX is not available. 3123 3124You can dictate which parser module is used by setting either the environment 3125variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable 3126$XML::Simple::PREFERRED_PARSER to contain the module name. The following rules 3127are used: 3128 3129=over 4 3130 3131=item * 3132 3133The package variable takes precedence over the environment variable if both are defined. To force B<XML::Simple> to ignore the environment settings and use 3134its default rules, you can set the package variable to an empty string. 3135 3136=item * 3137 3138If the 'preferred parser' is set to the string 'XML::Parser', then 3139L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not 3140installed). 3141 3142=item * 3143 3144If the 'preferred parser' is set to some other value, then it is assumed to be 3145the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory>. 3146If L<XML::SAX> is not installed, or the requested parser module is not 3147installed, then C<XMLin()> will die. 3148 3149=item * 3150 3151If the 'preferred parser' is not defined at all (the normal default 3152state), an attempt will be made to load L<XML::SAX>. If L<XML::SAX> is 3153installed, then a parser module will be selected according to 3154L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX 3155parser installed). 3156 3157=item * 3158 3159if the 'preferred parser' is not defined and B<XML::SAX> is not 3160installed, then B<XML::Parser> will be used. C<XMLin()> will die if 3161L<XML::Parser> is not installed. 3162 3163=back 3164 3165Note: The B<XML::SAX> distribution includes an XML parser written entirely in 3166Perl. It is very portable but it is not very fast. You should consider 3167installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your 3168platform. 3169 3170=head1 ERROR HANDLING 3171 3172The XML standard is very clear on the issue of non-compliant documents. An 3173error in parsing any single element (for example a missing end tag) must cause 3174the whole document to be rejected. B<XML::Simple> will die with an appropriate 3175message if it encounters a parsing error. 3176 3177If dying is not appropriate for your application, you should arrange to call 3178C<XMLin()> in an eval block and look for errors in $@. eg: 3179 3180 my $config = eval { XMLin() }; 3181 PopUpMessage($@) if($@); 3182 3183Note, there is a common misconception that use of B<eval> will significantly 3184slow down a script. While that may be true when the code being eval'd is in a 3185string, it is not true of code like the sample above. 3186 3187=head1 EXAMPLES 3188 3189When C<XMLin()> reads the following very simple piece of XML: 3190 3191 <opt username="testuser" password="frodo"></opt> 3192 3193it returns the following data structure: 3194 3195 { 3196 'username' => 'testuser', 3197 'password' => 'frodo' 3198 } 3199 3200The identical result could have been produced with this alternative XML: 3201 3202 <opt username="testuser" password="frodo" /> 3203 3204Or this (although see 'ForceArray' option for variations): 3205 3206 <opt> 3207 <username>testuser</username> 3208 <password>frodo</password> 3209 </opt> 3210 3211Repeated nested elements are represented as anonymous arrays: 3212 3213 <opt> 3214 <person firstname="Joe" lastname="Smith"> 3215 <email>joe@smith.com</email> 3216 <email>jsmith@yahoo.com</email> 3217 </person> 3218 <person firstname="Bob" lastname="Smith"> 3219 <email>bob@smith.com</email> 3220 </person> 3221 </opt> 3222 3223 { 3224 'person' => [ 3225 { 3226 'email' => [ 3227 'joe@smith.com', 3228 'jsmith@yahoo.com' 3229 ], 3230 'firstname' => 'Joe', 3231 'lastname' => 'Smith' 3232 }, 3233 { 3234 'email' => 'bob@smith.com', 3235 'firstname' => 'Bob', 3236 'lastname' => 'Smith' 3237 } 3238 ] 3239 } 3240 3241Nested elements with a recognised key attribute are transformed (folded) from 3242an array into a hash keyed on the value of that attribute (see the C<KeyAttr> 3243option): 3244 3245 <opt> 3246 <person key="jsmith" firstname="Joe" lastname="Smith" /> 3247 <person key="tsmith" firstname="Tom" lastname="Smith" /> 3248 <person key="jbloggs" firstname="Joe" lastname="Bloggs" /> 3249 </opt> 3250 3251 { 3252 'person' => { 3253 'jbloggs' => { 3254 'firstname' => 'Joe', 3255 'lastname' => 'Bloggs' 3256 }, 3257 'tsmith' => { 3258 'firstname' => 'Tom', 3259 'lastname' => 'Smith' 3260 }, 3261 'jsmith' => { 3262 'firstname' => 'Joe', 3263 'lastname' => 'Smith' 3264 } 3265 } 3266 } 3267 3268 3269The <anon> tag can be used to form anonymous arrays: 3270 3271 <opt> 3272 <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head> 3273 <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data> 3274 <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data> 3275 <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data> 3276 </opt> 3277 3278 { 3279 'head' => [ 3280 [ 'Col 1', 'Col 2', 'Col 3' ] 3281 ], 3282 'data' => [ 3283 [ 'R1C1', 'R1C2', 'R1C3' ], 3284 [ 'R2C1', 'R2C2', 'R2C3' ], 3285 [ 'R3C1', 'R3C2', 'R3C3' ] 3286 ] 3287 } 3288 3289Anonymous arrays can be nested to arbitrary levels and as a special case, if 3290the surrounding tags for an XML document contain only an anonymous array the 3291arrayref will be returned directly rather than the usual hashref: 3292 3293 <opt> 3294 <anon><anon>Col 1</anon><anon>Col 2</anon></anon> 3295 <anon><anon>R1C1</anon><anon>R1C2</anon></anon> 3296 <anon><anon>R2C1</anon><anon>R2C2</anon></anon> 3297 </opt> 3298 3299 [ 3300 [ 'Col 1', 'Col 2' ], 3301 [ 'R1C1', 'R1C2' ], 3302 [ 'R2C1', 'R2C2' ] 3303 ] 3304 3305Elements which only contain text content will simply be represented as a 3306scalar. Where an element has both attributes and text content, the element 3307will be represented as a hashref with the text content in the 'content' key 3308(see the C<ContentKey> option): 3309 3310 <opt> 3311 <one>first</one> 3312 <two attr="value">second</two> 3313 </opt> 3314 3315 { 3316 'one' => 'first', 3317 'two' => { 'attr' => 'value', 'content' => 'second' } 3318 } 3319 3320Mixed content (elements which contain both text content and nested elements) 3321will be not be represented in a useful way - element order and significant 3322whitespace will be lost. If you need to work with mixed content, then 3323XML::Simple is not the right tool for your job - check out the next section. 3324 3325=head1 WHERE TO FROM HERE? 3326 3327B<XML::Simple> is able to present a simple API because it makes some 3328assumptions on your behalf. These include: 3329 3330=over 4 3331 3332=item * 3333 3334You're not interested in text content consisting only of whitespace 3335 3336=item * 3337 3338You don't mind that when things get slurped into a hash the order is lost 3339 3340=item * 3341 3342You don't want fine-grained control of the formatting of generated XML 3343 3344=item * 3345 3346You would never use a hash key that was not a legal XML element name 3347 3348=item * 3349 3350You don't need help converting between different encodings 3351 3352=back 3353 3354In a serious XML project, you'll probably outgrow these assumptions fairly 3355quickly. This section of the document used to offer some advice on choosing a 3356more powerful option. That advice has now grown into the 'Perl-XML FAQ' 3357document which you can find at: L<http://perl-xml.sourceforge.net/faq/> 3358 3359The advice in the FAQ boils down to a quick explanation of tree versus 3360event based parsers and then recommends: 3361 3362For event based parsing, use SAX (do not set out to write any new code for 3363XML::Parser's handler API - it is obsolete). 3364 3365For tree-based parsing, you could choose between the 'Perlish' approach of 3366L<XML::Twig> and more standards based DOM implementations - preferably one with 3367XPath support such as L<XML::LibXML>. 3368 3369 3370=head1 SEE ALSO 3371 3372B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>. 3373 3374To generate documents with namespaces, L<XML::NamespaceSupport> is required. 3375 3376The optional caching functions require L<Storable>. 3377 3378Answers to Frequently Asked Questions about XML::Simple are bundled with this 3379distribution as: L<XML::Simple::FAQ> 3380 3381=head1 COPYRIGHT 3382 3383Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt> 3384 3385This library is free software; you can redistribute it and/or modify it 3386under the same terms as Perl itself. 3387 3388=cut 3389 3390 3391