1package XML::Simple;
2$XML::Simple::VERSION = '2.25';
3=head1 NAME
4
5XML::Simple - An API for simple XML files
6
7=head1 SYNOPSIS
8
9PLEASE DO NOT USE THIS MODULE IN NEW CODE.  If you ignore this
10warning and use it anyway, the C<qw(:strict)> mode will save you a little pain.
11
12    use XML::Simple qw(:strict);
13
14    my $ref = XMLin([<xml file or string>] [, <options>]);
15
16    my $xml = XMLout($hashref [, <options>]);
17
18Or the object oriented way:
19
20    require XML::Simple qw(:strict);
21
22    my $xs = XML::Simple->new([<options>]);
23
24    my $ref = $xs->XMLin([<xml file or string>] [, <options>]);
25
26    my $xml = $xs->XMLout($hashref [, <options>]);
27
28(or see L<"SAX SUPPORT"> for 'the SAX way').
29
30Note, in these examples, the square brackets are used to denote optional items
31not to imply items should be supplied in arrayrefs.
32
33=cut
34
35# See after __END__ for more POD documentation
36
37
38# Load essentials here, other modules loaded on demand later
39
40use strict;
41use warnings;
42use warnings::register;
43use Carp;
44use Scalar::Util qw();
45require Exporter;
46
47
48##############################################################################
49# Define some constants
50#
51
52use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $PREFERRED_PARSER);
53
54@ISA               = qw(Exporter);
55@EXPORT            = qw(XMLin XMLout);
56@EXPORT_OK         = qw(xml_in xml_out);
57
58my %StrictMode     = ();
59
60my @KnownOptIn     = qw(keyattr keeproot forcecontent contentkey noattr
61                        searchpath forcearray cache suppressempty parseropts
62                        grouptags nsexpand datahandler varattr variables
63                        normalisespace normalizespace valueattr strictmode);
64
65my @KnownOptOut    = qw(keyattr keeproot contentkey noattr
66                        rootname xmldecl outputfile noescape suppressempty
67                        grouptags nsexpand handler noindent attrindent nosort
68                        valueattr numericescape strictmode);
69
70my @DefKeyAttr     = qw(name key id);
71my $DefRootName    = qq(opt);
72my $DefContentKey  = qq(content);
73my $DefXmlDecl     = qq(<?xml version='1.0' standalone='yes'?>);
74
75my $xmlns_ns       = 'http://www.w3.org/2000/xmlns/';
76my $bad_def_ns_jcn = '{' . $xmlns_ns . '}';     # LibXML::SAX workaround
77
78
79##############################################################################
80# Globals for use by caching routines
81#
82
83my %MemShareCache  = ();
84my %MemCopyCache   = ();
85
86
87##############################################################################
88# Wrapper for Exporter - handles ':strict'
89#
90
91sub import {
92  # Handle the :strict tag
93
94  my($calling_package) = caller();
95  _strict_mode_for_caller(1) if grep(/^:strict$/, @_);
96
97  # Pass everything else to Exporter.pm
98
99  @_ = grep(!/^:strict$/, @_);
100  goto &Exporter::import;
101}
102
103
104##############################################################################
105# Constructor for optional object interface.
106#
107
108sub new {
109  my $class = shift;
110
111  if(@_ % 2) {
112    croak "Default options must be name=>value pairs (odd number supplied)";
113  }
114
115  my %known_opt;
116  @known_opt{@KnownOptIn, @KnownOptOut} = ();
117
118  my %raw_opt = @_;
119  $raw_opt{strictmode} = _strict_mode_for_caller()
120    unless exists $raw_opt{strictmode};
121  my %def_opt;
122  while(my($key, $val) = each %raw_opt) {
123    my $lkey = lc($key);
124    $lkey =~ s/_//g;
125    croak "Unrecognised option: $key" unless(exists($known_opt{$lkey}));
126    $def_opt{$lkey} = $val;
127  }
128  my $self = { def_opt => \%def_opt };
129
130  return(bless($self, $class));
131}
132
133
134##############################################################################
135# Sub: _strict_mode_for_caller()
136#
137# Gets or sets the XML::Simple :strict mode flag for the calling namespace.
138# Walks back through call stack to find the calling namespace and sets the
139# :strict mode flag for that namespace if an argument was supplied and returns
140# the flag value if not.
141#
142
143sub _strict_mode_for_caller {
144  my $set_mode = @_;
145  my $frame = 1;
146  while(my($package) = caller($frame++)) {
147    next if $package eq 'XML::Simple';
148    $StrictMode{$package} = 1 if $set_mode;
149    return $StrictMode{$package};
150  }
151  return(0);
152}
153
154
155##############################################################################
156# Sub: _get_object()
157#
158# Helper routine called from XMLin() and XMLout() to create an object if none
159# was provided.  Note, this routine does mess with the caller's @_ array.
160#
161
162sub _get_object {
163  my $self;
164  if($_[0]  and  UNIVERSAL::isa($_[0], 'XML::Simple')) {
165    $self = shift;
166  }
167  else {
168    $self = XML::Simple->new();
169  }
170
171  return $self;
172}
173
174
175##############################################################################
176# Sub/Method: XMLin()
177#
178# Exported routine for slurping XML into a hashref - see pod for info.
179#
180# May be called as object method or as a plain function.
181#
182# Expects one arg for the source XML, optionally followed by a number of
183# name => value option pairs.
184#
185
186sub XMLin {
187  my $self = &_get_object;      # note, @_ is passed implicitly
188
189  my $target = shift;
190
191
192  # Work out whether to parse a string, a file or a filehandle
193
194  if(not defined $target) {
195    return $self->parse_file(undef, @_);
196  }
197
198  elsif($target eq '-') {
199    local($/) = undef;
200    $target = <STDIN>;
201    return $self->parse_string(\$target, @_);
202  }
203
204  elsif(my $type = ref($target)) {
205    if($type eq 'SCALAR') {
206      return $self->parse_string($target, @_);
207    }
208    else {
209      return $self->parse_fh($target, @_);
210    }
211  }
212
213  elsif($target =~ m{<.*?>}s) {
214    return $self->parse_string(\$target, @_);
215  }
216
217  else {
218    return $self->parse_file($target, @_);
219  }
220}
221
222
223##############################################################################
224# Sub/Method: parse_file()
225#
226# Same as XMLin, but only parses from a named file.
227#
228
229sub parse_file {
230  my $self = &_get_object;      # note, @_ is passed implicitly
231
232  my $filename = shift;
233
234  $self->handle_options('in', @_);
235
236  $filename = $self->default_config_file if not defined $filename;
237
238  $filename = $self->find_xml_file($filename, @{$self->{opt}->{searchpath}});
239
240  # Check cache for previous parse
241
242  if($self->{opt}->{cache}) {
243    foreach my $scheme (@{$self->{opt}->{cache}}) {
244      my $method = 'cache_read_' . $scheme;
245      my $opt = $self->$method($filename);
246      return($opt) if($opt);
247    }
248  }
249
250  my $ref = $self->build_simple_tree($filename, undef);
251
252  if($self->{opt}->{cache}) {
253    my $method = 'cache_write_' . $self->{opt}->{cache}->[0];
254    $self->$method($ref, $filename);
255  }
256
257  return $ref;
258}
259
260
261##############################################################################
262# Sub/Method: parse_fh()
263#
264# Same as XMLin, but only parses from a filehandle.
265#
266
267sub parse_fh {
268  my $self = &_get_object;      # note, @_ is passed implicitly
269
270  my $fh = shift;
271  croak "Can't use " . (defined $fh ? qq{string ("$fh")} : 'undef') .
272        " as a filehandle" unless ref $fh;
273
274  $self->handle_options('in', @_);
275
276  return $self->build_simple_tree(undef, $fh);
277}
278
279
280##############################################################################
281# Sub/Method: parse_string()
282#
283# Same as XMLin, but only parses from a string or a reference to a string.
284#
285
286sub parse_string {
287  my $self = &_get_object;      # note, @_ is passed implicitly
288
289  my $string = shift;
290
291  $self->handle_options('in', @_);
292
293  return $self->build_simple_tree(undef, ref $string ? $string : \$string);
294}
295
296
297##############################################################################
298# Method: default_config_file()
299#
300# Returns the name of the XML file to parse if no filename (or XML string)
301# was provided.
302#
303
304sub default_config_file {
305  my $self = shift;
306
307  require File::Basename;
308
309  my($basename, $script_dir, $ext) = File::Basename::fileparse($0, '\.[^\.]+');
310
311  # Add script directory to searchpath
312
313  if($script_dir) {
314    unshift(@{$self->{opt}->{searchpath}}, $script_dir);
315  }
316
317  return $basename . '.xml';
318}
319
320
321##############################################################################
322# Method: build_simple_tree()
323#
324# Builds a 'tree' data structure as provided by XML::Parser and then
325# 'simplifies' it as specified by the various options in effect.
326#
327
328sub build_simple_tree {
329  my $self = shift;
330
331  my $tree = eval {
332    $self->build_tree(@_);
333  };
334  Carp::croak("$@XML::Simple called") if $@;
335
336  return $self->{opt}->{keeproot}
337         ? $self->collapse({}, @$tree)
338         : $self->collapse(@{$tree->[1]});
339}
340
341
342##############################################################################
343# Method: build_tree()
344#
345# This routine will be called if there is no suitable pre-parsed tree in a
346# cache.  It parses the XML and returns an XML::Parser 'Tree' style data
347# structure (summarised in the comments for the collapse() routine below).
348#
349# XML::Simple requires the services of another module that knows how to parse
350# XML.  If XML::SAX is installed, the default SAX parser will be used,
351# otherwise XML::Parser will be used.
352#
353# This routine expects to be passed a filename as argument 1 or a 'string' as
354# argument 2.  The 'string' might be a string of XML (passed by reference to
355# save memory) or it might be a reference to an IO::Handle.  (This
356# non-intuitive mess results in part from the way XML::Parser works but that's
357# really no excuse).
358#
359
360sub build_tree {
361  my $self     = shift;
362  my $filename = shift;
363  my $string   = shift;
364
365
366  my $preferred_parser = $PREFERRED_PARSER;
367  unless(defined($preferred_parser)) {
368    $preferred_parser = $ENV{XML_SIMPLE_PREFERRED_PARSER} || '';
369  }
370  if($preferred_parser eq 'XML::Parser') {
371    return($self->build_tree_xml_parser($filename, $string));
372  }
373
374  eval { require XML::SAX; };      # We didn't need it until now
375  if($@) {                         # No XML::SAX - fall back to XML::Parser
376    if($preferred_parser) {        # unless a SAX parser was expressly requested
377      croak "XMLin() could not load XML::SAX";
378    }
379    return($self->build_tree_xml_parser($filename, $string));
380  }
381
382  $XML::SAX::ParserPackage = $preferred_parser if($preferred_parser);
383
384  my $sp = XML::SAX::ParserFactory->parser(Handler => $self);
385
386  $self->{nocollapse} = 1;
387  my($tree);
388  if($filename) {
389    $tree = $sp->parse_uri($filename);
390  }
391  else {
392    if(ref($string) && ref($string) ne 'SCALAR') {
393      $tree = $sp->parse_file($string);
394    }
395    else {
396      $tree = $sp->parse_string($$string);
397    }
398  }
399
400  return($tree);
401}
402
403
404##############################################################################
405# Method: build_tree_xml_parser()
406#
407# This routine will be called if XML::SAX is not installed, or if XML::Parser
408# was specifically requested.  It takes the same arguments as build_tree() and
409# returns the same data structure (XML::Parser 'Tree' style).
410#
411
412sub build_tree_xml_parser {
413  my $self     = shift;
414  my $filename = shift;
415  my $string   = shift;
416
417
418  eval {
419    local($^W) = 0;      # Suppress warning from Expat.pm re File::Spec::load()
420    require XML::Parser; # We didn't need it until now
421  };
422  if($@) {
423    croak "XMLin() requires either XML::SAX or XML::Parser";
424  }
425
426  if($self->{opt}->{nsexpand}) {
427    carp "'nsexpand' option requires XML::SAX";
428  }
429
430  my $xp = $self->new_xml_parser();
431
432  my($tree);
433  if($filename) {
434    # $tree = $xp->parsefile($filename);  # Changed due to prob w/mod_perl
435    open(my $xfh, '<', $filename) || croak qq($filename - $!);
436    $tree = $xp->parse($xfh);
437  }
438  else {
439    $tree = $xp->parse($$string);
440  }
441
442  return($tree);
443}
444
445
446##############################################################################
447# Method: new_xml_parser()
448#
449# Simply calls the XML::Parser constructor.  Override this method to customise
450# the behaviour of the parser.
451#
452
453sub new_xml_parser {
454  my($self) = @_;
455
456  my $xp = XML::Parser->new(Style => 'Tree', @{$self->{opt}->{parseropts}});
457  $xp->setHandlers(ExternEnt => sub {return $_[2]});
458
459  return $xp;
460}
461
462
463##############################################################################
464# Method: cache_write_storable()
465#
466# Wrapper routine for invoking Storable::nstore() to cache a parsed data
467# structure.
468#
469
470sub cache_write_storable {
471  my($self, $data, $filename) = @_;
472
473  my $cachefile = $self->storable_filename($filename);
474
475  require Storable;           # We didn't need it until now
476
477  if ('VMS' eq $^O) {
478    Storable::nstore($data, $cachefile);
479  }
480  else {
481    # If the following line fails for you, your Storable.pm is old - upgrade
482    Storable::lock_nstore($data, $cachefile);
483  }
484
485}
486
487
488##############################################################################
489# Method: cache_read_storable()
490#
491# Wrapper routine for invoking Storable::retrieve() to read a cached parsed
492# data structure.  Only returns cached data if the cache file exists and is
493# newer than the source XML file.
494#
495
496sub cache_read_storable {
497  my($self, $filename) = @_;
498
499  my $cachefile = $self->storable_filename($filename);
500
501  return unless(-r $cachefile);
502  return unless((stat($cachefile))[9] > (stat($filename))[9]);
503
504  require Storable;           # We didn't need it until now
505
506  if ('VMS' eq $^O) {
507    return(Storable::retrieve($cachefile));
508  }
509  else {
510    return(Storable::lock_retrieve($cachefile));
511  }
512
513}
514
515
516##############################################################################
517# Method: storable_filename()
518#
519# Translates the supplied source XML filename into a filename for the storable
520# cached data.  A '.stor' suffix is added after stripping an optional '.xml'
521# suffix.
522#
523
524sub storable_filename {
525  my($self, $cachefile) = @_;
526
527  $cachefile =~ s{(\.xml)?$}{.stor};
528  return $cachefile;
529}
530
531
532##############################################################################
533# Method: cache_write_memshare()
534#
535# Takes the supplied data structure reference and stores it away in a global
536# hash structure.
537#
538
539sub cache_write_memshare {
540  my($self, $data, $filename) = @_;
541
542  $MemShareCache{$filename} = [time(), $data];
543}
544
545
546##############################################################################
547# Method: cache_read_memshare()
548#
549# Takes a filename and looks in a global hash for a cached parsed version.
550#
551
552sub cache_read_memshare {
553  my($self, $filename) = @_;
554
555  return unless($MemShareCache{$filename});
556  return unless($MemShareCache{$filename}->[0] > (stat($filename))[9]);
557
558  return($MemShareCache{$filename}->[1]);
559
560}
561
562
563##############################################################################
564# Method: cache_write_memcopy()
565#
566# Takes the supplied data structure and stores a copy of it in a global hash
567# structure.
568#
569
570sub cache_write_memcopy {
571  my($self, $data, $filename) = @_;
572
573  require Storable;           # We didn't need it until now
574
575  $MemCopyCache{$filename} = [time(), Storable::dclone($data)];
576}
577
578
579##############################################################################
580# Method: cache_read_memcopy()
581#
582# Takes a filename and looks in a global hash for a cached parsed version.
583# Returns a reference to a copy of that data structure.
584#
585
586sub cache_read_memcopy {
587  my($self, $filename) = @_;
588
589  return unless($MemCopyCache{$filename});
590  return unless($MemCopyCache{$filename}->[0] > (stat($filename))[9]);
591
592  return(Storable::dclone($MemCopyCache{$filename}->[1]));
593
594}
595
596
597##############################################################################
598# Sub/Method: XMLout()
599#
600# Exported routine for 'unslurping' a data structure out to XML.
601#
602# Expects a reference to a data structure and an optional list of option
603# name => value pairs.
604#
605
606sub XMLout {
607  my $self = &_get_object;      # note, @_ is passed implicitly
608
609  croak "XMLout() requires at least one argument" unless(@_);
610  my $ref = shift;
611
612  $self->handle_options('out', @_);
613
614
615  # If namespace expansion is set, XML::NamespaceSupport is required
616
617  if($self->{opt}->{nsexpand}) {
618    require XML::NamespaceSupport;
619    $self->{nsup} = XML::NamespaceSupport->new();
620    $self->{ns_prefix} = 'aaa';
621  }
622
623
624  # Wrap top level arrayref in a hash
625
626  if(UNIVERSAL::isa($ref, 'ARRAY')) {
627    $ref = { anon => $ref };
628  }
629
630
631  # Extract rootname from top level hash if keeproot enabled
632
633  if($self->{opt}->{keeproot}) {
634    my(@keys) = keys(%$ref);
635    if(@keys == 1) {
636      $ref = $ref->{$keys[0]};
637      $self->{opt}->{rootname} = $keys[0];
638    }
639  }
640
641  # Ensure there are no top level attributes if we're not adding root elements
642
643  elsif($self->{opt}->{rootname} eq '') {
644    if(UNIVERSAL::isa($ref, 'HASH')) {
645      my $refsave = $ref;
646      $ref = {};
647      foreach (keys(%$refsave)) {
648        if(ref($refsave->{$_})) {
649          $ref->{$_} = $refsave->{$_};
650        }
651        else {
652          $ref->{$_} = [ $refsave->{$_} ];
653        }
654      }
655    }
656  }
657
658
659  # Encode the hashref and write to file if necessary
660
661  $self->{_ancestors} = {};
662  my $xml = $self->value_to_xml($ref, $self->{opt}->{rootname}, '');
663  delete $self->{_ancestors};
664
665  if($self->{opt}->{xmldecl}) {
666    $xml = $self->{opt}->{xmldecl} . "\n" . $xml;
667  }
668
669  if($self->{opt}->{outputfile}) {
670    if(ref($self->{opt}->{outputfile})) {
671      my $fh = $self->{opt}->{outputfile};
672      if(UNIVERSAL::isa($fh, 'GLOB') and !UNIVERSAL::can($fh, 'print')) {
673        eval { require IO::Handle; };
674        croak $@ if $@;
675      }
676      return($fh->print($xml));
677    }
678    else {
679      open(my $out, '>', "$self->{opt}->{outputfile}") ||
680        croak "open($self->{opt}->{outputfile}): $!";
681      binmode($out, ':utf8') if($] >= 5.008);
682      print $out $xml or croak "print: $!";
683      close $out or croak "close: $!";
684    }
685  }
686  elsif($self->{opt}->{handler}) {
687    require XML::SAX;
688    my $sp = XML::SAX::ParserFactory->parser(
689               Handler => $self->{opt}->{handler}
690             );
691    return($sp->parse_string($xml));
692  }
693  else {
694    return($xml);
695  }
696}
697
698
699##############################################################################
700# Method: handle_options()
701#
702# Helper routine for both XMLin() and XMLout().  Both routines handle their
703# first argument and assume all other args are options handled by this routine.
704# Saves a hash of options in $self->{opt}.
705#
706# If default options were passed to the constructor, they will be retrieved
707# here and merged with options supplied to the method call.
708#
709# First argument should be the string 'in' or the string 'out'.
710#
711# Remaining arguments should be name=>value pairs.  Sets up default values
712# for options not supplied.  Unrecognised options are a fatal error.
713#
714
715sub handle_options  {
716  my $self = shift;
717  my $dirn = shift;
718
719
720  # Determine valid options based on context
721
722  my %known_opt;
723  if($dirn eq 'in') {
724    @known_opt{@KnownOptIn} = @KnownOptIn;
725  }
726  else {
727    @known_opt{@KnownOptOut} = @KnownOptOut;
728  }
729
730
731  # Store supplied options in hashref and weed out invalid ones
732
733  if(@_ % 2) {
734    croak "Options must be name=>value pairs (odd number supplied)";
735  }
736  my %raw_opt  = @_;
737  my $opt      = {};
738  $self->{opt} = $opt;
739
740  while(my($key, $val) = each %raw_opt) {
741    my $lkey = lc($key);
742    $lkey =~ s/_//g;
743    croak "Unrecognised option: $key" unless($known_opt{$lkey});
744    $opt->{$lkey} = $val;
745  }
746
747
748  # Merge in options passed to constructor
749
750  foreach (keys(%known_opt)) {
751    unless(exists($opt->{$_})) {
752      if(exists($self->{def_opt}->{$_})) {
753        $opt->{$_} = $self->{def_opt}->{$_};
754      }
755    }
756  }
757
758
759  # Set sensible defaults if not supplied
760
761  if(exists($opt->{rootname})) {
762    unless(defined($opt->{rootname})) {
763      $opt->{rootname} = '';
764    }
765  }
766  else {
767    $opt->{rootname} = $DefRootName;
768  }
769
770  if($opt->{xmldecl}  and  $opt->{xmldecl} eq '1') {
771    $opt->{xmldecl} = $DefXmlDecl;
772  }
773
774  if(exists($opt->{contentkey})) {
775    if($opt->{contentkey} =~ m{^-(.*)$}) {
776      $opt->{contentkey} = $1;
777      $opt->{collapseagain} = 1;
778    }
779  }
780  else {
781    $opt->{contentkey} = $DefContentKey;
782  }
783
784  unless(exists($opt->{normalisespace})) {
785    $opt->{normalisespace} = $opt->{normalizespace};
786  }
787  $opt->{normalisespace} = 0 unless(defined($opt->{normalisespace}));
788
789  # Cleanups for values assumed to be arrays later
790
791  if($opt->{searchpath}) {
792    unless(ref($opt->{searchpath})) {
793      $opt->{searchpath} = [ $opt->{searchpath} ];
794    }
795  }
796  else  {
797    $opt->{searchpath} = [ ];
798  }
799
800  if($opt->{cache}  and !ref($opt->{cache})) {
801    $opt->{cache} = [ $opt->{cache} ];
802  }
803  if($opt->{cache}) {
804    $_ = lc($_) foreach (@{$opt->{cache}});
805    foreach my $scheme (@{$opt->{cache}}) {
806      my $method = 'cache_read_' . $scheme;
807      croak "Unsupported caching scheme: $scheme"
808        unless($self->can($method));
809    }
810  }
811
812  if(exists($opt->{parseropts})) {
813    if(warnings::enabled()) {
814      carp "Warning: " .
815           "'ParserOpts' is deprecated, contact the author if you need it";
816    }
817  }
818  else {
819    $opt->{parseropts} = [ ];
820  }
821
822
823  # Special cleanup for {forcearray} which could be regex, arrayref or boolean
824  # or left to default to 0
825
826  if(exists($opt->{forcearray})) {
827    if(ref($opt->{forcearray}) eq 'Regexp') {
828      $opt->{forcearray} = [ $opt->{forcearray} ];
829    }
830
831    if(ref($opt->{forcearray}) eq 'ARRAY') {
832      my @force_list = @{$opt->{forcearray}};
833      if(@force_list) {
834        $opt->{forcearray} = {};
835        foreach my $tag (@force_list) {
836          if(ref($tag) eq 'Regexp') {
837            push @{$opt->{forcearray}->{_regex}}, $tag;
838          }
839          else {
840            $opt->{forcearray}->{$tag} = 1;
841          }
842        }
843      }
844      else {
845        $opt->{forcearray} = 0;
846      }
847    }
848    else {
849      $opt->{forcearray} = ( $opt->{forcearray} ? 1 : 0 );
850    }
851  }
852  else {
853    if($opt->{strictmode}  and  $dirn eq 'in') {
854      croak "No value specified for 'ForceArray' option in call to XML$dirn()";
855    }
856    $opt->{forcearray} = 0;
857  }
858
859
860  # Special cleanup for {keyattr} which could be arrayref or hashref or left
861  # to default to arrayref
862
863  if(exists($opt->{keyattr}))  {
864    if(ref($opt->{keyattr})) {
865      if(ref($opt->{keyattr}) eq 'HASH') {
866
867        # Make a copy so we can mess with it
868
869        $opt->{keyattr} = { %{$opt->{keyattr}} };
870
871
872        # Convert keyattr => { elem => '+attr' }
873        # to keyattr => { elem => [ 'attr', '+' ] }
874
875        foreach my $el (keys(%{$opt->{keyattr}})) {
876          if($opt->{keyattr}->{$el} =~ /^(\+|-)?(.*)$/) {
877            $opt->{keyattr}->{$el} = [ $2, ($1 ? $1 : '') ];
878            if($opt->{strictmode}  and  $dirn eq 'in') {
879              next if($opt->{forcearray} == 1);
880              next if(ref($opt->{forcearray}) eq 'HASH'
881                      and $opt->{forcearray}->{$el});
882              croak "<$el> set in KeyAttr but not in ForceArray";
883            }
884          }
885          else {
886            delete($opt->{keyattr}->{$el}); # Never reached (famous last words?)
887          }
888        }
889      }
890      else {
891        if(@{$opt->{keyattr}} == 0) {
892          delete($opt->{keyattr});
893        }
894      }
895    }
896    else {
897      $opt->{keyattr} = [ $opt->{keyattr} ];
898    }
899  }
900  else  {
901    if($opt->{strictmode}) {
902      croak "No value specified for 'KeyAttr' option in call to XML$dirn()";
903    }
904    $opt->{keyattr} = [ @DefKeyAttr ];
905  }
906
907
908  # Special cleanup for {valueattr} which could be arrayref or hashref
909
910  if(exists($opt->{valueattr})) {
911    if(ref($opt->{valueattr}) eq 'ARRAY') {
912      $opt->{valueattrlist} = {};
913      $opt->{valueattrlist}->{$_} = 1 foreach(@{ delete $opt->{valueattr} });
914    }
915  }
916
917  # make sure there's nothing weird in {grouptags}
918
919  if($opt->{grouptags}) {
920    croak "Illegal value for 'GroupTags' option - expected a hashref"
921      unless UNIVERSAL::isa($opt->{grouptags}, 'HASH');
922
923    while(my($key, $val) = each %{$opt->{grouptags}}) {
924      next if $key ne $val;
925      croak "Bad value in GroupTags: '$key' => '$val'";
926    }
927  }
928
929
930  # Check the {variables} option is valid and initialise variables hash
931
932  if($opt->{variables} and !UNIVERSAL::isa($opt->{variables}, 'HASH')) {
933    croak "Illegal value for 'Variables' option - expected a hashref";
934  }
935
936  if($opt->{variables}) {
937    $self->{_var_values} = { %{$opt->{variables}} };
938  }
939  elsif($opt->{varattr}) {
940    $self->{_var_values} = {};
941  }
942
943}
944
945
946##############################################################################
947# Method: find_xml_file()
948#
949# Helper routine for XMLin().
950# Takes a filename, and a list of directories, attempts to locate the file in
951# the directories listed.
952# Returns a full pathname on success; croaks on failure.
953#
954
955sub find_xml_file  {
956  my $self = shift;
957  my $file = shift;
958  my @search_path = @_;
959
960
961  require File::Basename;
962  require File::Spec;
963
964  my($filename, $filedir) = File::Basename::fileparse($file);
965
966  if($filename ne $file) {        # Ignore searchpath if dir component
967    return($file) if(-e $file);
968  }
969  else {
970    my($path);
971    foreach $path (@search_path)  {
972      my $fullpath = File::Spec->catfile($path, $file);
973      return($fullpath) if(-e $fullpath);
974    }
975  }
976
977  # If user did not supply a search path, default to current directory
978
979  if(!@search_path) {
980    return($file) if(-e $file);
981    croak "File does not exist: $file";
982  }
983
984  croak "Could not find $file in ", join(':', @search_path);
985}
986
987
988##############################################################################
989# Method: collapse()
990#
991# Helper routine for XMLin().  This routine really comprises the 'smarts' (or
992# value add) of this module.
993#
994# Takes the parse tree that XML::Parser produced from the supplied XML and
995# recurses through it 'collapsing' unnecessary levels of indirection (nested
996# arrays etc) to produce a data structure that is easier to work with.
997#
998# Elements in the original parser tree are represented as an element name
999# followed by an arrayref.  The first element of the array is a hashref
1000# containing the attributes.  The rest of the array contains a list of any
1001# nested elements as name+arrayref pairs:
1002#
1003#  <element name>, [ { <attribute hashref> }, <element name>, [ ... ], ... ]
1004#
1005# The special element name '0' (zero) flags text content.
1006#
1007# This routine cuts down the noise by discarding any text content consisting of
1008# only whitespace and then moves the nested elements into the attribute hash
1009# using the name of the nested element as the hash key and the collapsed
1010# version of the nested element as the value.  Multiple nested elements with
1011# the same name will initially be represented as an arrayref, but this may be
1012# 'folded' into a hashref depending on the value of the keyattr option.
1013#
1014
1015sub collapse {
1016  my $self = shift;
1017
1018
1019  # Start with the hash of attributes
1020
1021  my $attr  = shift;
1022  if($self->{opt}->{noattr}) {                    # Discard if 'noattr' set
1023    $attr = $self->new_hashref;
1024  }
1025  elsif($self->{opt}->{normalisespace} == 2) {
1026    while(my($key, $value) = each %$attr) {
1027      $attr->{$key} = $self->normalise_space($value)
1028    }
1029  }
1030
1031
1032  # Do variable substitutions
1033
1034  if(my $var = $self->{_var_values}) {
1035    while(my($key, $val) = each(%$attr)) {
1036      $val =~ s^\$\{([\w.]+)\}^ $self->get_var($1) ^ge;
1037      $attr->{$key} = $val;
1038    }
1039  }
1040
1041
1042  # Roll up 'value' attributes (but only if no nested elements)
1043
1044  if(!@_  and  keys %$attr == 1) {
1045    my($k) = keys %$attr;
1046    if($self->{opt}->{valueattrlist}  and $self->{opt}->{valueattrlist}->{$k}) {
1047      return $attr->{$k};
1048    }
1049  }
1050
1051
1052  # Add any nested elements
1053
1054  my($key, $val);
1055  while(@_) {
1056    $key = shift;
1057    $val = shift;
1058    $val = '' if not defined $val;
1059
1060    if(ref($val)) {
1061      $val = $self->collapse(@$val);
1062      next if(!defined($val)  and  $self->{opt}->{suppressempty});
1063    }
1064    elsif($key eq '0') {
1065      next if($val =~ m{^\s*$}s);  # Skip all whitespace content
1066
1067      $val = $self->normalise_space($val)
1068        if($self->{opt}->{normalisespace} == 2);
1069
1070      # do variable substitutions
1071
1072      if(my $var = $self->{_var_values}) {
1073        $val =~ s^\$\{(\w+)\}^ $self->get_var($1) ^ge;
1074      }
1075
1076
1077      # look for variable definitions
1078
1079      if(my $var = $self->{opt}->{varattr}) {
1080        if(exists $attr->{$var}) {
1081          $self->set_var($attr->{$var}, $val);
1082        }
1083      }
1084
1085
1086      # Collapse text content in element with no attributes to a string
1087
1088      if(!%$attr  and  !@_) {
1089        return($self->{opt}->{forcecontent} ?
1090          { $self->{opt}->{contentkey} => $val } : $val
1091        );
1092      }
1093      $key = $self->{opt}->{contentkey};
1094    }
1095
1096
1097    # Combine duplicate attributes into arrayref if required
1098
1099    if(exists($attr->{$key})) {
1100      if(UNIVERSAL::isa($attr->{$key}, 'ARRAY')) {
1101        push(@{$attr->{$key}}, $val);
1102      }
1103      else {
1104        $attr->{$key} = [ $attr->{$key}, $val ];
1105      }
1106    }
1107    elsif(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1108      $attr->{$key} = [ $val ];
1109    }
1110    else {
1111      if( $key ne $self->{opt}->{contentkey}
1112          and (
1113            ($self->{opt}->{forcearray} == 1)
1114            or (
1115              (ref($self->{opt}->{forcearray}) eq 'HASH')
1116              and (
1117                $self->{opt}->{forcearray}->{$key}
1118                or (grep $key =~ $_, @{$self->{opt}->{forcearray}->{_regex}})
1119              )
1120            )
1121          )
1122        ) {
1123        $attr->{$key} = [ $val ];
1124      }
1125      else {
1126        $attr->{$key} = $val;
1127      }
1128    }
1129
1130  }
1131
1132
1133  # Turn arrayrefs into hashrefs if key fields present
1134
1135  if($self->{opt}->{keyattr}) {
1136    while(($key,$val) = each %$attr) {
1137      if(defined($val)  and  UNIVERSAL::isa($val, 'ARRAY')) {
1138        $attr->{$key} = $self->array_to_hash($key, $val);
1139      }
1140    }
1141  }
1142
1143
1144  # disintermediate grouped tags
1145
1146  if($self->{opt}->{grouptags}) {
1147    while(my($key, $val) = each(%$attr)) {
1148      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1149      next unless(exists($self->{opt}->{grouptags}->{$key}));
1150
1151      my($child_key, $child_val) =  %$val;
1152
1153      if($self->{opt}->{grouptags}->{$key} eq $child_key) {
1154        $attr->{$key}= $child_val;
1155      }
1156    }
1157  }
1158
1159
1160  # Fold hashes containing a single anonymous array up into just the array
1161
1162  my $count = scalar keys %$attr;
1163  if($count == 1
1164     and  exists $attr->{anon}
1165     and  UNIVERSAL::isa($attr->{anon}, 'ARRAY')
1166  ) {
1167    return($attr->{anon});
1168  }
1169
1170
1171  # Do the right thing if hash is empty, otherwise just return it
1172
1173  if(!%$attr  and  exists($self->{opt}->{suppressempty})) {
1174    if(defined($self->{opt}->{suppressempty})  and
1175       $self->{opt}->{suppressempty} eq '') {
1176      return('');
1177    }
1178    return(undef);
1179  }
1180
1181
1182  # Roll up named elements with named nested 'value' attributes
1183
1184  if($self->{opt}->{valueattr}) {
1185    while(my($key, $val) = each(%$attr)) {
1186      next unless($self->{opt}->{valueattr}->{$key});
1187      next unless(UNIVERSAL::isa($val, 'HASH') and (keys %$val == 1));
1188      my($k) = keys %$val;
1189      next unless($k eq $self->{opt}->{valueattr}->{$key});
1190      $attr->{$key} = $val->{$k};
1191    }
1192  }
1193
1194  return($attr)
1195
1196}
1197
1198
1199##############################################################################
1200# Method: set_var()
1201#
1202# Called when a variable definition is encountered in the XML.  (A variable
1203# definition looks like <element attrname="name">value</element> where attrname
1204# matches the varattr setting).
1205#
1206
1207sub set_var {
1208  my($self, $name, $value) = @_;
1209
1210  $self->{_var_values}->{$name} = $value;
1211}
1212
1213
1214##############################################################################
1215# Method: get_var()
1216#
1217# Called during variable substitution to get the value for the named variable.
1218#
1219
1220sub get_var {
1221  my($self, $name) = @_;
1222
1223  my $value = $self->{_var_values}->{$name};
1224  return $value if(defined($value));
1225
1226  return '${' . $name . '}';
1227}
1228
1229
1230##############################################################################
1231# Method: normalise_space()
1232#
1233# Strips leading and trailing whitespace and collapses sequences of whitespace
1234# characters to a single space.
1235#
1236
1237sub normalise_space {
1238  my($self, $text) = @_;
1239
1240  $text =~ s/^\s+//s;
1241  $text =~ s/\s+$//s;
1242  $text =~ s/\s\s+/ /sg;
1243
1244  return $text;
1245}
1246
1247
1248##############################################################################
1249# Method: array_to_hash()
1250#
1251# Helper routine for collapse().
1252# Attempts to 'fold' an array of hashes into an hash of hashes.  Returns a
1253# reference to the hash on success or the original array if folding is
1254# not possible.  Behaviour is controlled by 'keyattr' option.
1255#
1256
1257sub array_to_hash {
1258  my $self     = shift;
1259  my $name     = shift;
1260  my $arrayref = shift;
1261
1262  my $hashref  = $self->new_hashref;
1263
1264  my($i, $key, $val, $flag);
1265
1266
1267  # Handle keyattr => { .... }
1268
1269  if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1270    return($arrayref) unless(exists($self->{opt}->{keyattr}->{$name}));
1271    ($key, $flag) = @{$self->{opt}->{keyattr}->{$name}};
1272    for($i = 0; $i < @$arrayref; $i++)  {
1273      if(UNIVERSAL::isa($arrayref->[$i], 'HASH') and
1274         exists($arrayref->[$i]->{$key})
1275      ) {
1276        $val = $arrayref->[$i]->{$key};
1277        if(ref($val)) {
1278          $self->die_or_warn("<$name> element has non-scalar '$key' key attribute");
1279          return($arrayref);
1280        }
1281        $val = $self->normalise_space($val)
1282          if($self->{opt}->{normalisespace} == 1);
1283        $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1284          if(exists($hashref->{$val}));
1285        $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
1286        $hashref->{$val}->{"-$key"} = $hashref->{$val}->{$key} if($flag eq '-');
1287        delete $hashref->{$val}->{$key} unless($flag eq '+');
1288      }
1289      else {
1290        $self->die_or_warn("<$name> element has no '$key' key attribute");
1291        return($arrayref);
1292      }
1293    }
1294  }
1295
1296
1297  # Or assume keyattr => [ .... ]
1298
1299  else {
1300    my $default_keys =
1301      join(',', @DefKeyAttr) eq join(',', @{$self->{opt}->{keyattr}});
1302
1303    ELEMENT: for($i = 0; $i < @$arrayref; $i++)  {
1304      return($arrayref) unless(UNIVERSAL::isa($arrayref->[$i], 'HASH'));
1305
1306      foreach $key (@{$self->{opt}->{keyattr}}) {
1307        if(defined($arrayref->[$i]->{$key}))  {
1308          $val = $arrayref->[$i]->{$key};
1309          if(ref($val)) {
1310            $self->die_or_warn("<$name> element has non-scalar '$key' key attribute")
1311              if not $default_keys;
1312            return($arrayref);
1313          }
1314          $val = $self->normalise_space($val)
1315            if($self->{opt}->{normalisespace} == 1);
1316          $self->die_or_warn("<$name> element has non-unique value in '$key' key attribute: $val")
1317            if(exists($hashref->{$val}));
1318          $hashref->{$val} = $self->new_hashref( %{$arrayref->[$i]} );
1319          delete $hashref->{$val}->{$key};
1320          next ELEMENT;
1321        }
1322      }
1323
1324      return($arrayref);    # No keyfield matched
1325    }
1326  }
1327
1328  # collapse any hashes which now only have a 'content' key
1329
1330  if($self->{opt}->{collapseagain}) {
1331    $hashref = $self->collapse_content($hashref);
1332  }
1333
1334  return($hashref);
1335}
1336
1337
1338##############################################################################
1339# Method: die_or_warn()
1340#
1341# Takes a diagnostic message and does one of three things:
1342# 1. dies if strict mode is enabled
1343# 2. warns if warnings are enabled but strict mode is not
1344# 3. ignores message and returns silently if neither strict mode nor warnings
1345#    are enabled
1346#
1347
1348sub die_or_warn {
1349  my $self = shift;
1350  my $msg  = shift;
1351
1352  croak $msg if($self->{opt}->{strictmode});
1353  if(warnings::enabled()) {
1354    carp "Warning: $msg";
1355  }
1356}
1357
1358
1359##############################################################################
1360# Method: new_hashref()
1361#
1362# This is a hook routine for overriding in a sub-class.  Some people believe
1363# that using Tie::IxHash here will solve order-loss problems.
1364#
1365
1366sub new_hashref {
1367  my $self = shift;
1368
1369  return { @_ };
1370}
1371
1372
1373##############################################################################
1374# Method: collapse_content()
1375#
1376# Helper routine for array_to_hash
1377#
1378# Arguments expected are:
1379# - an XML::Simple object
1380# - a hashref
1381# the hashref is a former array, turned into a hash by array_to_hash because
1382# of the presence of key attributes
1383# at this point collapse_content avoids over-complicated structures like
1384# dir => { libexecdir    => { content => '$exec_prefix/libexec' },
1385#          localstatedir => { content => '$prefix' },
1386#        }
1387# into
1388# dir => { libexecdir    => '$exec_prefix/libexec',
1389#          localstatedir => '$prefix',
1390#        }
1391
1392sub collapse_content {
1393  my $self       = shift;
1394  my $hashref    = shift;
1395
1396  my $contentkey = $self->{opt}->{contentkey};
1397
1398  # first go through the values,checking that they are fit to collapse
1399  foreach my $val (values %$hashref) {
1400    return $hashref unless (     (ref($val) eq 'HASH')
1401                             and (keys %$val == 1)
1402                             and (exists $val->{$contentkey})
1403                           );
1404  }
1405
1406  # now collapse them
1407  foreach my $key (keys %$hashref) {
1408    $hashref->{$key}=  $hashref->{$key}->{$contentkey};
1409  }
1410
1411  return $hashref;
1412}
1413
1414
1415##############################################################################
1416# Method: value_to_xml()
1417#
1418# Helper routine for XMLout() - recurses through a data structure building up
1419# and returning an XML representation of that structure as a string.
1420#
1421# Arguments expected are:
1422# - the data structure to be encoded (usually a reference)
1423# - the XML tag name to use for this item
1424# - a string of spaces for use as the current indent level
1425#
1426
1427sub value_to_xml {
1428  my $self = shift;;
1429
1430
1431  # Grab the other arguments
1432
1433  my($ref, $name, $indent) = @_;
1434
1435  my $named = (defined($name) and $name ne '' ? 1 : 0);
1436
1437  my $nl = "\n";
1438
1439  my $is_root = $indent eq '' ? 1 : 0;   # Warning, dirty hack!
1440  if($self->{opt}->{noindent}) {
1441    $indent = '';
1442    $nl     = '';
1443  }
1444
1445
1446  # Convert to XML
1447
1448  my $refaddr = Scalar::Util::refaddr($ref);
1449  if($refaddr) {
1450    croak "circular data structures not supported"
1451      if $self->{_ancestors}->{$refaddr};
1452    $self->{_ancestors}->{$refaddr} = $ref;  # keep ref alive until we delete it
1453  }
1454  else {
1455    if($named) {
1456      return(join('',
1457              $indent, '<', $name, '>',
1458              ($self->{opt}->{noescape} ? $ref : $self->escape_value($ref)),
1459              '</', $name, ">", $nl
1460            ));
1461    }
1462    else {
1463      return("$ref$nl");
1464    }
1465  }
1466
1467
1468  # Unfold hash to array if possible
1469
1470  if(UNIVERSAL::isa($ref, 'HASH')      # It is a hash
1471     and keys %$ref                    # and it's not empty
1472     and $self->{opt}->{keyattr}       # and folding is enabled
1473     and !$is_root                     # and its not the root element
1474  ) {
1475    $ref = $self->hash_to_array($name, $ref);
1476  }
1477
1478
1479  my @result = ();
1480  my($key, $value);
1481
1482
1483  # Handle hashrefs
1484
1485  if(UNIVERSAL::isa($ref, 'HASH')) {
1486
1487    # Reintermediate grouped values if applicable
1488
1489    if($self->{opt}->{grouptags}) {
1490      $ref = $self->copy_hash($ref);
1491      while(my($key, $val) = each %$ref) {
1492        if($self->{opt}->{grouptags}->{$key}) {
1493          $ref->{$key} = $self->new_hashref(
1494            $self->{opt}->{grouptags}->{$key} => $val
1495          );
1496        }
1497      }
1498    }
1499
1500
1501    # Scan for namespace declaration attributes
1502
1503    my $nsdecls = '';
1504    my $default_ns_uri;
1505    if($self->{nsup}) {
1506      $ref = $self->copy_hash($ref);
1507      $self->{nsup}->push_context();
1508
1509      # Look for default namespace declaration first
1510
1511      if(exists($ref->{xmlns})) {
1512        $self->{nsup}->declare_prefix('', $ref->{xmlns});
1513        $nsdecls .= qq( xmlns="$ref->{xmlns}");
1514        delete($ref->{xmlns});
1515      }
1516      $default_ns_uri = $self->{nsup}->get_uri('');
1517
1518
1519      # Then check all the other keys
1520
1521      foreach my $qname (keys(%$ref)) {
1522        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1523        if($uri) {
1524          if($uri eq $xmlns_ns) {
1525            $self->{nsup}->declare_prefix($lname, $ref->{$qname});
1526            $nsdecls .= qq( xmlns:$lname="$ref->{$qname}");
1527            delete($ref->{$qname});
1528          }
1529        }
1530      }
1531
1532      # Translate any remaining Clarkian names
1533
1534      foreach my $qname (keys(%$ref)) {
1535        my($uri, $lname) = $self->{nsup}->parse_jclark_notation($qname);
1536        if($uri) {
1537          if($default_ns_uri  and  $uri eq $default_ns_uri) {
1538            $ref->{$lname} = $ref->{$qname};
1539            delete($ref->{$qname});
1540          }
1541          else {
1542            my $prefix = $self->{nsup}->get_prefix($uri);
1543            unless($prefix) {
1544              # $self->{nsup}->declare_prefix(undef, $uri);
1545              # $prefix = $self->{nsup}->get_prefix($uri);
1546              $prefix = $self->{ns_prefix}++;
1547              $self->{nsup}->declare_prefix($prefix, $uri);
1548              $nsdecls .= qq( xmlns:$prefix="$uri");
1549            }
1550            $ref->{"$prefix:$lname"} = $ref->{$qname};
1551            delete($ref->{$qname});
1552          }
1553        }
1554      }
1555    }
1556
1557
1558    my @nested = ();
1559    my $text_content = undef;
1560    if($named) {
1561      push @result, $indent, '<', $name, $nsdecls;
1562    }
1563
1564    if(keys %$ref) {
1565      my $first_arg = 1;
1566      foreach my $key ($self->sorted_keys($name, $ref)) {
1567        my $value = $ref->{$key};
1568        next if(substr($key, 0, 1) eq '-');
1569        if(!defined($value)) {
1570          next if $self->{opt}->{suppressempty};
1571          unless(exists($self->{opt}->{suppressempty})
1572             and !defined($self->{opt}->{suppressempty})
1573          ) {
1574            carp 'Use of uninitialized value' if warnings::enabled();
1575          }
1576          if($key eq $self->{opt}->{contentkey}) {
1577            $text_content = '';
1578          }
1579          else {
1580            $value = exists($self->{opt}->{suppressempty}) ? {} : '';
1581          }
1582        }
1583
1584        if(!ref($value)
1585           and $self->{opt}->{valueattr}
1586           and $self->{opt}->{valueattr}->{$key}
1587        ) {
1588          $value = $self->new_hashref(
1589            $self->{opt}->{valueattr}->{$key} => $value
1590          );
1591        }
1592
1593        if(ref($value)  or  $self->{opt}->{noattr}) {
1594          push @nested,
1595            $self->value_to_xml($value, $key, "$indent  ");
1596        }
1597        else {
1598          if($key eq $self->{opt}->{contentkey}) {
1599            $value = $self->escape_value($value) unless($self->{opt}->{noescape});
1600            $text_content = $value;
1601          }
1602          else {
1603            $value = $self->escape_attr($value) unless($self->{opt}->{noescape});
1604            push @result, "\n$indent " . ' ' x length($name)
1605              if($self->{opt}->{attrindent}  and  !$first_arg);
1606            push @result, ' ', $key, '="', $value , '"';
1607            $first_arg = 0;
1608          }
1609        }
1610      }
1611    }
1612    else {
1613      $text_content = '';
1614    }
1615
1616    if(@nested  or  defined($text_content)) {
1617      if($named) {
1618        push @result, ">";
1619        if(defined($text_content)) {
1620          push @result, $text_content;
1621          $nested[0] =~ s/^\s+// if(@nested);
1622        }
1623        else {
1624          push @result, $nl;
1625        }
1626        if(@nested) {
1627          push @result, @nested, $indent;
1628        }
1629        push @result, '</', $name, ">", $nl;
1630      }
1631      else {
1632        push @result, @nested;             # Special case if no root elements
1633      }
1634    }
1635    else {
1636      push @result, " />", $nl;
1637    }
1638    $self->{nsup}->pop_context() if($self->{nsup});
1639  }
1640
1641
1642  # Handle arrayrefs
1643
1644  elsif(UNIVERSAL::isa($ref, 'ARRAY')) {
1645    foreach $value (@$ref) {
1646      next if !defined($value) and $self->{opt}->{suppressempty};
1647      if(!ref($value)) {
1648        push @result,
1649             $indent, '<', $name, '>',
1650             ($self->{opt}->{noescape} ? $value : $self->escape_value($value)),
1651             '</', $name, ">$nl";
1652      }
1653      elsif(UNIVERSAL::isa($value, 'HASH')) {
1654        push @result, $self->value_to_xml($value, $name, $indent);
1655      }
1656      else {
1657        push @result,
1658               $indent, '<', $name, ">$nl",
1659               $self->value_to_xml($value, 'anon', "$indent  "),
1660               $indent, '</', $name, ">$nl";
1661      }
1662    }
1663  }
1664
1665  else {
1666    croak "Can't encode a value of type: " . ref($ref);
1667  }
1668
1669
1670  delete $self->{_ancestors}->{$refaddr};
1671
1672  return(join('', @result));
1673}
1674
1675
1676##############################################################################
1677# Method: sorted_keys()
1678#
1679# Returns the keys of the referenced hash sorted into alphabetical order, but
1680# with the 'key' key (as in KeyAttr) first, if there is one.
1681#
1682
1683sub sorted_keys {
1684  my($self, $name, $ref) = @_;
1685
1686  return keys %$ref if $self->{opt}->{nosort};
1687
1688  my %hash = %$ref;
1689  my $keyattr = $self->{opt}->{keyattr};
1690
1691  my @key;
1692
1693  if(ref $keyattr eq 'HASH') {
1694    if(exists $keyattr->{$name} and exists $hash{$keyattr->{$name}->[0]}) {
1695      push @key, $keyattr->{$name}->[0];
1696      delete $hash{$keyattr->{$name}->[0]};
1697    }
1698  }
1699  elsif(ref $keyattr eq 'ARRAY') {
1700    foreach (@{$keyattr}) {
1701      if(exists $hash{$_}) {
1702        push @key, $_;
1703        delete $hash{$_};
1704        last;
1705      }
1706    }
1707  }
1708
1709  return(@key, sort keys %hash);
1710}
1711
1712##############################################################################
1713# Method: escape_value()
1714#
1715# Helper routine for automatically escaping values for XMLout().
1716# Expects a scalar data value.  Returns escaped version.
1717#
1718
1719sub escape_value {
1720  my($self, $data) = @_;
1721
1722  return '' unless(defined($data));
1723
1724  $data =~ s/&/&amp;/sg;
1725  $data =~ s/</&lt;/sg;
1726  $data =~ s/>/&gt;/sg;
1727  $data =~ s/"/&quot;/sg;
1728
1729  my $level = $self->{opt}->{numericescape} or return $data;
1730
1731  return $self->numeric_escape($data, $level);
1732}
1733
1734sub numeric_escape {
1735  my($self, $data, $level) = @_;
1736
1737  if($self->{opt}->{numericescape} eq '2') {
1738    $data =~ s/([^\x00-\x7F])/'&#' . ord($1) . ';'/gse;
1739  }
1740  else {
1741    $data =~ s/([^\x00-\xFF])/'&#' . ord($1) . ';'/gse;
1742  }
1743
1744  return $data;
1745}
1746
1747##############################################################################
1748# Method: escape_attr()
1749#
1750# Helper routine for escaping attribute values.  Defaults to escape_value(),
1751# but may be overridden by a subclass to customise behaviour.
1752#
1753
1754sub escape_attr {
1755  my $self = shift;
1756
1757  return $self->escape_value(@_);
1758}
1759
1760
1761##############################################################################
1762# Method: hash_to_array()
1763#
1764# Helper routine for value_to_xml().
1765# Attempts to 'unfold' a hash of hashes into an array of hashes.  Returns a
1766# reference to the array on success or the original hash if unfolding is
1767# not possible.
1768#
1769
1770sub hash_to_array {
1771  my $self    = shift;
1772  my $parent  = shift;
1773  my $hashref = shift;
1774
1775  my $arrayref = [];
1776
1777  my($key, $value);
1778
1779  my @keys = $self->{opt}->{nosort} ? keys %$hashref : sort keys %$hashref;
1780  foreach $key (@keys) {
1781    $value = $hashref->{$key};
1782    return($hashref) unless(UNIVERSAL::isa($value, 'HASH'));
1783
1784    if(ref($self->{opt}->{keyattr}) eq 'HASH') {
1785      return($hashref) unless(defined($self->{opt}->{keyattr}->{$parent}));
1786      push @$arrayref, $self->copy_hash(
1787        $value, $self->{opt}->{keyattr}->{$parent}->[0] => $key
1788      );
1789    }
1790    else {
1791      push(@$arrayref, { $self->{opt}->{keyattr}->[0] => $key, %$value });
1792    }
1793  }
1794
1795  return($arrayref);
1796}
1797
1798
1799##############################################################################
1800# Method: copy_hash()
1801#
1802# Helper routine for hash_to_array().  When unfolding a hash of hashes into
1803# an array of hashes, we need to copy the key from the outer hash into the
1804# inner hash.  This routine makes a copy of the original hash so we don't
1805# destroy the original data structure.  You might wish to override this
1806# method if you're using tied hashes and don't want them to get untied.
1807#
1808
1809sub copy_hash {
1810  my($self, $orig, @extra) = @_;
1811
1812  return { @extra, %$orig };
1813}
1814
1815##############################################################################
1816# Methods required for building trees from SAX events
1817##############################################################################
1818
1819sub start_document {
1820  my $self = shift;
1821
1822  $self->handle_options('in') unless($self->{opt});
1823
1824  $self->{lists} = [];
1825  $self->{curlist} = $self->{tree} = [];
1826}
1827
1828
1829sub start_element {
1830  my $self    = shift;
1831  my $element = shift;
1832
1833  my $name = $element->{Name};
1834  if($self->{opt}->{nsexpand}) {
1835    $name = $element->{LocalName} || '';
1836    if($element->{NamespaceURI}) {
1837      $name = '{' . $element->{NamespaceURI} . '}' . $name;
1838    }
1839  }
1840  my $attributes = {};
1841  if($element->{Attributes}) {  # Might be undef
1842    foreach my $attr (values %{$element->{Attributes}}) {
1843      if($self->{opt}->{nsexpand}) {
1844        my $name = $attr->{LocalName} || '';
1845        if($attr->{NamespaceURI}) {
1846          $name = '{' . $attr->{NamespaceURI} . '}' . $name
1847        }
1848        $name = 'xmlns' if($name eq $bad_def_ns_jcn);
1849        $attributes->{$name} = $attr->{Value};
1850      }
1851      else {
1852        $attributes->{$attr->{Name}} = $attr->{Value};
1853      }
1854    }
1855  }
1856  my $newlist = [ $attributes ];
1857  push @{ $self->{lists} }, $self->{curlist};
1858  push @{ $self->{curlist} }, $name => $newlist;
1859  $self->{curlist} = $newlist;
1860}
1861
1862
1863sub characters {
1864  my $self  = shift;
1865  my $chars = shift;
1866
1867  my $text  = $chars->{Data};
1868  my $clist = $self->{curlist};
1869  my $pos = $#$clist;
1870
1871  if ($pos > 0 and $clist->[$pos - 1] eq '0') {
1872    $clist->[$pos] .= $text;
1873  }
1874  else {
1875    push @$clist, 0 => $text;
1876  }
1877}
1878
1879
1880sub end_element {
1881  my $self    = shift;
1882
1883  $self->{curlist} = pop @{ $self->{lists} };
1884}
1885
1886
1887sub end_document {
1888  my $self = shift;
1889
1890  delete($self->{curlist});
1891  delete($self->{lists});
1892
1893  my $tree = $self->{tree};
1894  delete($self->{tree});
1895
1896
1897  # Return tree as-is to XMLin()
1898
1899  return($tree) if($self->{nocollapse});
1900
1901
1902  # Or collapse it before returning it to SAX parser class
1903
1904  if($self->{opt}->{keeproot}) {
1905    $tree = $self->collapse({}, @$tree);
1906  }
1907  else {
1908    $tree = $self->collapse(@{$tree->[1]});
1909  }
1910
1911  if($self->{opt}->{datahandler}) {
1912    return($self->{opt}->{datahandler}->($self, $tree));
1913  }
1914
1915  return($tree);
1916}
1917
1918*xml_in  = \&XMLin;
1919*xml_out = \&XMLout;
1920
19211;
1922
1923__END__
1924
1925=head1 STATUS OF THIS MODULE
1926
1927The use of this module in new code is B<strongly discouraged>.  Other modules
1928are available which provide more straightforward and consistent interfaces.  In
1929particular, L<XML::LibXML> is highly recommended and you can refer to
1930L<Perl XML::LibXML by Example|http://grantm.github.io/perl-libxml-by-example/>
1931for a tutorial introduction.
1932
1933L<XML::Twig> is another excellent alternative.
1934
1935The major problems with this module are the large number of options (some of
1936which have unfortunate defaults) and the arbitrary ways in which these options
1937interact - often producing unexpected results.
1938
1939Patches with bug fixes and documentation fixes are welcome, but new features
1940are unlikely to be added.
1941
1942=head1 QUICK START
1943
1944Say you have a script called B<foo> and a file of configuration options
1945called B<foo.xml> containing the following:
1946
1947  <config logdir="/var/log/foo/" debugfile="/tmp/foo.debug">
1948    <server name="sahara" osname="solaris" osversion="2.6">
1949      <address>10.0.0.101</address>
1950      <address>10.0.1.101</address>
1951    </server>
1952    <server name="gobi" osname="irix" osversion="6.5">
1953      <address>10.0.0.102</address>
1954    </server>
1955    <server name="kalahari" osname="linux" osversion="2.0.34">
1956      <address>10.0.0.103</address>
1957      <address>10.0.1.103</address>
1958    </server>
1959  </config>
1960
1961The following lines of code in B<foo>:
1962
1963  use XML::Simple qw(:strict);
1964
1965  my $config = XMLin(undef, KeyAttr => { server => 'name' }, ForceArray => [ 'server', 'address' ]);
1966
1967will 'slurp' the configuration options into the hashref $config (because no
1968filename or XML string was passed as the first argument to C<XMLin()> the name
1969and location of the XML file will be inferred from name and location of the
1970script).  You can dump out the contents of the hashref using Data::Dumper:
1971
1972  use Data::Dumper;
1973
1974  print Dumper($config);
1975
1976which will produce something like this (formatting has been adjusted for
1977brevity):
1978
1979  {
1980      'logdir'        => '/var/log/foo/',
1981      'debugfile'     => '/tmp/foo.debug',
1982      'server'        => {
1983          'sahara'        => {
1984              'osversion'     => '2.6',
1985              'osname'        => 'solaris',
1986              'address'       => [ '10.0.0.101', '10.0.1.101' ]
1987          },
1988          'gobi'          => {
1989              'osversion'     => '6.5',
1990              'osname'        => 'irix',
1991              'address'       => [ '10.0.0.102' ]
1992          },
1993          'kalahari'      => {
1994              'osversion'     => '2.0.34',
1995              'osname'        => 'linux',
1996              'address'       => [ '10.0.0.103', '10.0.1.103' ]
1997          }
1998      }
1999  }
2000
2001Your script could then access the name of the log directory like this:
2002
2003  print $config->{logdir};
2004
2005similarly, the second address on the server 'kalahari' could be referenced as:
2006
2007  print $config->{server}->{kalahari}->{address}->[1];
2008
2009Note: If the mapping between the output of Data::Dumper and the print
2010statements above is not obvious to you, then please refer to the 'references'
2011tutorial (AKA: "Mark's very short tutorial about references") at L<perlreftut>.
2012
2013In this example, the C<< ForceArray >> option was used to list elements that
2014might occur multiple times and should therefore be represented as arrayrefs
2015(even when only one element is present).
2016
2017The C<< KeyAttr >> option was used to indicate that each C<< <server> >>
2018element has a unique identifier in the C<< name >> attribute.  This allows you
2019to index directly to a particular server record using the name as a hash key
2020(as shown above).
2021
2022For simple requirements, that's really all there is to it.  If you want to
2023store your XML in a different directory or file, or pass it in as a string or
2024even pass it in via some derivative of an IO::Handle, you'll need to check out
2025L<"OPTIONS">.  If you want to turn off or tweak the array folding feature (that
2026neat little transformation that produced $config->{server}) you'll find options
2027for that as well.
2028
2029If you want to generate XML (for example to write a modified version of
2030$config back out as XML), check out C<XMLout()>.
2031
2032If your needs are not so simple, this may not be the module for you.  In that
2033case, you might want to read L<"WHERE TO FROM HERE?">.
2034
2035=head1 DESCRIPTION
2036
2037The XML::Simple module provides a simple API layer on top of an underlying XML
2038parsing module (either XML::Parser or one of the SAX2 parser modules).  Two
2039functions are exported: C<XMLin()> and C<XMLout()>.  Note: you can explicitly
2040request the lower case versions of the function names: C<xml_in()> and
2041C<xml_out()>.
2042
2043The simplest approach is to call these two functions directly, but an
2044optional object oriented interface (see L<"OPTIONAL OO INTERFACE"> below)
2045allows them to be called as methods of an B<XML::Simple> object.  The object
2046interface can also be used at either end of a SAX pipeline.
2047
2048=head2 XMLin()
2049
2050Parses XML formatted data and returns a reference to a data structure which
2051contains the same information in a more readily accessible form.  (Skip
2052down to L<"EXAMPLES"> below, for more sample code).
2053
2054C<XMLin()> accepts an optional XML specifier followed by zero or more 'name =>
2055value' option pairs.  The XML specifier can be one of the following:
2056
2057=over 4
2058
2059=item A filename
2060
2061If the filename contains no directory components C<XMLin()> will look for the
2062file in each directory in the SearchPath (see L<"OPTIONS"> below) or in the
2063current directory if the SearchPath option is not defined.  eg:
2064
2065  $ref = XMLin('/etc/params.xml');
2066
2067Note, the filename '-' can be used to parse from STDIN.
2068
2069=item undef
2070
2071If there is no XML specifier, C<XMLin()> will check the script directory and
2072each of the SearchPath directories for a file with the same name as the script
2073but with the extension '.xml'.  Note: if you wish to specify options, you
2074must specify the value 'undef'.  eg:
2075
2076  $ref = XMLin(undef, ForceArray => 1);
2077
2078=item A string of XML
2079
2080A string containing XML (recognised by the presence of '<' and '>' characters)
2081will be parsed directly.  eg:
2082
2083  $ref = XMLin('<opt username="bob" password="flurp" />');
2084
2085=item An IO::Handle object
2086
2087An IO::Handle object will be read to EOF and its contents parsed. eg:
2088
2089  $fh = IO::File->new('/etc/params.xml');
2090  $ref = XMLin($fh);
2091
2092=back
2093
2094=head2 XMLout()
2095
2096Takes a data structure (generally a hashref) and returns an XML encoding of
2097that structure.  If the resulting XML is parsed using C<XMLin()>, it should
2098return a data structure equivalent to the original (see caveats below).
2099
2100The C<XMLout()> function can also be used to output the XML as SAX events
2101see the C<Handler> option and L<"SAX SUPPORT"> for more details).
2102
2103When translating hashes to XML, hash keys which have a leading '-' will be
2104silently skipped.  This is the approved method for marking elements of a
2105data structure which should be ignored by C<XMLout>.  (Note: If these items
2106were not skipped the key names would be emitted as element or attribute names
2107with a leading '-' which would not be valid XML).
2108
2109=head2 Caveats
2110
2111Some care is required in creating data structures which will be passed to
2112C<XMLout()>.  Hash keys from the data structure will be encoded as either XML
2113element names or attribute names.  Therefore, you should use hash key names
2114which conform to the relatively strict XML naming rules:
2115
2116Names in XML must begin with a letter.  The remaining characters may be
2117letters, digits, hyphens (-), underscores (_) or full stops (.).  It is also
2118allowable to include one colon (:) in an element name but this should only be
2119used when working with namespaces (B<XML::Simple> can only usefully work with
2120namespaces when teamed with a SAX Parser).
2121
2122You can use other punctuation characters in hash values (just not in hash
2123keys) however B<XML::Simple> does not support dumping binary data.
2124
2125If you break these rules, the current implementation of C<XMLout()> will
2126simply emit non-compliant XML which will be rejected if you try to read it
2127back in.  (A later version of B<XML::Simple> might take a more proactive
2128approach).
2129
2130Note also that although you can nest hashes and arrays to arbitrary levels,
2131circular data structures are not supported and will cause C<XMLout()> to die.
2132
2133If you wish to 'round-trip' arbitrary data structures from Perl to XML and back
2134to Perl, then you should probably disable array folding (using the KeyAttr
2135option) both with C<XMLout()> and with C<XMLin()>.  If you still don't get the
2136expected results, you may prefer to use L<XML::Dumper> which is designed for
2137exactly that purpose.
2138
2139Refer to L<"WHERE TO FROM HERE?"> if C<XMLout()> is too simple for your needs.
2140
2141
2142=head1 OPTIONS
2143
2144B<XML::Simple> supports a number of options (in fact as each release of
2145B<XML::Simple> adds more options, the module's claim to the name 'Simple'
2146becomes increasingly tenuous).  If you find yourself repeatedly having to
2147specify the same options, you might like to investigate L<"OPTIONAL OO
2148INTERFACE"> below.
2149
2150If you can't be bothered reading the documentation, refer to
2151L<"STRICT MODE"> to automatically catch common mistakes.
2152
2153Because there are so many options, it's hard for new users to know which ones
2154are important, so here are the two you really need to know about:
2155
2156=over 4
2157
2158=item *
2159
2160check out C<ForceArray> because you'll almost certainly want to turn it on
2161
2162=item *
2163
2164make sure you know what the C<KeyAttr> option does and what its default value is
2165because it may surprise you otherwise (note in particular that 'KeyAttr'
2166affects both C<XMLin> and C<XMLout>)
2167
2168=back
2169
2170The option name headings below have a trailing 'comment' - a hash followed by
2171two pieces of metadata:
2172
2173=over 4
2174
2175=item *
2176
2177Options are marked with 'I<in>' if they are recognised by C<XMLin()> and
2178'I<out>' if they are recognised by C<XMLout()>.
2179
2180=item *
2181
2182Each option is also flagged to indicate whether it is:
2183
2184 'important'   - don't use the module until you understand this one
2185 'handy'       - you can skip this on the first time through
2186 'advanced'    - you can skip this on the second time through
2187 'SAX only'    - don't worry about this unless you're using SAX (or
2188                 alternatively if you need this, you also need SAX)
2189 'seldom used' - you'll probably never use this unless you were the
2190                 person that requested the feature
2191
2192=back
2193
2194The options are listed alphabetically:
2195
2196Note: option names are no longer case sensitive so you can use the mixed case
2197versions shown here; all lower case as required by versions 2.03 and earlier;
2198or you can add underscores between the words (eg: key_attr).
2199
2200
2201=head2 AttrIndent => 1 I<# out - handy>
2202
2203When you are using C<XMLout()>, enable this option to have attributes printed
2204one-per-line with sensible indentation rather than all on one line.
2205
2206=head2 Cache => [ cache schemes ] I<# in - advanced>
2207
2208Because loading the B<XML::Parser> module and parsing an XML file can consume a
2209significant number of CPU cycles, it is often desirable to cache the output of
2210C<XMLin()> for later reuse.
2211
2212When parsing from a named file, B<XML::Simple> supports a number of caching
2213schemes.  The 'Cache' option may be used to specify one or more schemes (using
2214an anonymous array).  Each scheme will be tried in turn in the hope of finding
2215a cached pre-parsed representation of the XML file.  If no cached copy is
2216found, the file will be parsed and the first cache scheme in the list will be
2217used to save a copy of the results.  The following cache schemes have been
2218implemented:
2219
2220=over 4
2221
2222=item storable
2223
2224Utilises B<Storable.pm> to read/write a cache file with the same name as the
2225XML file but with the extension .stor
2226
2227=item memshare
2228
2229When a file is first parsed, a copy of the resulting data structure is retained
2230in memory in the B<XML::Simple> module's namespace.  Subsequent calls to parse
2231the same file will return a reference to this structure.  This cached version
2232will persist only for the life of the Perl interpreter (which in the case of
2233mod_perl for example, may be some significant time).
2234
2235Because each caller receives a reference to the same data structure, a change
2236made by one caller will be visible to all.  For this reason, the reference
2237returned should be treated as read-only.
2238
2239=item memcopy
2240
2241This scheme works identically to 'memshare' (above) except that each caller
2242receives a reference to a new data structure which is a copy of the cached
2243version.  Copying the data structure will add a little processing overhead,
2244therefore this scheme should only be used where the caller intends to modify
2245the data structure (or wishes to protect itself from others who might).  This
2246scheme uses B<Storable.pm> to perform the copy.
2247
2248=back
2249
2250Warning! The memory-based caching schemes compare the timestamp on the file to
2251the time when it was last parsed.  If the file is stored on an NFS filesystem
2252(or other network share) and the clock on the file server is not exactly
2253synchronised with the clock where your script is run, updates to the source XML
2254file may appear to be ignored.
2255
2256=head2 ContentKey => 'keyname' I<# in+out - seldom used>
2257
2258When text content is parsed to a hash value, this option lets you specify a
2259name for the hash key to override the default 'content'.  So for example:
2260
2261  XMLin('<opt one="1">Text</opt>', ContentKey => 'text')
2262
2263will parse to:
2264
2265  { 'one' => 1, 'text' => 'Text' }
2266
2267instead of:
2268
2269  { 'one' => 1, 'content' => 'Text' }
2270
2271C<XMLout()> will also honour the value of this option when converting a hashref
2272to XML.
2273
2274You can also prefix your selected key name with a '-' character to have
2275C<XMLin()> try a little harder to eliminate unnecessary 'content' keys after
2276array folding.  For example:
2277
2278  XMLin(
2279    '<opt><item name="one">First</item><item name="two">Second</item></opt>',
2280    KeyAttr => {item => 'name'},
2281    ForceArray => [ 'item' ],
2282    ContentKey => '-content'
2283  )
2284
2285will parse to:
2286
2287  {
2288    'item' => {
2289      'one' =>  'First'
2290      'two' =>  'Second'
2291    }
2292  }
2293
2294rather than this (without the '-'):
2295
2296  {
2297    'item' => {
2298      'one' => { 'content' => 'First' }
2299      'two' => { 'content' => 'Second' }
2300    }
2301  }
2302
2303=head2 DataHandler => code_ref I<# in - SAX only>
2304
2305When you use an B<XML::Simple> object as a SAX handler, it will return a
2306'simple tree' data structure in the same format as C<XMLin()> would return.  If
2307this option is set (to a subroutine reference), then when the tree is built the
2308subroutine will be called and passed two arguments: a reference to the
2309B<XML::Simple> object and a reference to the data tree.  The return value from
2310the subroutine will be returned to the SAX driver.  (See L<"SAX SUPPORT"> for
2311more details).
2312
2313=head2 ForceArray => 1 I<# in - important>
2314
2315This option should be set to '1' to force nested elements to be represented
2316as arrays even when there is only one.  Eg, with ForceArray enabled, this
2317XML:
2318
2319    <opt>
2320      <name>value</name>
2321    </opt>
2322
2323would parse to this:
2324
2325    {
2326      'name' => [
2327                  'value'
2328                ]
2329    }
2330
2331instead of this (the default):
2332
2333    {
2334      'name' => 'value'
2335    }
2336
2337This option is especially useful if the data structure is likely to be written
2338back out as XML and the default behaviour of rolling single nested elements up
2339into attributes is not desirable.
2340
2341If you are using the array folding feature, you should almost certainly enable
2342this option.  If you do not, single nested elements will not be parsed to
2343arrays and therefore will not be candidates for folding to a hash.  (Given that
2344the default value of 'KeyAttr' enables array folding, the default value of this
2345option should probably also have been enabled too - sorry).
2346
2347=head2 ForceArray => [ names ] I<# in - important>
2348
2349This alternative (and preferred) form of the 'ForceArray' option allows you to
2350specify a list of element names which should always be forced into an array
2351representation, rather than the 'all or nothing' approach above.
2352
2353It is also possible (since version 2.05) to include compiled regular
2354expressions in the list - any element names which match the pattern will be
2355forced to arrays.  If the list contains only a single regex, then it is not
2356necessary to enclose it in an arrayref.  Eg:
2357
2358  ForceArray => qr/_list$/
2359
2360=head2 ForceContent => 1 I<# in - seldom used>
2361
2362When C<XMLin()> parses elements which have text content as well as attributes,
2363the text content must be represented as a hash value rather than a simple
2364scalar.  This option allows you to force text content to always parse to
2365a hash value even when there are no attributes.  So for example:
2366
2367  XMLin('<opt><x>text1</x><y a="2">text2</y></opt>', ForceContent => 1)
2368
2369will parse to:
2370
2371  {
2372    'x' => {           'content' => 'text1' },
2373    'y' => { 'a' => 2, 'content' => 'text2' }
2374  }
2375
2376instead of:
2377
2378  {
2379    'x' => 'text1',
2380    'y' => { 'a' => 2, 'content' => 'text2' }
2381  }
2382
2383=head2 GroupTags => { grouping tag => grouped tag } I<# in+out - handy>
2384
2385You can use this option to eliminate extra levels of indirection in your Perl
2386data structure.  For example this XML:
2387
2388  <opt>
2389   <searchpath>
2390     <dir>/usr/bin</dir>
2391     <dir>/usr/local/bin</dir>
2392     <dir>/usr/X11/bin</dir>
2393   </searchpath>
2394 </opt>
2395
2396Would normally be read into a structure like this:
2397
2398  {
2399    searchpath => {
2400                    dir => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2401                  }
2402  }
2403
2404But when read in with the appropriate value for 'GroupTags':
2405
2406  my $opt = XMLin($xml, GroupTags => { searchpath => 'dir' });
2407
2408It will return this simpler structure:
2409
2410  {
2411    searchpath => [ '/usr/bin', '/usr/local/bin', '/usr/X11/bin' ]
2412  }
2413
2414The grouping element (C<< <searchpath> >> in the example) must not contain any
2415attributes or elements other than the grouped element.
2416
2417You can specify multiple 'grouping element' to 'grouped element' mappings in
2418the same hashref.  If this option is combined with C<KeyAttr>, the array
2419folding will occur first and then the grouped element names will be eliminated.
2420
2421C<XMLout> will also use the grouptag mappings to re-introduce the tags around
2422the grouped elements.  Beware though that this will occur in all places that
2423the 'grouping tag' name occurs - you probably don't want to use the same name
2424for elements as well as attributes.
2425
2426=head2 Handler => object_ref I<# out - SAX only>
2427
2428Use the 'Handler' option to have C<XMLout()> generate SAX events rather than
2429returning a string of XML.  For more details see L<"SAX SUPPORT"> below.
2430
2431Note: the current implementation of this option generates a string of XML
2432and uses a SAX parser to translate it into SAX events.  The normal encoding
2433rules apply here - your data must be UTF8 encoded unless you specify an
2434alternative encoding via the 'XMLDecl' option; and by the time the data reaches
2435the handler object, it will be in UTF8 form regardless of the encoding you
2436supply.  A future implementation of this option may generate the events
2437directly.
2438
2439=head2 KeepRoot => 1 I<# in+out - handy>
2440
2441In its attempt to return a data structure free of superfluous detail and
2442unnecessary levels of indirection, C<XMLin()> normally discards the root
2443element name.  Setting the 'KeepRoot' option to '1' will cause the root element
2444name to be retained.  So after executing this code:
2445
2446  $config = XMLin('<config tempdir="/tmp" />', KeepRoot => 1)
2447
2448You'll be able to reference the tempdir as
2449C<$config-E<gt>{config}-E<gt>{tempdir}> instead of the default
2450C<$config-E<gt>{tempdir}>.
2451
2452Similarly, setting the 'KeepRoot' option to '1' will tell C<XMLout()> that the
2453data structure already contains a root element name and it is not necessary to
2454add another.
2455
2456=head2 KeyAttr => [ list ] I<# in+out - important>
2457
2458This option controls the 'array folding' feature which translates nested
2459elements from an array to a hash.  It also controls the 'unfolding' of hashes
2460to arrays.
2461
2462For example, this XML:
2463
2464    <opt>
2465      <user login="grep" fullname="Gary R Epstein" />
2466      <user login="stty" fullname="Simon T Tyson" />
2467    </opt>
2468
2469would, by default, parse to this:
2470
2471    {
2472      'user' => [
2473                  {
2474                    'login' => 'grep',
2475                    'fullname' => 'Gary R Epstein'
2476                  },
2477                  {
2478                    'login' => 'stty',
2479                    'fullname' => 'Simon T Tyson'
2480                  }
2481                ]
2482    }
2483
2484If the option 'KeyAttr => "login"' were used to specify that the 'login'
2485attribute is a key, the same XML would parse to:
2486
2487    {
2488      'user' => {
2489                  'stty' => {
2490                              'fullname' => 'Simon T Tyson'
2491                            },
2492                  'grep' => {
2493                              'fullname' => 'Gary R Epstein'
2494                            }
2495                }
2496    }
2497
2498The key attribute names should be supplied in an arrayref if there is more
2499than one.  C<XMLin()> will attempt to match attribute names in the order
2500supplied.  C<XMLout()> will use the first attribute name supplied when
2501'unfolding' a hash into an array.
2502
2503Note 1: The default value for 'KeyAttr' is ['name', 'key', 'id'].  If you do
2504not want folding on input or unfolding on output you must set this option
2505to an empty list to disable the feature.
2506
2507Note 2: If you wish to use this option, you should also enable the
2508C<ForceArray> option.  Without 'ForceArray', a single nested element will be
2509rolled up into a scalar rather than an array and therefore will not be folded
2510(since only arrays get folded).
2511
2512=head2 KeyAttr => { list } I<# in+out - important>
2513
2514This alternative (and preferred) method of specifying the key attributes
2515allows more fine grained control over which elements are folded and on which
2516attributes.  For example the option 'KeyAttr => { package => 'id' } will cause
2517any package elements to be folded on the 'id' attribute.  No other elements
2518which have an 'id' attribute will be folded at all.
2519
2520Note: C<XMLin()> will generate a warning (or a fatal error in L<"STRICT MODE">)
2521if this syntax is used and an element which does not have the specified key
2522attribute is encountered (eg: a 'package' element without an 'id' attribute, to
2523use the example above).  Warnings can be suppressed with the lexical
2524C<no warnings;> pragma or C<no warnings 'XML::Simple';>.
2525
2526Two further variations are made possible by prefixing a '+' or a '-' character
2527to the attribute name:
2528
2529The option 'KeyAttr => { user => "+login" }' will cause this XML:
2530
2531    <opt>
2532      <user login="grep" fullname="Gary R Epstein" />
2533      <user login="stty" fullname="Simon T Tyson" />
2534    </opt>
2535
2536to parse to this data structure:
2537
2538    {
2539      'user' => {
2540                  'stty' => {
2541                              'fullname' => 'Simon T Tyson',
2542                              'login'    => 'stty'
2543                            },
2544                  'grep' => {
2545                              'fullname' => 'Gary R Epstein',
2546                              'login'    => 'grep'
2547                            }
2548                }
2549    }
2550
2551The '+' indicates that the value of the key attribute should be copied rather
2552than moved to the folded hash key.
2553
2554A '-' prefix would produce this result:
2555
2556    {
2557      'user' => {
2558                  'stty' => {
2559                              'fullname' => 'Simon T Tyson',
2560                              '-login'    => 'stty'
2561                            },
2562                  'grep' => {
2563                              'fullname' => 'Gary R Epstein',
2564                              '-login'    => 'grep'
2565                            }
2566                }
2567    }
2568
2569As described earlier, C<XMLout> will ignore hash keys starting with a '-'.
2570
2571=head2 NoAttr => 1 I<# in+out - handy>
2572
2573When used with C<XMLout()>, the generated XML will contain no attributes.
2574All hash key/values will be represented as nested elements instead.
2575
2576When used with C<XMLin()>, any attributes in the XML will be ignored.
2577
2578=head2 NoEscape => 1 I<# out - seldom used>
2579
2580By default, C<XMLout()> will translate the characters 'E<lt>', 'E<gt>', '&' and
2581'"' to '&lt;', '&gt;', '&amp;' and '&quot' respectively.  Use this option to
2582suppress escaping (presumably because you've already escaped the data in some
2583more sophisticated manner).
2584
2585=head2 NoIndent => 1 I<# out - seldom used>
2586
2587Set this option to 1 to disable C<XMLout()>'s default 'pretty printing' mode.
2588With this option enabled, the XML output will all be on one line (unless there
2589are newlines in the data) - this may be easier for downstream processing.
2590
2591=head2 NoSort => 1 I<# out - seldom used>
2592
2593Newer versions of XML::Simple sort elements and attributes alphabetically (*),
2594by default.  Enable this option to suppress the sorting - possibly for
2595backwards compatibility.
2596
2597* Actually, sorting is alphabetical but 'key' attribute or element names (as in
2598'KeyAttr') sort first.  Also, when a hash of hashes is 'unfolded', the elements
2599are sorted alphabetically by the value of the key field.
2600
2601=head2 NormaliseSpace => 0 | 1 | 2 I<# in - handy>
2602
2603This option controls how whitespace in text content is handled.  Recognised
2604values for the option are:
2605
2606=over 4
2607
2608=item *
2609
26100 = (default) whitespace is passed through unaltered (except of course for the
2611normalisation of whitespace in attribute values which is mandated by the XML
2612recommendation)
2613
2614=item *
2615
26161 = whitespace is normalised in any value used as a hash key (normalising means
2617removing leading and trailing whitespace and collapsing sequences of whitespace
2618characters to a single space)
2619
2620=item *
2621
26222 = whitespace is normalised in all text content
2623
2624=back
2625
2626Note: you can spell this option with a 'z' if that is more natural for you.
2627
2628=head2 NSExpand => 1 I<# in+out handy - SAX only>
2629
2630This option controls namespace expansion - the translation of element and
2631attribute names of the form 'prefix:name' to '{uri}name'.  For example the
2632element name 'xsl:template' might be expanded to:
2633'{http://www.w3.org/1999/XSL/Transform}template'.
2634
2635By default, C<XMLin()> will return element names and attribute names exactly as
2636they appear in the XML.  Setting this option to 1 will cause all element and
2637attribute names to be expanded to include their namespace prefix.
2638
2639I<Note: You must be using a SAX parser for this option to work (ie: it does not
2640work with XML::Parser)>.
2641
2642This option also controls whether C<XMLout()> performs the reverse translation
2643from '{uri}name' back to 'prefix:name'.  The default is no translation.  If
2644your data contains expanded names, you should set this option to 1 otherwise
2645C<XMLout> will emit XML which is not well formed.
2646
2647I<Note: You must have the XML::NamespaceSupport module installed if you want
2648C<XMLout()> to translate URIs back to prefixes>.
2649
2650=head2 NumericEscape => 0 | 1 | 2 I<# out - handy>
2651
2652Use this option to have 'high' (non-ASCII) characters in your Perl data
2653structure converted to numeric entities (eg: &#8364;) in the XML output.  Three
2654levels are possible:
2655
26560 - default: no numeric escaping (OK if you're writing out UTF8)
2657
26581 - only characters above 0xFF are escaped (ie: characters in the 0x80-FF range are not escaped), possibly useful with ISO8859-1 output
2659
26602 - all characters above 0x7F are escaped (good for plain ASCII output)
2661
2662=head2 OutputFile => <file specifier> I<# out - handy>
2663
2664The default behaviour of C<XMLout()> is to return the XML as a string.  If you
2665wish to write the XML to a file, simply supply the filename using the
2666'OutputFile' option.
2667
2668This option also accepts an IO handle object - especially useful in Perl 5.8.0
2669and later for output using an encoding other than UTF-8, eg:
2670
2671  open my $fh, '>:encoding(iso-8859-1)', $path or die "open($path): $!";
2672  XMLout($ref, OutputFile => $fh);
2673
2674Note, XML::Simple does not require that the object you pass in to the
2675OutputFile option inherits from L<IO::Handle> - it simply assumes the object
2676supports a C<print> method.
2677
2678=head2 ParserOpts => [ XML::Parser Options ] I<# in - don't use this>
2679
2680I<Note: This option is now officially deprecated.  If you find it useful, email
2681the author with an example of what you use it for.  Do not use this option to
2682set the ProtocolEncoding, that's just plain wrong - fix the XML>.
2683
2684This option allows you to pass parameters to the constructor of the underlying
2685XML::Parser object (which of course assumes you're not using SAX).
2686
2687=head2 RootName => 'string' I<# out - handy>
2688
2689By default, when C<XMLout()> generates XML, the root element will be named
2690'opt'.  This option allows you to specify an alternative name.
2691
2692Specifying either undef or the empty string for the RootName option will
2693produce XML with no root elements.  In most cases the resulting XML fragment
2694will not be 'well formed' and therefore could not be read back in by C<XMLin()>.
2695Nevertheless, the option has been found to be useful in certain circumstances.
2696
2697=head2 SearchPath => [ list ] I<# in - handy>
2698
2699If you pass C<XMLin()> a filename, but the filename include no directory
2700component, you can use this option to specify which directories should be
2701searched to locate the file.  You might use this option to search first in the
2702user's home directory, then in a global directory such as /etc.
2703
2704If a filename is provided to C<XMLin()> but SearchPath is not defined, the
2705file is assumed to be in the current directory.
2706
2707If the first parameter to C<XMLin()> is undefined, the default SearchPath
2708will contain only the directory in which the script itself is located.
2709Otherwise the default SearchPath will be empty.
2710
2711=head2 StrictMode => 1 | 0  I<# in+out seldom used>
2712
2713This option allows you to turn L<STRICT MODE> on or off for a particular call,
2714regardless of whether it was enabled at the time XML::Simple was loaded.
2715
2716=head2 SuppressEmpty => 1 | '' | undef I<# in+out - handy>
2717
2718This option controls what C<XMLin()> should do with empty elements (no
2719attributes and no content).  The default behaviour is to represent them as
2720empty hashes.  Setting this option to a true value (eg: 1) will cause empty
2721elements to be skipped altogether.  Setting the option to 'undef' or the empty
2722string will cause empty elements to be represented as the undefined value or
2723the empty string respectively.  The latter two alternatives are a little
2724easier to test for in your code than a hash with no keys.
2725
2726The option also controls what C<XMLout()> does with undefined values.  Setting
2727the option to undef causes undefined values to be output as empty elements
2728(rather than empty attributes), it also suppresses the generation of warnings
2729about undefined values.  Setting the option to a true value (eg: 1) causes
2730undefined values to be skipped altogether on output.
2731
2732=head2 ValueAttr => [ names ] I<# in - handy>
2733
2734Use this option to deal elements which always have a single attribute and no
2735content.  Eg:
2736
2737  <opt>
2738    <colour value="red" />
2739    <size   value="XXL" />
2740  </opt>
2741
2742Setting C<< ValueAttr => [ 'value' ] >> will cause the above XML to parse to:
2743
2744  {
2745    colour => 'red',
2746    size   => 'XXL'
2747  }
2748
2749instead of this (the default):
2750
2751  {
2752    colour => { value => 'red' },
2753    size   => { value => 'XXL' }
2754  }
2755
2756Note: This form of the ValueAttr option is not compatible with C<XMLout()> -
2757since the attribute name is discarded at parse time, the original XML cannot be
2758reconstructed.
2759
2760=head2 ValueAttr => { element => attribute, ... } I<# in+out - handy>
2761
2762This (preferred) form of the ValueAttr option requires you to specify both
2763the element and the attribute names.  This is not only safer, it also allows
2764the original XML to be reconstructed by C<XMLout()>.
2765
2766Note: You probably don't want to use this option and the NoAttr option at the
2767same time.
2768
2769=head2 Variables => { name => value } I<# in - handy>
2770
2771This option allows variables in the XML to be expanded when the file is read.
2772(there is no facility for putting the variable names back if you regenerate
2773XML using C<XMLout>).
2774
2775A 'variable' is any text of the form C<${name}> which occurs in an attribute
2776value or in the text content of an element.  If 'name' matches a key in the
2777supplied hashref, C<${name}> will be replaced with the corresponding value from
2778the hashref.  If no matching key is found, the variable will not be replaced.
2779Names must match the regex: C<[\w.]+> (ie: only 'word' characters and dots are
2780allowed).
2781
2782=head2 VarAttr => 'attr_name' I<# in - handy>
2783
2784In addition to the variables defined using C<Variables>, this option allows
2785variables to be defined in the XML.  A variable definition consists of an
2786element with an attribute called 'attr_name' (the value of the C<VarAttr>
2787option).  The value of the attribute will be used as the variable name and the
2788text content of the element will be used as the value.  A variable defined in
2789this way will override a variable defined using the C<Variables> option.  For
2790example:
2791
2792  XMLin( '<opt>
2793            <dir name="prefix">/usr/local/apache</dir>
2794            <dir name="exec_prefix">${prefix}</dir>
2795            <dir name="bindir">${exec_prefix}/bin</dir>
2796          </opt>',
2797         VarAttr => 'name', ContentKey => '-content'
2798        );
2799
2800produces the following data structure:
2801
2802  {
2803    dir => {
2804             prefix      => '/usr/local/apache',
2805             exec_prefix => '/usr/local/apache',
2806             bindir      => '/usr/local/apache/bin',
2807           }
2808  }
2809
2810=head2 XMLDecl => 1  or  XMLDecl => 'string'  I<# out - handy>
2811
2812If you want the output from C<XMLout()> to start with the optional XML
2813declaration, simply set the option to '1'.  The default XML declaration is:
2814
2815        <?xml version='1.0' standalone='yes'?>
2816
2817If you want some other string (for example to declare an encoding value), set
2818the value of this option to the complete string you require.
2819
2820
2821=head1 OPTIONAL OO INTERFACE
2822
2823The procedural interface is both simple and convenient however there are a
2824couple of reasons why you might prefer to use the object oriented (OO)
2825interface:
2826
2827=over 4
2828
2829=item *
2830
2831to define a set of default values which should be used on all subsequent calls
2832to C<XMLin()> or C<XMLout()>
2833
2834=item *
2835
2836to override methods in B<XML::Simple> to provide customised behaviour
2837
2838=back
2839
2840The default values for the options described above are unlikely to suit
2841everyone.  The OO interface allows you to effectively override B<XML::Simple>'s
2842defaults with your preferred values.  It works like this:
2843
2844First create an XML::Simple parser object with your preferred defaults:
2845
2846  my $xs = XML::Simple->new(ForceArray => 1, KeepRoot => 1);
2847
2848then call C<XMLin()> or C<XMLout()> as a method of that object:
2849
2850  my $ref = $xs->XMLin($xml);
2851  my $xml = $xs->XMLout($ref);
2852
2853You can also specify options when you make the method calls and these values
2854will be merged with the values specified when the object was created.  Values
2855specified in a method call take precedence.
2856
2857Note: when called as methods, the C<XMLin()> and C<XMLout()> routines may be
2858called as C<xml_in()> or C<xml_out()>.  The method names are aliased so the
2859only difference is the aesthetics.
2860
2861=head2 Parsing Methods
2862
2863You can explicitly call one of the following methods rather than rely on the
2864C<xml_in()> method automatically determining whether the target to be parsed is
2865a string, a file or a filehandle:
2866
2867=over 4
2868
2869=item parse_string(text)
2870
2871Works exactly like the C<xml_in()> method but assumes the first argument is
2872a string of XML (or a reference to a scalar containing a string of XML).
2873
2874=item parse_file(filename)
2875
2876Works exactly like the C<xml_in()> method but assumes the first argument is
2877the name of a file containing XML.
2878
2879=item parse_fh(file_handle)
2880
2881Works exactly like the C<xml_in()> method but assumes the first argument is
2882a filehandle which can be read to get XML.
2883
2884=back
2885
2886=head2 Hook Methods
2887
2888You can make your own class which inherits from XML::Simple and overrides
2889certain behaviours.  The following methods may provide useful 'hooks' upon
2890which to hang your modified behaviour.  You may find other undocumented methods
2891by examining the source, but those may be subject to change in future releases.
2892
2893=over 4
2894
2895=item new_xml_parser()
2896
2897This method will be called when a new XML::Parser object must be constructed
2898(either because XML::SAX is not installed or XML::Parser is preferred).
2899
2900=item handle_options(direction, name => value ...)
2901
2902This method will be called when one of the parsing methods or the C<XMLout()>
2903method is called.  The initial argument will be a string (either 'in' or 'out')
2904and the remaining arguments will be name value pairs.
2905
2906=item default_config_file()
2907
2908Calculates and returns the name of the file which should be parsed if no
2909filename is passed to C<XMLin()> (default: C<$0.xml>).
2910
2911=item build_simple_tree(filename, string)
2912
2913Called from C<XMLin()> or any of the parsing methods.  Takes either a file name
2914as the first argument or C<undef> followed by a 'string' as the second
2915argument.  Returns a simple tree data structure.  You could override this
2916method to apply your own transformations before the data structure is returned
2917to the caller.
2918
2919=item new_hashref()
2920
2921When the 'simple tree' data structure is being built, this method will be
2922called to create any required anonymous hashrefs.
2923
2924=item sorted_keys(name, hashref)
2925
2926Called when C<XMLout()> is translating a hashref to XML.  This routine returns
2927a list of hash keys in the order that the corresponding attributes/elements
2928should appear in the output.
2929
2930=item escape_value(string)
2931
2932Called from C<XMLout()>, takes a string and returns a copy of the string with
2933XML character escaping rules applied.
2934
2935=item escape_attr(string)
2936
2937Called from C<XMLout()>, to handle attribute values.  By default, just calls
2938C<escape_value()>, but you can override this method if you want attributes
2939escaped differently than text content.
2940
2941=item numeric_escape(string)
2942
2943Called from C<escape_value()>, to handle non-ASCII characters (depending on the
2944value of the NumericEscape option).
2945
2946=item copy_hash(hashref, extra_key => value, ...)
2947
2948Called from C<XMLout()>, when 'unfolding' a hash of hashes into an array of
2949hashes.  You might wish to override this method if you're using tied hashes and
2950don't want them to get untied.
2951
2952=back
2953
2954=head2 Cache Methods
2955
2956XML::Simple implements three caching schemes ('storable', 'memshare' and
2957'memcopy').  You can implement a custom caching scheme by implementing
2958two methods - one for reading from the cache and one for writing to it.
2959
2960For example, you might implement a new 'dbm' scheme that stores cached data
2961structures using the L<MLDBM> module.  First, you would add a
2962C<cache_read_dbm()> method which accepted a filename for use as a lookup key
2963and returned a data structure on success, or undef on failure.  Then, you would
2964implement a C<cache_read_dbm()> method which accepted a data structure and a
2965filename.
2966
2967You would use this caching scheme by specifying the option:
2968
2969  Cache => [ 'dbm' ]
2970
2971=head1 STRICT MODE
2972
2973If you import the B<XML::Simple> routines like this:
2974
2975  use XML::Simple qw(:strict);
2976
2977the following common mistakes will be detected and treated as fatal errors
2978
2979=over 4
2980
2981=item *
2982
2983Failing to explicitly set the C<KeyAttr> option - if you can't be bothered
2984reading about this option, turn it off with: KeyAttr => [ ]
2985
2986=item *
2987
2988Failing to explicitly set the C<ForceArray> option - if you can't be bothered
2989reading about this option, set it to the safest mode with: ForceArray => 1
2990
2991=item *
2992
2993Setting ForceArray to an array, but failing to list all the elements from the
2994KeyAttr hash.
2995
2996=item *
2997
2998Data error - KeyAttr is set to say { part => 'partnum' } but the XML contains
2999one or more E<lt>partE<gt> elements without a 'partnum' attribute (or nested
3000element).  Note: if strict mode is not set but C<use warnings;> is in force,
3001this condition triggers a warning.
3002
3003=item *
3004
3005Data error - as above, but non-unique values are present in the key attribute
3006(eg: more than one E<lt>partE<gt> element with the same partnum).  This will
3007also trigger a warning if strict mode is not enabled.
3008
3009=item *
3010
3011Data error - as above, but value of key attribute (eg: partnum) is not a
3012scalar string (due to nested elements etc).  This will also trigger a warning
3013if strict mode is not enabled.
3014
3015=back
3016
3017=head1 SAX SUPPORT
3018
3019From version 1.08_01, B<XML::Simple> includes support for SAX (the Simple API
3020for XML) - specifically SAX2.
3021
3022In a typical SAX application, an XML parser (or SAX 'driver') module generates
3023SAX events (start of element, character data, end of element, etc) as it parses
3024an XML document and a 'handler' module processes the events to extract the
3025required data.  This simple model allows for some interesting and powerful
3026possibilities:
3027
3028=over 4
3029
3030=item *
3031
3032Applications written to the SAX API can extract data from huge XML documents
3033without the memory overheads of a DOM or tree API.
3034
3035=item *
3036
3037The SAX API allows for plug and play interchange of parser modules without
3038having to change your code to fit a new module's API.  A number of SAX parsers
3039are available with capabilities ranging from extreme portability to blazing
3040performance.
3041
3042=item *
3043
3044A SAX 'filter' module can implement both a handler interface for receiving
3045data and a generator interface for passing modified data on to a downstream
3046handler.  Filters can be chained together in 'pipelines'.
3047
3048=item *
3049
3050One filter module might split a data stream to direct data to two or more
3051downstream handlers.
3052
3053=item *
3054
3055Generating SAX events is not the exclusive preserve of XML parsing modules.
3056For example, a module might extract data from a relational database using DBI
3057and pass it on to a SAX pipeline for filtering and formatting.
3058
3059=back
3060
3061B<XML::Simple> can operate at either end of a SAX pipeline.  For example,
3062you can take a data structure in the form of a hashref and pass it into a
3063SAX pipeline using the 'Handler' option on C<XMLout()>:
3064
3065  use XML::Simple;
3066  use Some::SAX::Filter;
3067  use XML::SAX::Writer;
3068
3069  my $ref = {
3070               ....   # your data here
3071            };
3072
3073  my $writer = XML::SAX::Writer->new();
3074  my $filter = Some::SAX::Filter->new(Handler => $writer);
3075  my $simple = XML::Simple->new(Handler => $filter);
3076  $simple->XMLout($ref);
3077
3078You can also put B<XML::Simple> at the opposite end of the pipeline to take
3079advantage of the simple 'tree' data structure once the relevant data has been
3080isolated through filtering:
3081
3082  use XML::SAX;
3083  use Some::SAX::Filter;
3084  use XML::Simple;
3085
3086  my $simple = XML::Simple->new(ForceArray => 1, KeyAttr => ['partnum']);
3087  my $filter = Some::SAX::Filter->new(Handler => $simple);
3088  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3089
3090  my $ref = $parser->parse_uri('some_huge_file.xml');
3091
3092  print $ref->{part}->{'555-1234'};
3093
3094You can build a filter by using an XML::Simple object as a handler and setting
3095its DataHandler option to point to a routine which takes the resulting tree,
3096modifies it and sends it off as SAX events to a downstream handler:
3097
3098  my $writer = XML::SAX::Writer->new();
3099  my $filter = XML::Simple->new(
3100                 DataHandler => sub {
3101                                  my $simple = shift;
3102                                  my $data = shift;
3103
3104                                  # Modify $data here
3105
3106                                  $simple->XMLout($data, Handler => $writer);
3107                                }
3108               );
3109  my $parser = XML::SAX::ParserFactory->parser(Handler => $filter);
3110
3111  $parser->parse_uri($filename);
3112
3113I<Note: In this last example, the 'Handler' option was specified in the call to
3114C<XMLout()> but it could also have been specified in the constructor>.
3115
3116=head1 ENVIRONMENT
3117
3118If you don't care which parser module B<XML::Simple> uses then skip this
3119section entirely (it looks more complicated than it really is).
3120
3121B<XML::Simple> will default to using a B<SAX> parser if one is available or
3122B<XML::Parser> if SAX is not available.
3123
3124You can dictate which parser module is used by setting either the environment
3125variable 'XML_SIMPLE_PREFERRED_PARSER' or the package variable
3126$XML::Simple::PREFERRED_PARSER to contain the module name.  The following rules
3127are used:
3128
3129=over 4
3130
3131=item *
3132
3133The package variable takes precedence over the environment variable if both are defined.  To force B<XML::Simple> to ignore the environment settings and use
3134its default rules, you can set the package variable to an empty string.
3135
3136=item *
3137
3138If the 'preferred parser' is set to the string 'XML::Parser', then
3139L<XML::Parser> will be used (or C<XMLin()> will die if L<XML::Parser> is not
3140installed).
3141
3142=item *
3143
3144If the 'preferred parser' is set to some other value, then it is assumed to be
3145the name of a SAX parser module and is passed to L<XML::SAX::ParserFactory>.
3146If L<XML::SAX> is not installed, or the requested parser module is not
3147installed, then C<XMLin()> will die.
3148
3149=item *
3150
3151If the 'preferred parser' is not defined at all (the normal default
3152state), an attempt will be made to load L<XML::SAX>.  If L<XML::SAX> is
3153installed, then a parser module will be selected according to
3154L<XML::SAX::ParserFactory>'s normal rules (which typically means the last SAX
3155parser installed).
3156
3157=item *
3158
3159if the 'preferred parser' is not defined and B<XML::SAX> is not
3160installed, then B<XML::Parser> will be used.  C<XMLin()> will die if
3161L<XML::Parser> is not installed.
3162
3163=back
3164
3165Note: The B<XML::SAX> distribution includes an XML parser written entirely in
3166Perl.  It is very portable but it is not very fast.  You should consider
3167installing L<XML::LibXML> or L<XML::SAX::Expat> if they are available for your
3168platform.
3169
3170=head1 ERROR HANDLING
3171
3172The XML standard is very clear on the issue of non-compliant documents.  An
3173error in parsing any single element (for example a missing end tag) must cause
3174the whole document to be rejected.  B<XML::Simple> will die with an appropriate
3175message if it encounters a parsing error.
3176
3177If dying is not appropriate for your application, you should arrange to call
3178C<XMLin()> in an eval block and look for errors in $@.  eg:
3179
3180    my $config = eval { XMLin() };
3181    PopUpMessage($@) if($@);
3182
3183Note, there is a common misconception that use of B<eval> will significantly
3184slow down a script.  While that may be true when the code being eval'd is in a
3185string, it is not true of code like the sample above.
3186
3187=head1 EXAMPLES
3188
3189When C<XMLin()> reads the following very simple piece of XML:
3190
3191    <opt username="testuser" password="frodo"></opt>
3192
3193it returns the following data structure:
3194
3195    {
3196      'username' => 'testuser',
3197      'password' => 'frodo'
3198    }
3199
3200The identical result could have been produced with this alternative XML:
3201
3202    <opt username="testuser" password="frodo" />
3203
3204Or this (although see 'ForceArray' option for variations):
3205
3206    <opt>
3207      <username>testuser</username>
3208      <password>frodo</password>
3209    </opt>
3210
3211Repeated nested elements are represented as anonymous arrays:
3212
3213    <opt>
3214      <person firstname="Joe" lastname="Smith">
3215        <email>joe@smith.com</email>
3216        <email>jsmith@yahoo.com</email>
3217      </person>
3218      <person firstname="Bob" lastname="Smith">
3219        <email>bob@smith.com</email>
3220      </person>
3221    </opt>
3222
3223    {
3224      'person' => [
3225                    {
3226                      'email' => [
3227                                   'joe@smith.com',
3228                                   'jsmith@yahoo.com'
3229                                 ],
3230                      'firstname' => 'Joe',
3231                      'lastname' => 'Smith'
3232                    },
3233                    {
3234                      'email' => 'bob@smith.com',
3235                      'firstname' => 'Bob',
3236                      'lastname' => 'Smith'
3237                    }
3238                  ]
3239    }
3240
3241Nested elements with a recognised key attribute are transformed (folded) from
3242an array into a hash keyed on the value of that attribute (see the C<KeyAttr>
3243option):
3244
3245    <opt>
3246      <person key="jsmith" firstname="Joe" lastname="Smith" />
3247      <person key="tsmith" firstname="Tom" lastname="Smith" />
3248      <person key="jbloggs" firstname="Joe" lastname="Bloggs" />
3249    </opt>
3250
3251    {
3252      'person' => {
3253                    'jbloggs' => {
3254                                   'firstname' => 'Joe',
3255                                   'lastname' => 'Bloggs'
3256                                 },
3257                    'tsmith' => {
3258                                  'firstname' => 'Tom',
3259                                  'lastname' => 'Smith'
3260                                },
3261                    'jsmith' => {
3262                                  'firstname' => 'Joe',
3263                                  'lastname' => 'Smith'
3264                                }
3265                  }
3266    }
3267
3268
3269The <anon> tag can be used to form anonymous arrays:
3270
3271    <opt>
3272      <head><anon>Col 1</anon><anon>Col 2</anon><anon>Col 3</anon></head>
3273      <data><anon>R1C1</anon><anon>R1C2</anon><anon>R1C3</anon></data>
3274      <data><anon>R2C1</anon><anon>R2C2</anon><anon>R2C3</anon></data>
3275      <data><anon>R3C1</anon><anon>R3C2</anon><anon>R3C3</anon></data>
3276    </opt>
3277
3278    {
3279      'head' => [
3280                  [ 'Col 1', 'Col 2', 'Col 3' ]
3281                ],
3282      'data' => [
3283                  [ 'R1C1', 'R1C2', 'R1C3' ],
3284                  [ 'R2C1', 'R2C2', 'R2C3' ],
3285                  [ 'R3C1', 'R3C2', 'R3C3' ]
3286                ]
3287    }
3288
3289Anonymous arrays can be nested to arbitrary levels and as a special case, if
3290the surrounding tags for an XML document contain only an anonymous array the
3291arrayref will be returned directly rather than the usual hashref:
3292
3293    <opt>
3294      <anon><anon>Col 1</anon><anon>Col 2</anon></anon>
3295      <anon><anon>R1C1</anon><anon>R1C2</anon></anon>
3296      <anon><anon>R2C1</anon><anon>R2C2</anon></anon>
3297    </opt>
3298
3299    [
3300      [ 'Col 1', 'Col 2' ],
3301      [ 'R1C1', 'R1C2' ],
3302      [ 'R2C1', 'R2C2' ]
3303    ]
3304
3305Elements which only contain text content will simply be represented as a
3306scalar.  Where an element has both attributes and text content, the element
3307will be represented as a hashref with the text content in the 'content' key
3308(see the C<ContentKey> option):
3309
3310  <opt>
3311    <one>first</one>
3312    <two attr="value">second</two>
3313  </opt>
3314
3315  {
3316    'one' => 'first',
3317    'two' => { 'attr' => 'value', 'content' => 'second' }
3318  }
3319
3320Mixed content (elements which contain both text content and nested elements)
3321will be not be represented in a useful way - element order and significant
3322whitespace will be lost.  If you need to work with mixed content, then
3323XML::Simple is not the right tool for your job - check out the next section.
3324
3325=head1 WHERE TO FROM HERE?
3326
3327B<XML::Simple> is able to present a simple API because it makes some
3328assumptions on your behalf.  These include:
3329
3330=over 4
3331
3332=item *
3333
3334You're not interested in text content consisting only of whitespace
3335
3336=item *
3337
3338You don't mind that when things get slurped into a hash the order is lost
3339
3340=item *
3341
3342You don't want fine-grained control of the formatting of generated XML
3343
3344=item *
3345
3346You would never use a hash key that was not a legal XML element name
3347
3348=item *
3349
3350You don't need help converting between different encodings
3351
3352=back
3353
3354In a serious XML project, you'll probably outgrow these assumptions fairly
3355quickly.  This section of the document used to offer some advice on choosing a
3356more powerful option.  That advice has now grown into the 'Perl-XML FAQ'
3357document which you can find at: L<http://perl-xml.sourceforge.net/faq/>
3358
3359The advice in the FAQ boils down to a quick explanation of tree versus
3360event based parsers and then recommends:
3361
3362For event based parsing, use SAX (do not set out to write any new code for
3363XML::Parser's handler API - it is obsolete).
3364
3365For tree-based parsing, you could choose between the 'Perlish' approach of
3366L<XML::Twig> and more standards based DOM implementations - preferably one with
3367XPath support such as L<XML::LibXML>.
3368
3369
3370=head1 SEE ALSO
3371
3372B<XML::Simple> requires either L<XML::Parser> or L<XML::SAX>.
3373
3374To generate documents with namespaces, L<XML::NamespaceSupport> is required.
3375
3376The optional caching functions require L<Storable>.
3377
3378Answers to Frequently Asked Questions about XML::Simple are bundled with this
3379distribution as: L<XML::Simple::FAQ>
3380
3381=head1 COPYRIGHT
3382
3383Copyright 1999-2004 Grant McLean E<lt>grantm@cpan.orgE<gt>
3384
3385This library is free software; you can redistribute it and/or modify it
3386under the same terms as Perl itself.
3387
3388=cut
3389
3390
3391