1package XML::MyXML;
2
3use 5.008001;
4use strict;
5use warnings;
6
7use XML::MyXML::Object;
8use XML::MyXML::Util 'trim', 'strip_ns';
9
10use Encode;
11use Carp;
12use Scalar::Util 'weaken';
13
14require Exporter;
15our @ISA = qw(Exporter);
16our @EXPORT_OK = qw(tidy_xml object_to_xml xml_to_object simple_to_xml xml_to_simple check_xml xml_escape);
17our %EXPORT_TAGS = (all => [@EXPORT_OK]);
18
19our $VERSION = "1.08";
20
21my $DEFAULT_INDENTSTRING = ' ' x 4;
22
23
24=encoding utf-8
25
26=head1 NAME
27
28XML::MyXML - A simple-to-use XML module, for parsing and creating XML documents
29
30=head1 SYNOPSIS
31
32    use XML::MyXML qw(tidy_xml xml_to_object);
33    use XML::MyXML qw(:all);
34
35    my $xml = "<item><name>Table</name><price><usd>10.00</usd><eur>8.50</eur></price></item>";
36    print tidy_xml($xml);
37
38    my $obj = xml_to_object($xml);
39    print "Price in Euros = " . $obj->path('price/eur')->text;
40
41    $obj->simplify is hashref { item => { name => 'Table', price => { usd => '10.00', eur => '8.50' } } }
42    $obj->simplify({ internal => 1 }) is hashref { name => 'Table', price => { usd => '10.00', eur => '8.50' } }
43
44=head1 EXPORTABLE
45
46xml_escape, tidy_xml, xml_to_object, object_to_xml, simple_to_xml, xml_to_simple, check_xml
47
48=head1 FEATURES & LIMITATIONS
49
50This module can parse XML comments, CDATA sections, XML entities (the standard five and numeric ones) and
51simple non-recursive C<< <!ENTITY> >>s
52
53It will ignore (won't parse) C<< <!DOCTYPE...> >>, C<< <?...?> >> and other C<< <!...> >> special markup
54
55All strings (XML documents, attribute names, values, etc) produced by this module or passed as parameters
56to its functions, are strings that contain characters, rather than bytes/octets. Unless you use the C<bytes>
57function flag (see below), in which case the XML documents (and just the XML documents) will be byte/octet
58strings.
59
60XML documents to be parsed may not contain the C<< > >> character unencoded in attribute values
61
62=head1 OPTIONAL FUNCTION FLAGS
63
64Some functions and methods in this module accept optional flags, listed under each function in the
65documentation. They are optional, default to zero unless stated otherwise, and can be used as follows:
66S<C<< function_name( $param1, { flag1 => 1, flag2 => 1 } ) >>>. This is what each flag does:
67
68C<strip> : the function will strip initial and ending whitespace from all text values returned
69
70C<file> : the function will expect the path to a file containing an XML document to parse, instead of an
71XML string
72
73C<complete> : the function's XML output will include an XML declaration (C<< <?xml ... ?>  >>) in the
74beginning
75
76C<internal> : the function will only return the contents of an element in a hashref instead of the
77element itself (see L</SYNOPSIS> for example)
78
79C<tidy> : the function will return tidy XML
80
81C<indentstring> : when producing tidy XML, this denotes the string with which child elements will be
82indented (Default is a string of 4 spaces)
83
84C<save> : the function (apart from doing what it's supposed to do) will also save its XML output in a
85file whose path is denoted by this flag
86
87C<strip_ns> : strip the namespaces (characters up to and including ':') from the tags
88
89C<xslt> : will add a <?xml-stylesheet?> link in the XML that's being output, of type 'text/xsl',
90pointing to the filename or URL denoted by this flag
91
92C<arrayref> : the function will create a simple arrayref instead of a simple hashref (which will preserve
93order and elements with duplicate tags)
94
95C<bytes> : the XML document string which is parsed and/or produced by this function, should contain
96bytes/octets rather than characters
97
98=head1 FUNCTIONS
99
100=cut
101
102sub _encode {
103    my $string = shift;
104    defined $string or $string = '';
105    my %replace =   (
106                    '<' => '&lt;',
107                    '>' => '&gt;',
108                    '&' => '&amp;',
109                    '\'' => '&apos;',
110                    '"' => '&quot;',
111                    );
112    my $keys = "(".join("|", sort {length($b) <=> length($a)} keys %replace).")";
113    $string =~ s/$keys/$replace{$1}/g;
114    return $string;
115}
116
117
118=head2 xml_escape($string)
119
120Returns the same string, but with the C<< < >>, C<< > >>, C<< & >>, C<< " >> and C<< ' >> characters
121replaced by their XML entities (e.g. C<< &amp; >>).
122
123=cut
124
125sub xml_escape {
126    my ($string) = @_;
127
128    return _encode($string);
129}
130
131sub _decode {
132    my $string = shift;
133    my $entities = shift || {};
134    my $flags = shift || {};
135    defined $string or $string = '';
136    my %replace = (
137        %$entities,
138        '&lt;'   => '<',
139        '&gt;'   => '>',
140        '&amp;'  => '&',
141        '&apos;' => "'",
142        '&quot;' => '"',
143    );
144    my @capture = map quotemeta, keys %replace;
145    push @capture, '\&\#x([0-9A-Fa-f]+)\;', '\&\#([0-9]+)\;';
146    my $capture = "(".join("|", @capture).")";
147    $string =~ s|
148        $capture
149    |
150        my $reference = $1;
151        my $number = $2;
152        $reference =~ /\&\#x/ ? chr(hex($number))
153            : $reference =~ /\&\#/ ? chr($number)
154            : $replace{$reference};
155    |gex;
156    return $string;
157}
158
159
160=head2 tidy_xml($raw_xml)
161
162Returns the XML string in a tidy format (with tabs & newlines)
163
164Optional flags: C<file>, C<complete>, C<indentstring>, C<save>, C<bytes>
165
166=cut
167
168sub tidy_xml {
169    my $xml = shift;
170    my $flags = shift || {};
171
172    my $object = xml_to_object($xml, $flags);
173    defined $object or return $object;
174    _tidy_object($object, undef, $flags);
175    my $return = $object->to_xml({ %$flags, tidy => 0 }) . "\n";
176    return $return;
177}
178
179
180=head2 xml_to_object($raw_xml)
181
182Creates an 'XML::MyXML::Object' object from the raw XML provided
183
184Optional flags: C<file>, C<bytes>
185
186=cut
187
188sub xml_to_object {
189    my $xml = shift;
190    my $flags = shift || {};
191
192    if ($flags->{file}) {
193        open my $fh, '<', $xml or croak "Error: The file '$xml' could not be opened for reading: $!";
194        $xml = do { local $/; <$fh> };
195        close $fh;
196    }
197
198    if ($flags->{bytes} or $flags->{file}) {
199        my (undef, undef, $encoding) = $xml =~ /<\?xml(\s[^>]+)?\sencoding=(['"])(.*?)\2/;
200        $encoding = 'UTF-8' if ! defined $encoding or $encoding =~ /^utf\-?8\z/i;
201        my $encoding_obj = find_encoding($encoding) or croak "Error: encoding '$encoding' not found";
202        eval { $xml = $encoding_obj->decode($xml, Encode::FB_CROAK); 1 }
203            or croak "Error: Input string is invalid $encoding";
204    }
205
206    my $entities = {};
207
208    # Parse CDATA sections
209    $xml =~ s/\<\!\[CDATA\[(.*?)\]\]\>/_encode($1)/egs;
210    my @items = $xml =~ /(<!--.*?(?:-->|$)|<[^>]*?>|[^<>]+)/sg;
211    # Remove comments, special markup and initial whitespace
212    {
213        my $init_ws = 1; # whether we are inside initial whitespace
214        foreach my $item (@items) {
215            if ($item =~ /\A<!--/) {
216                if ($item !~ /-->\z/) { croak encode_utf8("Error: unclosed XML comment block - '$item'"); }
217                undef $item;
218            } elsif ($item =~ /\A<\?/) { # like <?xml?> or <?target?>
219                if ($item !~ /\?>\z/) { croak encode_utf8("Error: Erroneous special markup - '$item'"); }
220                undef $item;
221            } elsif (my ($entname, undef, $entvalue) = $item =~ /^<!ENTITY\s+(\S+)\s+(['"])(.*?)\2\s*>\z/) {
222                $entities->{"&$entname;"} = _decode($entvalue);
223                undef $item;
224            } elsif ($item =~ /<!/) { # like <!DOCTYPE> or <!ELEMENT> or <!ATTLIST>
225                undef $item;
226            } elsif ($init_ws) {
227                if ($item =~ /\S/) {
228                    $init_ws = 0;
229                } else {
230                    undef $item;
231                }
232            }
233        }
234        @items = grep defined, @items or croak "Error: No elements in the XML document";
235    }
236    my @stack;
237    my $object = bless ({
238        content      => [],
239        full_ns_info => {},
240        ns_data      => {},
241    }, 'XML::MyXML::Object');
242    my $pointer = $object;
243    foreach my $item (@items) {
244        if ($item =~ /^\<\/?\>\z/) {
245            croak encode_utf8("Error: Strange tag: '$item'");
246        } elsif ($item =~ /^\<\/([^\s>]+)\>\z/) {
247            my ($el_name) = $1;
248            $stack[-1]{el_name} eq $el_name
249                or croak encode_utf8("Error: Incompatible stack element: stack='$stack[-1]{el_name}' item='$item'");
250            my $stack_entry = pop @stack;
251            delete $stack_entry->{content} if ! @{$stack_entry->{content}};
252            $pointer = $stack_entry->{parent};
253        } elsif ($item =~ /^\<[^>]+?(\/)?\>\z/) {
254            my $is_self_closing = defined $1;
255            my ($el_name) = $item =~ /^<([^\s>\/]+)/;
256            defined $el_name or croak encode_utf8("Error: Strange tag: '$item'");
257            $item =~ s/^\<\Q$el_name\E//;
258            $item =~ s/\/>\z//;
259            my @attrs = $item =~ /\s+(\S+=(['"]).*?\2)/g;
260            my $i = 0;
261            @attrs = grep {++$i % 2} @attrs;
262            my %attr;
263            foreach my $attr (@attrs) {
264                my ($attr_name, undef, $attr_value) = $attr =~ /^(\S+?)=(['"])(.*?)\2\z/;
265                defined $attr_name or croak encode_utf8("Error: Strange attribute: '$attr'");
266                $attr{$attr_name} = _decode($attr_value, $entities);
267            }
268            my $entry = bless {
269                el_name => $el_name,
270                attrs   => \%attr,
271                $is_self_closing ? () : (content => []),
272                parent  => $pointer,
273            }, 'XML::MyXML::Object';
274            weaken $entry->{parent};
275            $entry->_apply_namespace_declarations;
276            push @stack, $entry unless $is_self_closing;
277            push @{$pointer->{content}}, $entry;
278            $pointer = $entry unless $is_self_closing;
279        } elsif ($item =~ /^[^<>]*\z/) {
280            my $entry = bless {
281                text => _decode($item, $entities),
282                parent => $pointer,
283            }, 'XML::MyXML::Object';
284            weaken $entry->{parent};
285            push @{$pointer->{content}}, $entry;
286        } else {
287            croak encode_utf8("Error: Strange element: '$item'");
288        }
289    }
290    ! @stack or croak encode_utf8("Error: The <$stack[-1]{el_name}> element has not been closed in the XML document");
291    $object = $object->{content}[0];
292    $object->{parent} = undef;
293    return $object;
294}
295
296sub _objectarray_to_xml {
297    my $object = shift;
298
299    my $xml = '';
300    foreach my $stuff (@$object) {
301        if (! defined $stuff->{el_name} and defined $stuff->{text}) {
302            $xml .= _encode($stuff->{text});
303        } else {
304            $xml .= "<".$stuff->{el_name};
305            foreach my $attrname (keys %{$stuff->{attrs}}) {
306                $xml .= " ".$attrname.'="'._encode($stuff->{attrs}{$attrname}).'"';
307            }
308            if (! defined $stuff->{content} or ! @{ $stuff->{content} }) {
309                $xml .= "/>"
310            } else {
311                $xml .= ">";
312                $xml .= _objectarray_to_xml($stuff->{content});
313                $xml .= "</".$stuff->{el_name}.">";
314            }
315        }
316    }
317    return $xml;
318}
319
320
321=head2 object_to_xml($object)
322
323Creates an XML string from the 'XML::MyXML::Object' object provided
324
325Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<bytes>
326
327=cut
328
329sub object_to_xml {
330    my $object = shift;
331    my $flags = shift || {};
332
333    return $object->to_xml( $flags );
334}
335
336sub _tidy_object {
337    my $object = shift;
338    my $tabs = shift || 0;
339    my $flags = shift || {};
340
341    my $indentstring = exists $flags->{indentstring} ? $flags->{indentstring} : $DEFAULT_INDENTSTRING;
342
343    return unless defined $object->{content} and @{$object->{content}};
344    my $hastext;
345    my @children = @{$object->{content}};
346    foreach my $i (0..$#children) {
347        my $child = $children[$i];
348        if (defined $child->{text} and $child->{text} =~ /\S/) {
349            $hastext = 1;
350            last;
351        }
352    }
353    return if $hastext;
354
355    @{$object->{content}} = grep { ! defined $_->{text} or $_->{text} =~ /\S/ } @{$object->{content}};
356
357    @children = @{$object->{content}};
358    $object->{content} = [];
359    for my $i (0..$#children) {
360        my $whitespace = bless {
361            text => "\n".($indentstring x ($tabs+1)),
362            parent => $object,
363        }, 'XML::MyXML::Object';
364        weaken $whitespace->{parent};
365        push @{$object->{content}}, $whitespace;
366        push @{$object->{content}}, $children[$i];
367    }
368    my $whitespace = bless {
369        text => "\n".($indentstring x $tabs),
370        parent => $object,
371    }, 'XML::MyXML::Object';
372    weaken $whitespace->{parent};
373    push @{$object->{content}}, $whitespace;
374
375    for my $i (0..$#{$object->{content}}) {
376        _tidy_object($object->{content}[$i], $tabs+1, $flags);
377    }
378}
379
380
381=head2 simple_to_xml($simple_array_ref)
382
383Produces a raw XML string from either an array reference, a hash reference or a mixed structure such as these examples:
384
385    { thing => { name => 'John', location => { city => 'New York', country => 'U.S.A.' } } }
386    # <thing><name>John</name><location><country>U.S.A.</country><city>New York</city></location></thing>
387
388    [ thing => [ name => 'John', location => [ city => 'New York', country => 'U.S.A.' ] ] ]
389    # <thing><name>John</name><location><country>U.S.A.</country><city>New York</city></location></thing>
390
391    { thing => { name => 'John', location => [ city => 'New York', city => 'Boston', country => 'U.S.A.' ] } }
392    # <thing><name>John</name><location><city>New York</city><city>Boston</city><country>U.S.A.</country></location></thing>
393
394Here's a mini-tutorial on how to use this function, in which you'll also see how to set attributes.
395
396The simplest invocations are these:
397
398    simple_to_xml({target => undef})
399    # <target/>
400
401    simple_to_xml({target => 123})
402    # <target>123</target>
403
404Every set of sibling elements (such as the document itself, which is a single top-level element, or a pack of
4055 elements all children to the same parent element) is represented in the $simple_array_ref parameter as
406key-value pairs inside either a hashref or an arrayref (you can choose which).
407
408Keys represent tags+attributes of the sibling elements, whereas values represent the contents of those elements.
409
410Eg:
411
412    [
413        first => 'John',
414        last => 'Doe,'
415    ]
416
417...and...
418
419    {
420        first => 'John',
421        last => 'Doe',
422    }
423
424both translate to:
425
426    <first>John</first><last>Doe</last>
427
428A value can either be undef (to denote an empty element), or a string (to denote a string), or another
429hashref/arrayref to denote a set of children elements, like this:
430
431    {
432        person => {
433            name => {
434                first => 'John',
435                last => 'Doe'
436            }
437        }
438    }
439
440...becomes:
441
442    <person>
443        <name>
444            <first>John</first>
445            <last>Doe</last>
446        </name>
447    </person>
448
449
450The only difference between using an arrayref or using a hashref, is that arrayrefs preserve the
451order of the elements, and allow repetition of identical tags. So a person with many addresses, should choose to
452represent its list of addresses under an arrayref, like this:
453
454    {
455        person => [
456            name => {
457                first => 'John',
458                last => 'Doe',
459            },
460            address => {
461                country => 'Malta',
462            },
463            address => {
464                country => 'Indonesia',
465            },
466            address => {
467                country => 'China',
468            }
469        ]
470    }
471
472...which becomes:
473
474    <person>
475        <name>
476            <last>Doe</last>
477            <first>John</first>
478        </name>
479        <address>
480            <country>Malta</country>
481        </address>
482        <address>
483            <country>Indonesia</country>
484        </address>
485        <address>
486            <country>China</country>
487        </address>
488    </person>
489
490Finally, to set attributes to your elements (eg id="12") you need to replace the key with either
491a string containing attributes as well (eg: C<'address id="12"'>), or replace it with a reference, as the many
492items in the examples below:
493
494    {thing => [
495        'item id="1"' => 'chair',
496        [item => {id => 2}] => 'table',
497        [item => [id => 3]] => 'door',
498        [item => id => 4] => 'sofa',
499        {item => {id => 5}} => 'bed',
500        {item => [id => 6]} => 'shirt',
501        [item => {id => 7, other => 8}, [more => 9, also => 10, but_not => undef]] => 'towel'
502    ]}
503
504...which becomes:
505
506    <thing>
507        <item id="1">chair</item>
508        <item id="2">table</item>
509        <item id="3">door</item>
510        <item id="4">sofa</item>
511        <item id="5">bed</item>
512        <item id="6">shirt</item>
513        <item id="7" other="8" more="9" also="10">towel</item>
514    </thing>
515
516As you see, attributes may be represented in a great variety of ways, so you don't need to remember
517the "correct" one.
518
519Of course if the "simple structure" is a hashref, the key cannot be a reference (because hash keys are always
520strings), so if you want attributes on your elements, you either need the enclosing structure to be an
521arrayref as in the example above, to allow keys to be refs which contain the attributes, or you need to
522represent the key (=tag+attrs) as a string, like this (also in the previous example): C<'item id="1"'>
523
524This concludes the mini-tutorial of the simple_to_xml function.
525
526All the strings in C<$simple_array_ref> need to contain characters, rather than bytes/octets. The C<bytes>
527optional flag only affects the produced XML string.
528
529Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<xslt>, C<bytes>
530
531=cut
532
533sub simple_to_xml {
534    my $arref = shift;
535    my $flags = shift || {};
536
537    my $xml = '';
538    my ($key, $value, @residue) = (ref $arref eq 'HASH') ? %$arref : @$arref;
539    $key = _key_to_string($key);
540    ! @residue or croak "Error: the provided simple ref contains more than 1 top element";
541    my ($el_name) = $key =~ /^(\S+)/;
542    defined $el_name or croak encode_utf8 "Error: Strange key: $key";
543
544    if (! ref $value) {
545        if ($key eq '!as_is') {
546            check_xml $value or croak "invalid xml: $value";
547            $xml .= $value;
548        } elsif (defined $value and length $value) {
549            $xml .= "<$key>"._encode($value)."</$el_name>";
550        } else {
551            $xml .= "<$key/>";
552        }
553    } else {
554        $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>";
555    }
556    if ($flags->{tidy}) {
557        $xml = tidy_xml($xml, {
558            exists $flags->{indentstring} ? (indentstring => $flags->{indentstring}) : ()
559        });
560    }
561    my $decl = '';
562    $decl .= qq'<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>\n' if $flags->{complete};
563    $decl .= qq'<?xml-stylesheet type="text/xsl" href="$flags->{xslt}"?>\n' if $flags->{xslt};
564    $xml = $decl . $xml;
565
566    if (defined $flags->{save}) {
567        open my $fh, '>', $flags->{save} or croak "Error: Couldn't open file '$flags->{save}' for writing: $!";
568        binmode $fh, ':encoding(UTF-8)';
569        print $fh $xml;
570        close $fh;
571    }
572
573    $xml = encode_utf8($xml) if $flags->{bytes};
574    return $xml;
575}
576
577sub _flatten {
578    my ($thing) = @_;
579
580    if (!ref $thing) { return $thing; }
581    elsif (ref $thing eq 'HASH') { return map _flatten($_), %$thing; }
582    elsif (ref $thing eq 'ARRAY') { return map _flatten($_), @$thing; }
583    else { croak 'Error: reference of invalid type in simple_to_xml: '.(ref $thing); }
584}
585
586sub _key_to_string {
587    my ($key) = @_;
588
589    if (! ref $key) {
590        return $key;
591    } else {
592        my ($tag, %attrs) = _flatten($key);
593        return $tag . join('', map " $_=\""._encode($attrs{$_}).'"', grep {defined $attrs{$_}} keys %attrs);
594    }
595}
596
597sub _arrayref_to_xml {
598    my $arref = shift;
599    my $flags = shift || {};
600
601    my $xml = '';
602
603    if (ref $arref eq 'HASH') { return _hashref_to_xml($arref, $flags); }
604
605    foreach (my $i = 0; $i <= $#$arref; ) {
606        my $key = $arref->[$i++];
607        $key = _key_to_string($key);
608        my ($el_name) = $key =~ /^(\S+)/;
609        defined $el_name or croak encode_utf8 "Error: Strange key: $key";
610        my $value = $arref->[$i++];
611
612        if ($key eq '!as_is') {
613            check_xml $value or croak "invalid xml: $value";
614            $xml .= $value;
615        } elsif (! ref $value) {
616            if (defined $value and length $value) {
617                $xml .= "<$key>@{[ _encode($value) ]}</$el_name>";
618            } else {
619                $xml .= "<$key/>";
620            }
621        } else {
622            $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>";
623        }
624    }
625    return $xml;
626}
627
628sub _hashref_to_xml {
629    my $hashref = shift;
630    my $flags = shift || {};
631
632    my $xml = '';
633
634    while (my ($key, $value) = each %$hashref) {
635        my ($el_name) = $key =~ /^(\S+)/;
636        defined $el_name or croak encode_utf8 "Error: Strange key: $key";
637
638        if ($key eq '!as_is') {
639            check_xml $value or croak "invalid xml: $value";
640            $xml .= $value;
641        } elsif (! ref $value) {
642            if (defined $value and length $value) {
643                $xml .= "<$key>@{[ _encode($value) ]}</$el_name>";
644            } else {
645                $xml .= "<$key/>";
646            }
647        } else {
648            $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>";
649        }
650    }
651    return $xml;
652}
653
654
655=head2 xml_to_simple($raw_xml)
656
657Produces a very simple hash object from the raw XML string provided. An example hash object created thusly is this:
658S<C<< { thing => { name => 'John', location => { city => 'New York', country => 'U.S.A.' } } } >>>
659
660B<WARNING:> This function only works on very simple XML strings, i.e. children of an element may not consist of both
661text and elements (child elements will be discarded in that case). Also attributes in tags are ignored.
662
663Since the object created is a hashref (unless used with the C<arrayref> optional flag), duplicate keys will be
664discarded.
665
666All strings contained in the output simple structure will always contain characters rather than octets/bytes,
667regardless of the C<bytes> optional flag.
668
669Optional flags: C<internal>, C<strip>, C<file>, C<strip_ns>, C<arrayref>, C<bytes>
670
671=cut
672
673sub xml_to_simple {
674    my $xml = shift;
675    my $flags = shift || {};
676
677    my $object = xml_to_object($xml, $flags);
678
679    $object = $object->simplify($flags) if defined $object;
680
681    return $object;
682}
683
684sub _objectarray_to_simple {
685    my $object = shift;
686    my $flags = shift || {};
687
688    defined $object or return undef;
689
690    return $flags->{arrayref}
691        ? _objectarray_to_simple_arrayref($object, $flags)
692        : _objectarray_to_simple_hashref($object, $flags);
693}
694
695sub _objectarray_to_simple_hashref {
696    my $object = shift;
697    my $flags = shift || {};
698
699    defined $object or return undef;
700
701    my $hashref = {};
702
703    foreach my $stuff (@$object) {
704        if (defined(my $key = $stuff->{el_name})) {
705            $key = strip_ns $key if $flags->{strip_ns};
706            $hashref->{ $key } = _objectarray_to_simple($stuff->{content}, $flags);
707        }
708        elsif (defined(my $value = $stuff->{text})) {
709            $value = trim $value if $flags->{strip};
710            return $value if $value =~ /\S/;
711        }
712    }
713
714    return %$hashref ? $hashref : undef;
715}
716
717sub _objectarray_to_simple_arrayref {
718    my $object = shift;
719    my $flags = shift || {};
720
721    defined $object or return undef;
722
723    my $arrayref = [];
724
725    foreach my $stuff (@$object) {
726        if (defined(my $key = $stuff->{el_name})) {
727            $key = strip_ns $key if $flags->{strip_ns};
728            push @$arrayref, ( $key, _objectarray_to_simple($stuff->{content}, $flags) );
729        } elsif (defined(my $value = $stuff->{text})) {
730            $value = trim $value if $flags->{strip};
731            return $value if $value =~ /\S/;
732        }
733    }
734
735    return @$arrayref ? $arrayref : undef;
736}
737
738
739=head2 check_xml($raw_xml)
740
741Returns true if the $raw_xml string is valid XML (valid enough to be used by this module), and false otherwise.
742
743Optional flags: C<file>, C<bytes>
744
745=cut
746
747sub check_xml {
748    my $xml = shift;
749    my $flags = shift || {};
750
751    my $ok = eval { xml_to_object($xml, $flags); 1 };
752    return !!$ok;
753}
754
755
7561; # End of XML::MyXML
757
758__END__
759
760
761=head1 OBJECT METHODS
762
763=head2 $obj->path("subtag1/subsubtag2[attr1=val1][attr2]/.../subsubsubtagX")
764
765Returns the element specified by the path as an XML::MyXML::Object object. When there are more than one tags
766with the specified name in the last step of the path, it will return all of them as an array. In scalar
767context will only return the first one. Simple CSS3-style attribute selectors are allowed in the path next
768to the tagnames, for example: C<< p[class=big] >> will only return C<< <p> >> elements that contain an
769attribute called "class" with a value of "big". p[class] on the other hand will return p elements having a
770"class" attribute, but that attribute can have any value. It's possible to surround attribute values with
771quotes, like so: C<< input[name="foo[]"] >>
772
773An example... To print the last names of all the students from the following XML, do:
774
775    my $xml = <<'EOB';
776    <people>
777        <student>
778            <name>
779                <first>Alex</first>
780                <last>Karelas</last>
781            </name>
782        </student>
783        <student>
784            <name>
785                <first>John</first>
786                <last>Doe</last>
787            </name>
788        </student>
789        <teacher>
790            <name>
791                <first>Mary</first>
792                <last>Poppins</last>
793            </name>
794        </teacher>
795        <teacher>
796            <name>
797                <first>Peter</first>
798                <last>Gabriel</last>
799            </name>
800        </teacher>
801    </people>
802    EOB
803
804    my $obj = xml_to_object($xml);
805    my @students = $obj->path('student');
806    foreach my $student (@students) {
807        print $student->path('name/last')->value, "\n";
808    }
809
810...or like this...
811
812    my @last = $obj->path('student/name/last');
813    foreach my $last (@last) {
814        print $last->value, "\n";
815    }
816
817If you wish to describe the root element in the path as well, prepend it in the path with a slash like so:
818
819    if( $student->path('/student/name/last')->value eq $student->path('name/last')->value ) {
820        print "The two are identical", "\n";
821    }
822
823B<Since XML::MyXML version 1.08, the path method supports namespaces.>
824
825You can replace the namespace prefix of an attribute or an element name in the path string with the
826namespace name inside curly brackets, and place the curly-bracketed expression after the local part.
827
828B<< I<Example #1:> >> Suppose the XML you want to go through is:
829
830    <stream:stream xmlns:stream="http://foo/bar">
831        <a>b</a>
832    </stream:stream>
833
834Then this will return the string C<"b">:
835
836    $obj->path('/stream{http://foo/bar}/a')->value;
837
838B<< I<Example #2:> >> Suppose the XML you want to go through is:
839
840    <stream xmlns="http://foo/bar">
841        <a>b</a>
842    </stream>
843
844Then both of these expressions will return C<"b">:
845
846    $obj->path('/stream/a{http://foo/bar}')->value;
847    $obj->path('/stream{http://foo/bar}/a{http://foo/bar}')->value;
848
849B<Since XML::MyXML version 1.08, quotes in attribute match strings have no special meaning.>
850
851If you want to use the "]" character in attribute values, you need to escape it with a
852backslash character. As you need if you want to use the "}" character in a namespace value
853in the path string.
854
855B<< I<Example #1:> >> Suppose the XML you want to go through is:
856
857    <stream xmlns:o="http://foo}bar">
858        <a o:name="c]d">b</a>
859    </stream>
860
861Then this expression will return C<"b">:
862
863    $obj->path('/stream/a[name{http://foo\}bar}=c\]d]')->value;
864
865B<< I<Example #2:> >> You can match attribute values containing quote characters with just C<"> in the path string.
866
867If the XML is:
868
869    <stream id="&quot;1&quot;">a</stream>
870
871...then this will return the string C<"a">:
872
873    $obj->path('/stream[id="1"]')->value;
874
875Optional flags: none
876
877=head2 $obj->text([set_value]), also known as $obj->value([set_value])
878
879If provided a set_value, will delete all contents of $obj and will place C<set_value> as its text contents.
880Otherwise will return the text contents of this object, and of its descendants, in a single string.
881
882Optional flags: C<strip>
883
884=head2 $obj->inner_xml([xml_string])
885
886Gets or sets the inner XML of the $obj node, depending on whether C<xml_string> is provided.
887
888Optional flags: C<bytes>
889
890=head2 $obj->attr('attrname' [, 'attrvalue'])
891
892Gets/Sets the value of the 'attrname' attribute of the top element. Returns undef if attribute does not exist.
893If called without the 'attrname' parameter, returns a hash with all attribute => value pairs. If setting with
894an attrvalue of C<undef>, then removes that attribute entirely.
895
896Input parameters and output are all in character strings, rather than octets/bytes.
897
898Optional flags: none
899
900=head2 $obj->tag
901
902Returns the tag of the $obj element. E.g. if $obj represents an <rss:item> element, C<< $obj->tag >> will
903return the string 'rss:item'. Returns undef if $obj doesn't represent a tag.
904
905Optional flags: C<strip_ns>
906
907=head2 $obj->name
908
909Same as C<< $obj->tag >> (alias).
910
911=head2 $obj->parent
912
913Returns the XML::MyXML::Object element that is the parent of $obj in the document. Returns undef if $obj
914doesn't have a parent.
915
916Optional flags: none
917
918=head2 $obj->simplify
919
920Returns a very simple hashref, like the one returned with C<&XML::MyXML::xml_to_simple>. Same restrictions
921and warnings apply.
922
923Optional flags: C<internal>, C<strip>, C<strip_ns>, C<arrayref>
924
925=head2 $obj->to_xml
926
927Returns the XML string of the object, just like calling C<object_to_xml( $obj )>
928
929Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<bytes>
930
931=head2 $obj->to_tidy_xml
932
933Returns the XML string of the object in tidy form, just like calling C<tidy_xml( object_to_xml( $obj ) )>
934
935Optional flags: C<complete>, C<indentstring>, C<save>, C<bytes>
936
937=head1 BUGS
938
939If you have a Github account, report your issues at
940L<https://github.com/akarelas/xml-myxml/issues>.
941I will be notified, and then you'll automatically be notified of progress on
942your bug as I make changes.
943
944You can get notified of new versions of this module for free, by email or RSS,
945at L<https://www.perlmodules.net/viewfeed/distro/XML-MyXML>
946
947=head1 LICENSE
948
949Copyright (C) Alexander Karelas.
950
951This library is free software; you can redistribute it and/or modify
952it under the same terms as Perl itself.
953
954=head1 AUTHOR
955
956Alexander Karelas E<lt>karjala@cpan.orgE<gt>
957