1package XML::MyXML; 2 3use 5.008001; 4use strict; 5use warnings; 6 7use XML::MyXML::Object; 8use XML::MyXML::Util 'trim', 'strip_ns'; 9 10use Encode; 11use Carp; 12use Scalar::Util 'weaken'; 13 14require Exporter; 15our @ISA = qw(Exporter); 16our @EXPORT_OK = qw(tidy_xml object_to_xml xml_to_object simple_to_xml xml_to_simple check_xml xml_escape); 17our %EXPORT_TAGS = (all => [@EXPORT_OK]); 18 19our $VERSION = "1.08"; 20 21my $DEFAULT_INDENTSTRING = ' ' x 4; 22 23 24=encoding utf-8 25 26=head1 NAME 27 28XML::MyXML - A simple-to-use XML module, for parsing and creating XML documents 29 30=head1 SYNOPSIS 31 32 use XML::MyXML qw(tidy_xml xml_to_object); 33 use XML::MyXML qw(:all); 34 35 my $xml = "<item><name>Table</name><price><usd>10.00</usd><eur>8.50</eur></price></item>"; 36 print tidy_xml($xml); 37 38 my $obj = xml_to_object($xml); 39 print "Price in Euros = " . $obj->path('price/eur')->text; 40 41 $obj->simplify is hashref { item => { name => 'Table', price => { usd => '10.00', eur => '8.50' } } } 42 $obj->simplify({ internal => 1 }) is hashref { name => 'Table', price => { usd => '10.00', eur => '8.50' } } 43 44=head1 EXPORTABLE 45 46xml_escape, tidy_xml, xml_to_object, object_to_xml, simple_to_xml, xml_to_simple, check_xml 47 48=head1 FEATURES & LIMITATIONS 49 50This module can parse XML comments, CDATA sections, XML entities (the standard five and numeric ones) and 51simple non-recursive C<< <!ENTITY> >>s 52 53It will ignore (won't parse) C<< <!DOCTYPE...> >>, C<< <?...?> >> and other C<< <!...> >> special markup 54 55All strings (XML documents, attribute names, values, etc) produced by this module or passed as parameters 56to its functions, are strings that contain characters, rather than bytes/octets. Unless you use the C<bytes> 57function flag (see below), in which case the XML documents (and just the XML documents) will be byte/octet 58strings. 59 60XML documents to be parsed may not contain the C<< > >> character unencoded in attribute values 61 62=head1 OPTIONAL FUNCTION FLAGS 63 64Some functions and methods in this module accept optional flags, listed under each function in the 65documentation. They are optional, default to zero unless stated otherwise, and can be used as follows: 66S<C<< function_name( $param1, { flag1 => 1, flag2 => 1 } ) >>>. This is what each flag does: 67 68C<strip> : the function will strip initial and ending whitespace from all text values returned 69 70C<file> : the function will expect the path to a file containing an XML document to parse, instead of an 71XML string 72 73C<complete> : the function's XML output will include an XML declaration (C<< <?xml ... ?> >>) in the 74beginning 75 76C<internal> : the function will only return the contents of an element in a hashref instead of the 77element itself (see L</SYNOPSIS> for example) 78 79C<tidy> : the function will return tidy XML 80 81C<indentstring> : when producing tidy XML, this denotes the string with which child elements will be 82indented (Default is a string of 4 spaces) 83 84C<save> : the function (apart from doing what it's supposed to do) will also save its XML output in a 85file whose path is denoted by this flag 86 87C<strip_ns> : strip the namespaces (characters up to and including ':') from the tags 88 89C<xslt> : will add a <?xml-stylesheet?> link in the XML that's being output, of type 'text/xsl', 90pointing to the filename or URL denoted by this flag 91 92C<arrayref> : the function will create a simple arrayref instead of a simple hashref (which will preserve 93order and elements with duplicate tags) 94 95C<bytes> : the XML document string which is parsed and/or produced by this function, should contain 96bytes/octets rather than characters 97 98=head1 FUNCTIONS 99 100=cut 101 102sub _encode { 103 my $string = shift; 104 defined $string or $string = ''; 105 my %replace = ( 106 '<' => '<', 107 '>' => '>', 108 '&' => '&', 109 '\'' => ''', 110 '"' => '"', 111 ); 112 my $keys = "(".join("|", sort {length($b) <=> length($a)} keys %replace).")"; 113 $string =~ s/$keys/$replace{$1}/g; 114 return $string; 115} 116 117 118=head2 xml_escape($string) 119 120Returns the same string, but with the C<< < >>, C<< > >>, C<< & >>, C<< " >> and C<< ' >> characters 121replaced by their XML entities (e.g. C<< & >>). 122 123=cut 124 125sub xml_escape { 126 my ($string) = @_; 127 128 return _encode($string); 129} 130 131sub _decode { 132 my $string = shift; 133 my $entities = shift || {}; 134 my $flags = shift || {}; 135 defined $string or $string = ''; 136 my %replace = ( 137 %$entities, 138 '<' => '<', 139 '>' => '>', 140 '&' => '&', 141 ''' => "'", 142 '"' => '"', 143 ); 144 my @capture = map quotemeta, keys %replace; 145 push @capture, '\&\#x([0-9A-Fa-f]+)\;', '\&\#([0-9]+)\;'; 146 my $capture = "(".join("|", @capture).")"; 147 $string =~ s| 148 $capture 149 | 150 my $reference = $1; 151 my $number = $2; 152 $reference =~ /\&\#x/ ? chr(hex($number)) 153 : $reference =~ /\&\#/ ? chr($number) 154 : $replace{$reference}; 155 |gex; 156 return $string; 157} 158 159 160=head2 tidy_xml($raw_xml) 161 162Returns the XML string in a tidy format (with tabs & newlines) 163 164Optional flags: C<file>, C<complete>, C<indentstring>, C<save>, C<bytes> 165 166=cut 167 168sub tidy_xml { 169 my $xml = shift; 170 my $flags = shift || {}; 171 172 my $object = xml_to_object($xml, $flags); 173 defined $object or return $object; 174 _tidy_object($object, undef, $flags); 175 my $return = $object->to_xml({ %$flags, tidy => 0 }) . "\n"; 176 return $return; 177} 178 179 180=head2 xml_to_object($raw_xml) 181 182Creates an 'XML::MyXML::Object' object from the raw XML provided 183 184Optional flags: C<file>, C<bytes> 185 186=cut 187 188sub xml_to_object { 189 my $xml = shift; 190 my $flags = shift || {}; 191 192 if ($flags->{file}) { 193 open my $fh, '<', $xml or croak "Error: The file '$xml' could not be opened for reading: $!"; 194 $xml = do { local $/; <$fh> }; 195 close $fh; 196 } 197 198 if ($flags->{bytes} or $flags->{file}) { 199 my (undef, undef, $encoding) = $xml =~ /<\?xml(\s[^>]+)?\sencoding=(['"])(.*?)\2/; 200 $encoding = 'UTF-8' if ! defined $encoding or $encoding =~ /^utf\-?8\z/i; 201 my $encoding_obj = find_encoding($encoding) or croak "Error: encoding '$encoding' not found"; 202 eval { $xml = $encoding_obj->decode($xml, Encode::FB_CROAK); 1 } 203 or croak "Error: Input string is invalid $encoding"; 204 } 205 206 my $entities = {}; 207 208 # Parse CDATA sections 209 $xml =~ s/\<\!\[CDATA\[(.*?)\]\]\>/_encode($1)/egs; 210 my @items = $xml =~ /(<!--.*?(?:-->|$)|<[^>]*?>|[^<>]+)/sg; 211 # Remove comments, special markup and initial whitespace 212 { 213 my $init_ws = 1; # whether we are inside initial whitespace 214 foreach my $item (@items) { 215 if ($item =~ /\A<!--/) { 216 if ($item !~ /-->\z/) { croak encode_utf8("Error: unclosed XML comment block - '$item'"); } 217 undef $item; 218 } elsif ($item =~ /\A<\?/) { # like <?xml?> or <?target?> 219 if ($item !~ /\?>\z/) { croak encode_utf8("Error: Erroneous special markup - '$item'"); } 220 undef $item; 221 } elsif (my ($entname, undef, $entvalue) = $item =~ /^<!ENTITY\s+(\S+)\s+(['"])(.*?)\2\s*>\z/) { 222 $entities->{"&$entname;"} = _decode($entvalue); 223 undef $item; 224 } elsif ($item =~ /<!/) { # like <!DOCTYPE> or <!ELEMENT> or <!ATTLIST> 225 undef $item; 226 } elsif ($init_ws) { 227 if ($item =~ /\S/) { 228 $init_ws = 0; 229 } else { 230 undef $item; 231 } 232 } 233 } 234 @items = grep defined, @items or croak "Error: No elements in the XML document"; 235 } 236 my @stack; 237 my $object = bless ({ 238 content => [], 239 full_ns_info => {}, 240 ns_data => {}, 241 }, 'XML::MyXML::Object'); 242 my $pointer = $object; 243 foreach my $item (@items) { 244 if ($item =~ /^\<\/?\>\z/) { 245 croak encode_utf8("Error: Strange tag: '$item'"); 246 } elsif ($item =~ /^\<\/([^\s>]+)\>\z/) { 247 my ($el_name) = $1; 248 $stack[-1]{el_name} eq $el_name 249 or croak encode_utf8("Error: Incompatible stack element: stack='$stack[-1]{el_name}' item='$item'"); 250 my $stack_entry = pop @stack; 251 delete $stack_entry->{content} if ! @{$stack_entry->{content}}; 252 $pointer = $stack_entry->{parent}; 253 } elsif ($item =~ /^\<[^>]+?(\/)?\>\z/) { 254 my $is_self_closing = defined $1; 255 my ($el_name) = $item =~ /^<([^\s>\/]+)/; 256 defined $el_name or croak encode_utf8("Error: Strange tag: '$item'"); 257 $item =~ s/^\<\Q$el_name\E//; 258 $item =~ s/\/>\z//; 259 my @attrs = $item =~ /\s+(\S+=(['"]).*?\2)/g; 260 my $i = 0; 261 @attrs = grep {++$i % 2} @attrs; 262 my %attr; 263 foreach my $attr (@attrs) { 264 my ($attr_name, undef, $attr_value) = $attr =~ /^(\S+?)=(['"])(.*?)\2\z/; 265 defined $attr_name or croak encode_utf8("Error: Strange attribute: '$attr'"); 266 $attr{$attr_name} = _decode($attr_value, $entities); 267 } 268 my $entry = bless { 269 el_name => $el_name, 270 attrs => \%attr, 271 $is_self_closing ? () : (content => []), 272 parent => $pointer, 273 }, 'XML::MyXML::Object'; 274 weaken $entry->{parent}; 275 $entry->_apply_namespace_declarations; 276 push @stack, $entry unless $is_self_closing; 277 push @{$pointer->{content}}, $entry; 278 $pointer = $entry unless $is_self_closing; 279 } elsif ($item =~ /^[^<>]*\z/) { 280 my $entry = bless { 281 text => _decode($item, $entities), 282 parent => $pointer, 283 }, 'XML::MyXML::Object'; 284 weaken $entry->{parent}; 285 push @{$pointer->{content}}, $entry; 286 } else { 287 croak encode_utf8("Error: Strange element: '$item'"); 288 } 289 } 290 ! @stack or croak encode_utf8("Error: The <$stack[-1]{el_name}> element has not been closed in the XML document"); 291 $object = $object->{content}[0]; 292 $object->{parent} = undef; 293 return $object; 294} 295 296sub _objectarray_to_xml { 297 my $object = shift; 298 299 my $xml = ''; 300 foreach my $stuff (@$object) { 301 if (! defined $stuff->{el_name} and defined $stuff->{text}) { 302 $xml .= _encode($stuff->{text}); 303 } else { 304 $xml .= "<".$stuff->{el_name}; 305 foreach my $attrname (keys %{$stuff->{attrs}}) { 306 $xml .= " ".$attrname.'="'._encode($stuff->{attrs}{$attrname}).'"'; 307 } 308 if (! defined $stuff->{content} or ! @{ $stuff->{content} }) { 309 $xml .= "/>" 310 } else { 311 $xml .= ">"; 312 $xml .= _objectarray_to_xml($stuff->{content}); 313 $xml .= "</".$stuff->{el_name}.">"; 314 } 315 } 316 } 317 return $xml; 318} 319 320 321=head2 object_to_xml($object) 322 323Creates an XML string from the 'XML::MyXML::Object' object provided 324 325Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<bytes> 326 327=cut 328 329sub object_to_xml { 330 my $object = shift; 331 my $flags = shift || {}; 332 333 return $object->to_xml( $flags ); 334} 335 336sub _tidy_object { 337 my $object = shift; 338 my $tabs = shift || 0; 339 my $flags = shift || {}; 340 341 my $indentstring = exists $flags->{indentstring} ? $flags->{indentstring} : $DEFAULT_INDENTSTRING; 342 343 return unless defined $object->{content} and @{$object->{content}}; 344 my $hastext; 345 my @children = @{$object->{content}}; 346 foreach my $i (0..$#children) { 347 my $child = $children[$i]; 348 if (defined $child->{text} and $child->{text} =~ /\S/) { 349 $hastext = 1; 350 last; 351 } 352 } 353 return if $hastext; 354 355 @{$object->{content}} = grep { ! defined $_->{text} or $_->{text} =~ /\S/ } @{$object->{content}}; 356 357 @children = @{$object->{content}}; 358 $object->{content} = []; 359 for my $i (0..$#children) { 360 my $whitespace = bless { 361 text => "\n".($indentstring x ($tabs+1)), 362 parent => $object, 363 }, 'XML::MyXML::Object'; 364 weaken $whitespace->{parent}; 365 push @{$object->{content}}, $whitespace; 366 push @{$object->{content}}, $children[$i]; 367 } 368 my $whitespace = bless { 369 text => "\n".($indentstring x $tabs), 370 parent => $object, 371 }, 'XML::MyXML::Object'; 372 weaken $whitespace->{parent}; 373 push @{$object->{content}}, $whitespace; 374 375 for my $i (0..$#{$object->{content}}) { 376 _tidy_object($object->{content}[$i], $tabs+1, $flags); 377 } 378} 379 380 381=head2 simple_to_xml($simple_array_ref) 382 383Produces a raw XML string from either an array reference, a hash reference or a mixed structure such as these examples: 384 385 { thing => { name => 'John', location => { city => 'New York', country => 'U.S.A.' } } } 386 # <thing><name>John</name><location><country>U.S.A.</country><city>New York</city></location></thing> 387 388 [ thing => [ name => 'John', location => [ city => 'New York', country => 'U.S.A.' ] ] ] 389 # <thing><name>John</name><location><country>U.S.A.</country><city>New York</city></location></thing> 390 391 { thing => { name => 'John', location => [ city => 'New York', city => 'Boston', country => 'U.S.A.' ] } } 392 # <thing><name>John</name><location><city>New York</city><city>Boston</city><country>U.S.A.</country></location></thing> 393 394Here's a mini-tutorial on how to use this function, in which you'll also see how to set attributes. 395 396The simplest invocations are these: 397 398 simple_to_xml({target => undef}) 399 # <target/> 400 401 simple_to_xml({target => 123}) 402 # <target>123</target> 403 404Every set of sibling elements (such as the document itself, which is a single top-level element, or a pack of 4055 elements all children to the same parent element) is represented in the $simple_array_ref parameter as 406key-value pairs inside either a hashref or an arrayref (you can choose which). 407 408Keys represent tags+attributes of the sibling elements, whereas values represent the contents of those elements. 409 410Eg: 411 412 [ 413 first => 'John', 414 last => 'Doe,' 415 ] 416 417...and... 418 419 { 420 first => 'John', 421 last => 'Doe', 422 } 423 424both translate to: 425 426 <first>John</first><last>Doe</last> 427 428A value can either be undef (to denote an empty element), or a string (to denote a string), or another 429hashref/arrayref to denote a set of children elements, like this: 430 431 { 432 person => { 433 name => { 434 first => 'John', 435 last => 'Doe' 436 } 437 } 438 } 439 440...becomes: 441 442 <person> 443 <name> 444 <first>John</first> 445 <last>Doe</last> 446 </name> 447 </person> 448 449 450The only difference between using an arrayref or using a hashref, is that arrayrefs preserve the 451order of the elements, and allow repetition of identical tags. So a person with many addresses, should choose to 452represent its list of addresses under an arrayref, like this: 453 454 { 455 person => [ 456 name => { 457 first => 'John', 458 last => 'Doe', 459 }, 460 address => { 461 country => 'Malta', 462 }, 463 address => { 464 country => 'Indonesia', 465 }, 466 address => { 467 country => 'China', 468 } 469 ] 470 } 471 472...which becomes: 473 474 <person> 475 <name> 476 <last>Doe</last> 477 <first>John</first> 478 </name> 479 <address> 480 <country>Malta</country> 481 </address> 482 <address> 483 <country>Indonesia</country> 484 </address> 485 <address> 486 <country>China</country> 487 </address> 488 </person> 489 490Finally, to set attributes to your elements (eg id="12") you need to replace the key with either 491a string containing attributes as well (eg: C<'address id="12"'>), or replace it with a reference, as the many 492items in the examples below: 493 494 {thing => [ 495 'item id="1"' => 'chair', 496 [item => {id => 2}] => 'table', 497 [item => [id => 3]] => 'door', 498 [item => id => 4] => 'sofa', 499 {item => {id => 5}} => 'bed', 500 {item => [id => 6]} => 'shirt', 501 [item => {id => 7, other => 8}, [more => 9, also => 10, but_not => undef]] => 'towel' 502 ]} 503 504...which becomes: 505 506 <thing> 507 <item id="1">chair</item> 508 <item id="2">table</item> 509 <item id="3">door</item> 510 <item id="4">sofa</item> 511 <item id="5">bed</item> 512 <item id="6">shirt</item> 513 <item id="7" other="8" more="9" also="10">towel</item> 514 </thing> 515 516As you see, attributes may be represented in a great variety of ways, so you don't need to remember 517the "correct" one. 518 519Of course if the "simple structure" is a hashref, the key cannot be a reference (because hash keys are always 520strings), so if you want attributes on your elements, you either need the enclosing structure to be an 521arrayref as in the example above, to allow keys to be refs which contain the attributes, or you need to 522represent the key (=tag+attrs) as a string, like this (also in the previous example): C<'item id="1"'> 523 524This concludes the mini-tutorial of the simple_to_xml function. 525 526All the strings in C<$simple_array_ref> need to contain characters, rather than bytes/octets. The C<bytes> 527optional flag only affects the produced XML string. 528 529Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<xslt>, C<bytes> 530 531=cut 532 533sub simple_to_xml { 534 my $arref = shift; 535 my $flags = shift || {}; 536 537 my $xml = ''; 538 my ($key, $value, @residue) = (ref $arref eq 'HASH') ? %$arref : @$arref; 539 $key = _key_to_string($key); 540 ! @residue or croak "Error: the provided simple ref contains more than 1 top element"; 541 my ($el_name) = $key =~ /^(\S+)/; 542 defined $el_name or croak encode_utf8 "Error: Strange key: $key"; 543 544 if (! ref $value) { 545 if ($key eq '!as_is') { 546 check_xml $value or croak "invalid xml: $value"; 547 $xml .= $value; 548 } elsif (defined $value and length $value) { 549 $xml .= "<$key>"._encode($value)."</$el_name>"; 550 } else { 551 $xml .= "<$key/>"; 552 } 553 } else { 554 $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>"; 555 } 556 if ($flags->{tidy}) { 557 $xml = tidy_xml($xml, { 558 exists $flags->{indentstring} ? (indentstring => $flags->{indentstring}) : () 559 }); 560 } 561 my $decl = ''; 562 $decl .= qq'<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>\n' if $flags->{complete}; 563 $decl .= qq'<?xml-stylesheet type="text/xsl" href="$flags->{xslt}"?>\n' if $flags->{xslt}; 564 $xml = $decl . $xml; 565 566 if (defined $flags->{save}) { 567 open my $fh, '>', $flags->{save} or croak "Error: Couldn't open file '$flags->{save}' for writing: $!"; 568 binmode $fh, ':encoding(UTF-8)'; 569 print $fh $xml; 570 close $fh; 571 } 572 573 $xml = encode_utf8($xml) if $flags->{bytes}; 574 return $xml; 575} 576 577sub _flatten { 578 my ($thing) = @_; 579 580 if (!ref $thing) { return $thing; } 581 elsif (ref $thing eq 'HASH') { return map _flatten($_), %$thing; } 582 elsif (ref $thing eq 'ARRAY') { return map _flatten($_), @$thing; } 583 else { croak 'Error: reference of invalid type in simple_to_xml: '.(ref $thing); } 584} 585 586sub _key_to_string { 587 my ($key) = @_; 588 589 if (! ref $key) { 590 return $key; 591 } else { 592 my ($tag, %attrs) = _flatten($key); 593 return $tag . join('', map " $_=\""._encode($attrs{$_}).'"', grep {defined $attrs{$_}} keys %attrs); 594 } 595} 596 597sub _arrayref_to_xml { 598 my $arref = shift; 599 my $flags = shift || {}; 600 601 my $xml = ''; 602 603 if (ref $arref eq 'HASH') { return _hashref_to_xml($arref, $flags); } 604 605 foreach (my $i = 0; $i <= $#$arref; ) { 606 my $key = $arref->[$i++]; 607 $key = _key_to_string($key); 608 my ($el_name) = $key =~ /^(\S+)/; 609 defined $el_name or croak encode_utf8 "Error: Strange key: $key"; 610 my $value = $arref->[$i++]; 611 612 if ($key eq '!as_is') { 613 check_xml $value or croak "invalid xml: $value"; 614 $xml .= $value; 615 } elsif (! ref $value) { 616 if (defined $value and length $value) { 617 $xml .= "<$key>@{[ _encode($value) ]}</$el_name>"; 618 } else { 619 $xml .= "<$key/>"; 620 } 621 } else { 622 $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>"; 623 } 624 } 625 return $xml; 626} 627 628sub _hashref_to_xml { 629 my $hashref = shift; 630 my $flags = shift || {}; 631 632 my $xml = ''; 633 634 while (my ($key, $value) = each %$hashref) { 635 my ($el_name) = $key =~ /^(\S+)/; 636 defined $el_name or croak encode_utf8 "Error: Strange key: $key"; 637 638 if ($key eq '!as_is') { 639 check_xml $value or croak "invalid xml: $value"; 640 $xml .= $value; 641 } elsif (! ref $value) { 642 if (defined $value and length $value) { 643 $xml .= "<$key>@{[ _encode($value) ]}</$el_name>"; 644 } else { 645 $xml .= "<$key/>"; 646 } 647 } else { 648 $xml .= "<$key>@{[ _arrayref_to_xml($value, $flags) ]}</$el_name>"; 649 } 650 } 651 return $xml; 652} 653 654 655=head2 xml_to_simple($raw_xml) 656 657Produces a very simple hash object from the raw XML string provided. An example hash object created thusly is this: 658S<C<< { thing => { name => 'John', location => { city => 'New York', country => 'U.S.A.' } } } >>> 659 660B<WARNING:> This function only works on very simple XML strings, i.e. children of an element may not consist of both 661text and elements (child elements will be discarded in that case). Also attributes in tags are ignored. 662 663Since the object created is a hashref (unless used with the C<arrayref> optional flag), duplicate keys will be 664discarded. 665 666All strings contained in the output simple structure will always contain characters rather than octets/bytes, 667regardless of the C<bytes> optional flag. 668 669Optional flags: C<internal>, C<strip>, C<file>, C<strip_ns>, C<arrayref>, C<bytes> 670 671=cut 672 673sub xml_to_simple { 674 my $xml = shift; 675 my $flags = shift || {}; 676 677 my $object = xml_to_object($xml, $flags); 678 679 $object = $object->simplify($flags) if defined $object; 680 681 return $object; 682} 683 684sub _objectarray_to_simple { 685 my $object = shift; 686 my $flags = shift || {}; 687 688 defined $object or return undef; 689 690 return $flags->{arrayref} 691 ? _objectarray_to_simple_arrayref($object, $flags) 692 : _objectarray_to_simple_hashref($object, $flags); 693} 694 695sub _objectarray_to_simple_hashref { 696 my $object = shift; 697 my $flags = shift || {}; 698 699 defined $object or return undef; 700 701 my $hashref = {}; 702 703 foreach my $stuff (@$object) { 704 if (defined(my $key = $stuff->{el_name})) { 705 $key = strip_ns $key if $flags->{strip_ns}; 706 $hashref->{ $key } = _objectarray_to_simple($stuff->{content}, $flags); 707 } 708 elsif (defined(my $value = $stuff->{text})) { 709 $value = trim $value if $flags->{strip}; 710 return $value if $value =~ /\S/; 711 } 712 } 713 714 return %$hashref ? $hashref : undef; 715} 716 717sub _objectarray_to_simple_arrayref { 718 my $object = shift; 719 my $flags = shift || {}; 720 721 defined $object or return undef; 722 723 my $arrayref = []; 724 725 foreach my $stuff (@$object) { 726 if (defined(my $key = $stuff->{el_name})) { 727 $key = strip_ns $key if $flags->{strip_ns}; 728 push @$arrayref, ( $key, _objectarray_to_simple($stuff->{content}, $flags) ); 729 } elsif (defined(my $value = $stuff->{text})) { 730 $value = trim $value if $flags->{strip}; 731 return $value if $value =~ /\S/; 732 } 733 } 734 735 return @$arrayref ? $arrayref : undef; 736} 737 738 739=head2 check_xml($raw_xml) 740 741Returns true if the $raw_xml string is valid XML (valid enough to be used by this module), and false otherwise. 742 743Optional flags: C<file>, C<bytes> 744 745=cut 746 747sub check_xml { 748 my $xml = shift; 749 my $flags = shift || {}; 750 751 my $ok = eval { xml_to_object($xml, $flags); 1 }; 752 return !!$ok; 753} 754 755 7561; # End of XML::MyXML 757 758__END__ 759 760 761=head1 OBJECT METHODS 762 763=head2 $obj->path("subtag1/subsubtag2[attr1=val1][attr2]/.../subsubsubtagX") 764 765Returns the element specified by the path as an XML::MyXML::Object object. When there are more than one tags 766with the specified name in the last step of the path, it will return all of them as an array. In scalar 767context will only return the first one. Simple CSS3-style attribute selectors are allowed in the path next 768to the tagnames, for example: C<< p[class=big] >> will only return C<< <p> >> elements that contain an 769attribute called "class" with a value of "big". p[class] on the other hand will return p elements having a 770"class" attribute, but that attribute can have any value. It's possible to surround attribute values with 771quotes, like so: C<< input[name="foo[]"] >> 772 773An example... To print the last names of all the students from the following XML, do: 774 775 my $xml = <<'EOB'; 776 <people> 777 <student> 778 <name> 779 <first>Alex</first> 780 <last>Karelas</last> 781 </name> 782 </student> 783 <student> 784 <name> 785 <first>John</first> 786 <last>Doe</last> 787 </name> 788 </student> 789 <teacher> 790 <name> 791 <first>Mary</first> 792 <last>Poppins</last> 793 </name> 794 </teacher> 795 <teacher> 796 <name> 797 <first>Peter</first> 798 <last>Gabriel</last> 799 </name> 800 </teacher> 801 </people> 802 EOB 803 804 my $obj = xml_to_object($xml); 805 my @students = $obj->path('student'); 806 foreach my $student (@students) { 807 print $student->path('name/last')->value, "\n"; 808 } 809 810...or like this... 811 812 my @last = $obj->path('student/name/last'); 813 foreach my $last (@last) { 814 print $last->value, "\n"; 815 } 816 817If you wish to describe the root element in the path as well, prepend it in the path with a slash like so: 818 819 if( $student->path('/student/name/last')->value eq $student->path('name/last')->value ) { 820 print "The two are identical", "\n"; 821 } 822 823B<Since XML::MyXML version 1.08, the path method supports namespaces.> 824 825You can replace the namespace prefix of an attribute or an element name in the path string with the 826namespace name inside curly brackets, and place the curly-bracketed expression after the local part. 827 828B<< I<Example #1:> >> Suppose the XML you want to go through is: 829 830 <stream:stream xmlns:stream="http://foo/bar"> 831 <a>b</a> 832 </stream:stream> 833 834Then this will return the string C<"b">: 835 836 $obj->path('/stream{http://foo/bar}/a')->value; 837 838B<< I<Example #2:> >> Suppose the XML you want to go through is: 839 840 <stream xmlns="http://foo/bar"> 841 <a>b</a> 842 </stream> 843 844Then both of these expressions will return C<"b">: 845 846 $obj->path('/stream/a{http://foo/bar}')->value; 847 $obj->path('/stream{http://foo/bar}/a{http://foo/bar}')->value; 848 849B<Since XML::MyXML version 1.08, quotes in attribute match strings have no special meaning.> 850 851If you want to use the "]" character in attribute values, you need to escape it with a 852backslash character. As you need if you want to use the "}" character in a namespace value 853in the path string. 854 855B<< I<Example #1:> >> Suppose the XML you want to go through is: 856 857 <stream xmlns:o="http://foo}bar"> 858 <a o:name="c]d">b</a> 859 </stream> 860 861Then this expression will return C<"b">: 862 863 $obj->path('/stream/a[name{http://foo\}bar}=c\]d]')->value; 864 865B<< I<Example #2:> >> You can match attribute values containing quote characters with just C<"> in the path string. 866 867If the XML is: 868 869 <stream id=""1"">a</stream> 870 871...then this will return the string C<"a">: 872 873 $obj->path('/stream[id="1"]')->value; 874 875Optional flags: none 876 877=head2 $obj->text([set_value]), also known as $obj->value([set_value]) 878 879If provided a set_value, will delete all contents of $obj and will place C<set_value> as its text contents. 880Otherwise will return the text contents of this object, and of its descendants, in a single string. 881 882Optional flags: C<strip> 883 884=head2 $obj->inner_xml([xml_string]) 885 886Gets or sets the inner XML of the $obj node, depending on whether C<xml_string> is provided. 887 888Optional flags: C<bytes> 889 890=head2 $obj->attr('attrname' [, 'attrvalue']) 891 892Gets/Sets the value of the 'attrname' attribute of the top element. Returns undef if attribute does not exist. 893If called without the 'attrname' parameter, returns a hash with all attribute => value pairs. If setting with 894an attrvalue of C<undef>, then removes that attribute entirely. 895 896Input parameters and output are all in character strings, rather than octets/bytes. 897 898Optional flags: none 899 900=head2 $obj->tag 901 902Returns the tag of the $obj element. E.g. if $obj represents an <rss:item> element, C<< $obj->tag >> will 903return the string 'rss:item'. Returns undef if $obj doesn't represent a tag. 904 905Optional flags: C<strip_ns> 906 907=head2 $obj->name 908 909Same as C<< $obj->tag >> (alias). 910 911=head2 $obj->parent 912 913Returns the XML::MyXML::Object element that is the parent of $obj in the document. Returns undef if $obj 914doesn't have a parent. 915 916Optional flags: none 917 918=head2 $obj->simplify 919 920Returns a very simple hashref, like the one returned with C<&XML::MyXML::xml_to_simple>. Same restrictions 921and warnings apply. 922 923Optional flags: C<internal>, C<strip>, C<strip_ns>, C<arrayref> 924 925=head2 $obj->to_xml 926 927Returns the XML string of the object, just like calling C<object_to_xml( $obj )> 928 929Optional flags: C<complete>, C<tidy>, C<indentstring>, C<save>, C<bytes> 930 931=head2 $obj->to_tidy_xml 932 933Returns the XML string of the object in tidy form, just like calling C<tidy_xml( object_to_xml( $obj ) )> 934 935Optional flags: C<complete>, C<indentstring>, C<save>, C<bytes> 936 937=head1 BUGS 938 939If you have a Github account, report your issues at 940L<https://github.com/akarelas/xml-myxml/issues>. 941I will be notified, and then you'll automatically be notified of progress on 942your bug as I make changes. 943 944You can get notified of new versions of this module for free, by email or RSS, 945at L<https://www.perlmodules.net/viewfeed/distro/XML-MyXML> 946 947=head1 LICENSE 948 949Copyright (C) Alexander Karelas. 950 951This library is free software; you can redistribute it and/or modify 952it under the same terms as Perl itself. 953 954=head1 AUTHOR 955 956Alexander Karelas E<lt>karjala@cpan.orgE<gt> 957