# XML::Parser
#
# Copyright (c) 1998-2000 Larry Wall and Clark Cooper
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

package XML::Parser;

use strict;

use vars qw($VERSION $LWP_load_failed);

use Carp;

BEGIN {
    require XML::Parser::Expat;
    $VERSION = '2.44';
    die "Parser.pm and Expat.pm versions don't match"
      unless $VERSION eq $XML::Parser::Expat::VERSION;
}

# Set to 1 by initial_ext_ent_handler when LWPExternEnt.pl cannot be
# loaded; new() then installs the file based handlers directly.
$LWP_load_failed = 0;

# new(%args)
#
# Class method. Builds the parser object (a blessed hash of the given
# options) and fills in $args{Handlers}:
#   * handlers explicitly passed in Handlers are kept;
#   * if Style is given, subs found in the style package are installed
#     for every handler type that has no explicit handler;
#   * unless ExternEnt or ExternEntFin was supplied, a default external
#     entity handler is installed (file based when NoLWP is true or LWP
#     already failed to load, otherwise the LWP bootstrap handler).
# Pkg defaults to the package of the caller.
sub new {
    my ($class, %args) = @_;
    my $style = $args{Style};

    # Keys listed here are consumed by XML::Parser itself and are not
    # forwarded to the Expat constructor (see _expat_options).
    my $nonexopt = $args{Non_Expat_Options} ||= {};

    $nonexopt->{Style}             = 1;
    $nonexopt->{Non_Expat_Options} = 1;
    $nonexopt->{Handlers}          = 1;
    $nonexopt->{_HNDL_TYPES}       = 1;
    $nonexopt->{NoLWP}             = 1;

    # Valid handler types: everything Expat can set, plus the two
    # pseudo-handlers that XML::Parser runs itself.
    $args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters};
    $args{_HNDL_TYPES}->{Init}  = 1;
    $args{_HNDL_TYPES}->{Final} = 1;

    $args{Handlers} ||= {};
    my $handlers = $args{Handlers};

    if (defined($style)) {
        my $stylepkg = $style;

        # A name without '::' is a built-in style: try
        # XML::Parser::Style::<Name> first.
        if ($stylepkg !~ /::/) {
            $stylepkg = "\u$style";

            eval {
                my $fullpkg   = 'XML::Parser::Style::' . $stylepkg;
                my $stylefile = $fullpkg;
                $stylefile =~ s/::/\//g;
                require "$stylefile.pm";
                $stylepkg = $fullpkg;
            };
            if ($@) {
                # fallback to old behaviour
                $stylepkg = 'XML::Parser::' . $stylepkg;
            }
        }

        foreach my $htype (keys %{$args{_HNDL_TYPES}}) {

            # Handlers explicitly given override
            # handlers from the Style package
            next if defined($handlers->{$htype});

            # A handler in the style package must either have
            # exactly the right case as the type name or a
            # completely lower case version of it.
            my $hname = "${stylepkg}::$htype";
            if (defined(&$hname)) {
                $handlers->{$htype} = \&$hname;
                next;
            }

            $hname = "${stylepkg}::\L$htype";
            if (defined(&$hname)) {
                $handlers->{$htype} = \&$hname;
                next;
            }
        }
    }

    unless (defined($handlers->{ExternEnt})
        or defined($handlers->{ExternEntFin}))
    {
        if ($args{NoLWP} or $LWP_load_failed) {
            $handlers->{ExternEnt}    = \&file_ext_ent_handler;
            $handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
        }
        else {
            # The following just bootstraps the real LWP external entity
            # handler

            $handlers->{ExternEnt} = \&initial_ext_ent_handler;

            # No cleanup function available until LWPExternEnt.pl loaded
        }
    }

    $args{Pkg} ||= caller;
    return bless \%args, $class;
}    # End of new

# setHandlers(TYPE => HANDLER, ...)
#
# Registers handlers by type on the parser object. Croaks on an odd
# argument count or on a type that is not a key of _HNDL_TYPES.
# Returns a flat list of (type, previous handler) pairs in input order.
sub setHandlers {
    my ($self, @handler_pairs) = @_;

    croak("Uneven number of arguments to setHandlers method")
      if (int(@handler_pairs) & 1);

    my @ret;
    while (@handler_pairs) {
        my $type    = shift @handler_pairs;
        my $handler = shift @handler_pairs;
        unless (defined($self->{_HNDL_TYPES}->{$type})) {
            my @types = sort keys %{$self->{_HNDL_TYPES}};

            croak("Unknown Parser handler type: $type\n Valid types: @types");
        }
        push(@ret, $type, $self->{Handlers}->{$type});
        $self->{Handlers}->{$type} = $handler;
    }

    return @ret;
}

# _expat_options($self)
#
# Private helper (called as a plain function). Returns the key/value
# pairs stored in the parser object whose keys are not listed in
# Non_Expat_Options, i.e. the options handed to the Expat constructor.
sub _expat_options {
    my ($self) = @_;
    my $skip = $self->{Non_Expat_Options} || {};

    my @expat_options;
    foreach my $key (keys %{$self}) {
        push(@expat_options, $key, $self->{$key})
          unless exists $skip->{$key};
    }
    return @expat_options;
}

# parse_start([OPT => VALUE, ...])
#
# Creates an XML::Parser::ExpatNB object from the stored options (extra
# arguments are appended after them), installs the handlers, runs the
# Init handler if any, and stores the Final handler in the object's
# FinalHandler slot. Returns the ExpatNB object.
sub parse_start {
    my $self = shift;

    my %handlers = %{$self->{Handlers}};
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    my $expatnb = XML::Parser::ExpatNB->new(_expat_options($self), @_);

    # If setup dies the caller never receives the object, so nobody
    # could release it afterwards; release it here before rethrowing.
    my $ok = eval {
        $expatnb->setHandlers(%handlers);
        $init->($expatnb)
          if defined($init);
        1;
    };
    unless ($ok) {
        my $err = $@;
        $expatnb->release;
        die $err;
    }

    $expatnb->{_State_} = 1;

    $expatnb->{FinalHandler} = $final
      if defined($final);

    return $expatnb;
}

# parse(SOURCE [, OPT => VALUE, ...])
#
# Creates an XML::Parser::Expat object from the stored options plus the
# extra arguments, installs the handlers, runs the Init handler, parses
# SOURCE and, when the parse returned a true value, runs the Final
# handler. The Expat object is released on every exit path, including
# when a handler dies; the exception is then rethrown.
#
# Returns nothing in void context. In list context returns the list
# produced by the Final handler (empty if there is none); in scalar
# context returns the Final handler's result, or the result of the
# Expat parse when no Final handler ran.
sub parse {
    my $self = shift;
    my $arg  = shift;

    # wantarray inside eval {} reports the eval's context, not the
    # sub's, so capture it up front.
    my $want_list = wantarray;

    my $expat    = XML::Parser::Expat->new(_expat_options($self), @_);
    my %handlers = %{$self->{Handlers}};
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    my @result = ();
    my $result;
    my $ok = eval {
        $expat->setHandlers(%handlers);

        if ($self->{Base}) {
            $expat->base($self->{Base});
        }

        $init->($expat)
          if defined($init);

        $result = $expat->parse($arg);

        if ($result and defined($final)) {
            if ($want_list) {
                @result = $final->($expat);
            }
            else {
                $result = $final->($expat);
            }
        }
        1;
    };
    my $err = $@;

    $expat->release;
    die $err unless $ok;

    return unless defined $want_list;
    return $want_list ? @result : $result;
}

# parsestring(...)
#
# Forwards all arguments to parse and returns what it returns.
sub parsestring {
    my $self = shift;
    return $self->parse(@_);
}

# parsefile(FILE [, OPT => VALUE, ...])
#
# Opens FILE read-only in binary mode and parses it with parse; extra
# arguments are passed through. Croaks if the file cannot be opened.
# The handle is closed whether or not parse dies; a parse exception is
# rethrown after the close. Returns what parse returns.
#
# Side effect: stores FILE in $self->{Base}, where it stays for later
# calls on the same parser object.
sub parsefile {
    my $self = shift;
    my $file = shift;

    # Three-argument open: FILE is always treated as a plain file name,
    # never as a mode or pipe specification.
    open(my $fh, '<', $file) or croak "Couldn't open $file:\n$!";
    binmode($fh);
    my @ret;
    my $ret;

    $self->{Base} = $file;

    if (wantarray) {
        eval { @ret = $self->parse($fh, @_); };
    }
    else {
        eval { $ret = $self->parse($fh, @_); };
    }
    my $err = $@;
    close($fh);
    die $err if $err;

    return unless defined wantarray;
    return wantarray ? @ret : $ret;
}

# initial_ext_ent_handler(Expat, ...)
#
# Bootstrap ExternEnt handler. On first use it tries to load
# XML/Parser/LWPExternEnt.pl; on success it installs the LWP handlers
# on the Expat object and tail-calls lwp_ext_ent_handler. On failure
# it sets $LWP_load_failed, warns once, installs the file based
# handlers and tail-calls file_ext_ent_handler.
sub initial_ext_ent_handler {

    # This just bootstraps in the real lwp_ext_ent_handler which
    # also loads the URI and LWP modules.

    unless ($LWP_load_failed) {
        local ($^W) = 0;

        my $stat = eval { require('XML/Parser/LWPExternEnt.pl'); };

        if ($stat) {
            $_[0]->setHandlers(
                ExternEnt    => \&lwp_ext_ent_handler,
                ExternEntFin => \&lwp_ext_ent_cleanup
            );

            goto &lwp_ext_ent_handler;
        }

        # Failed to load lwp handler, act as if NoLWP

        $LWP_load_failed = 1;

        my $cmsg = "Couldn't load LWP based external entity handler\n";
        $cmsg .= "Switching to file-based external entity handler\n";
        $cmsg .= " (To avoid this message, use NoLWP option to XML::Parser)\n";
        warn($cmsg);
    }

    $_[0]->setHandlers(
        ExternEnt    => \&file_ext_ent_handler,
        ExternEntFin => \&file_ext_ent_cleanup
    );
    goto &file_ext_ent_handler;

}

# file_ext_ent_handler(Expat, Base, Path)
#
# File based ExternEnt handler. Resolves Path against Base when Path is
# relative, opens it for reading and returns the IO::File handle. On
# failure appends a message to the Expat object's ErrorMessage and
# returns undef. On success the previous base and the handle are pushed
# on _BaseStack/_FhStack for file_ext_ent_cleanup, and the Expat base is
# set to the opened path.
sub file_ext_ent_handler {
    my ($xp, $base, $path) = @_;

    # Prepend base only for relative paths

    if (defined($base)
        and not($path =~ m!^(?:[\\/]|\w+:)!))
    {
        my $newpath = $base;
        $newpath =~ s![^\\/:]*$!$path!;
        $path = $newpath;
    }

    if (   $path =~ /^\s*[|>+]/
        or $path =~ /\|\s*$/)
    {
        $xp->{ErrorMessage}
          .= "System ID ($path) contains Perl IO control characters";
        return undef;
    }

    require IO::File;

    # Explicit read mode, so the system ID (which comes from the
    # document) is never interpreted as an open mode, dup or pipe.
    my $fh = IO::File->new($path, 'r');
    unless (defined $fh) {
        $xp->{ErrorMessage} .= "Failed to open $path:\n$!";
        return undef;
    }

    $xp->{_BaseStack} ||= [];
    $xp->{_FhStack}   ||= [];

    push(@{$xp->{_BaseStack}}, $base);
    push(@{$xp->{_FhStack}},   $fh);

    $xp->base($path);

    return $fh;
}

# file_ext_ent_cleanup(Expat)
#
# ExternEntFin partner of file_ext_ent_handler: closes the most
# recently opened entity handle and restores the base that was in
# effect before it was opened. Does nothing when no handle is pending.
sub file_ext_ent_cleanup {
    my ($xp) = @_;

    my $fh = pop(@{$xp->{_FhStack} ||= []});

    # Both stacks are pushed together, so an empty handle stack means
    # there is no saved base to restore either.
    return unless defined $fh;
    $fh->close;

    my $base = pop(@{$xp->{_BaseStack}});
    $xp->base($base);
}

1;

__END__

=head1 NAME

XML::Parser - A perl module for parsing XML documents

=head1 SYNOPSIS

  use XML::Parser;

  $p1 = XML::Parser->new(Style => 'Debug');
$p1->parsefile('REC-xml-19980210.xml'); 342 $p1->parse('<foo id="me">Hello World</foo>'); 343 344 # Alternative 345 $p2 = XML::Parser->new(Handlers => {Start => \&handle_start, 346 End => \&handle_end, 347 Char => \&handle_char}); 348 $p2->parse($socket); 349 350 # Another alternative 351 $p3 = XML::Parser->new(ErrorContext => 2); 352 353 $p3->setHandlers(Char => \&text, 354 Default => \&other); 355 356 open(FOO, 'xmlgenerator |'); 357 $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1'); 358 close(FOO); 359 360 $p3->parsefile('junk.xml', ErrorContext => 3); 361 362=begin man 363.ds PI 364 365=end man 366 367=head1 DESCRIPTION 368 369This module provides ways to parse XML documents. It is built on top of 370L<XML::Parser::Expat>, which is a lower level interface to James Clark's 371expat library. Each call to one of the parsing methods creates a new 372instance of XML::Parser::Expat which is then used to parse the document. 373Expat options may be provided when the XML::Parser object is created. 374These options are then passed on to the Expat object on each parse call. 375They can also be given as extra arguments to the parse methods, in which 376case they override options given at XML::Parser creation time. 377 378The behavior of the parser is controlled either by C<L</STYLES>> and/or 379C<L</HANDLERS>> options, or by L</setHandlers> method. These all provide 380mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. 381If neither C<Style> nor C<Handlers> are specified, then parsing just 382checks the document for being well-formed. 383 384When underlying handlers get called, they receive as their first parameter 385the I<Expat> object, not the Parser object. 386 387=head1 METHODS 388 389=over 4 390 391=item new 392 393This is a class method, the constructor for XML::Parser. Options are passed 394as keyword value pairs. 
Recognized options are: 395 396=over 4 397 398=item * Style 399 400This option provides an easy way to create a given style of parser. The 401built in styles are: L<"Debug">, L<"Subs">, L<"Tree">, L<"Objects">, 402and L<"Stream">. These are all defined in separate packages under 403C<XML::Parser::Style::*>, and you can find further documentation for 404each style both below, and in those packages. 405 406Custom styles can be provided by giving a full package name containing 407at least one '::'. This package should then have subs defined for each 408handler it wishes to have installed. See L<"STYLES"> below 409for a discussion of each built in style. 410 411=item * Handlers 412 413When provided, this option should be an anonymous hash containing as 414keys the type of handler and as values a sub reference to handle that 415type of event. All the handlers get passed as their 1st parameter the 416instance of expat that is parsing the document. Further details on 417handlers can be found in L<"HANDLERS">. Any handler set here 418overrides the corresponding handler set with the Style option. 419 420=item * Pkg 421 422Some styles will refer to subs defined in this package. If not provided, 423it defaults to the package which called the constructor. 424 425=item * ErrorContext 426 427This is an Expat option. When this option is defined, errors are reported 428in context. The value should be the number of lines to show on either side 429of the line in which the error occurred. 430 431=item * ProtocolEncoding 432 433This is an Expat option. This sets the protocol encoding name. It defaults 434to none. The built-in encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and 435C<US-ASCII>. Other encodings may be used if they have encoding maps in one 436of the directories in the @Encoding_Path list. Check L<"ENCODINGS"> for 437more information on encoding maps. Setting the protocol encoding overrides 438any encoding in the XML declaration. 

=item * Namespaces

This is an Expat option. If this is set to a true value, then namespace
processing is done during the parse. See L<XML::Parser::Expat/"Namespaces">
for further discussion of namespace processing.

=item * NoExpand

This is an Expat option. Normally, the parser will try to expand references
to entities defined in the internal subset. If this option is set to a true
value, and a default handler is also set, then the default handler will be
called when an entity reference is seen in text. This has no effect if a
default handler has not been registered, and it has no effect on the expansion
of entity references inside attribute values.

=item * Stream_Delimiter

This is an Expat option. It takes a string value. When this string is found
alone on a line while parsing from a stream, then the parse is ended as if it
saw an end of file. The intended use is with a stream of xml documents in a
MIME multipart format. The string should not contain a trailing newline.

=item * ParseParamEnt

This is an Expat option. Unless standalone is set to "yes" in the XML
declaration, setting this to a true value allows the external DTD to be read,
and parameter entities to be parsed and expanded.

=item * NoLWP

This option has no effect if the ExternEnt or ExternEntFin handlers are
directly set. Otherwise, if true, it forces the use of a file based external
entity handler.

=item * Non_Expat_Options

If provided, this should be an anonymous hash whose keys are options that
shouldn't be passed to Expat. This should only be of concern to those
subclassing XML::Parser.

=back

=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])

This method registers handlers for various parser events. It overrides any
previous handlers registered through the Style or Handlers options or through
earlier calls to setHandlers.
By providing a false or undefined value as 487the handler, the existing handler can be unset. 488 489This method returns a list of type, handler pairs corresponding to the 490input. The handlers returned are the ones that were in effect prior to 491the call. 492 493See a description of the handler types in L<"HANDLERS">. 494 495=item parse(SOURCE [, OPT => OPT_VALUE [...]]) 496 497The SOURCE parameter should either be a string containing the whole XML 498document, or it should be an open IO::Handle. Constructor options to 499XML::Parser::Expat given as keyword-value pairs may follow the SOURCE 500parameter. These override, for this call, any options or attributes passed 501through from the XML::Parser instance. 502 503A die call is thrown if a parse error occurs. Otherwise it will return 1 504or whatever is returned from the B<Final> handler, if one is installed. 505In other words, what parse may return depends on the style. 506 507=item parsestring 508 509This is just an alias for parse for backwards compatibility. 510 511=item parsefile(FILE [, OPT => OPT_VALUE [...]]) 512 513Open FILE for reading, then call parse with the open handle. The file 514is closed no matter how parse returns. Returns what parse returns. 515 516=item parse_start([ OPT => OPT_VALUE [...]]) 517 518Create and return a new instance of XML::Parser::ExpatNB. Constructor 519options may be provided. If an init handler has been provided, it is 520called before returning the ExpatNB object. Documents are parsed by 521making incremental calls to the parse_more method of this object, which 522takes a string. A single call to the parse_done method of this object, 523which takes no arguments, indicates that the document is finished. 524 525If there is a final handler installed, it is executed by the parse_done 526method before returning and the parse_done method returns whatever is 527returned by the final handler. 528 529=back 530 531=head1 HANDLERS 532 533Expat is an event based parser. 
As the parser recognizes parts of the 534document (say the start or end tag for an XML element), then any handlers 535registered for that type of an event are called with suitable parameters. 536All handlers receive an instance of XML::Parser::Expat as their first 537argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the 538methods that can be called on this object. 539 540=head2 Init (Expat) 541 542This is called just before the parsing of the document starts. 543 544=head2 Final (Expat) 545 546This is called just after parsing has finished, but only if no errors 547occurred during the parse. Parse returns what this returns. 548 549=head2 Start (Expat, Element [, Attr, Val [,...]]) 550 551This event is generated when an XML start tag is recognized. Element is the 552name of the XML element type that is opened with the start tag. The Attr & 553Val pairs are generated for each attribute in the start tag. 554 555=head2 End (Expat, Element) 556 557This event is generated when an XML end tag is recognized. Note that 558an XML empty tag (<foo/>) generates both a start and an end event. 559 560=head2 Char (Expat, String) 561 562This event is generated when non-markup is recognized. The non-markup 563sequence of characters is in String. A single non-markup sequence of 564characters may generate multiple calls to this handler. Whatever the 565encoding of the string in the original document, this is given to the 566handler in UTF-8. 567 568=head2 Proc (Expat, Target, Data) 569 570This event is generated when a processing instruction is recognized. 571 572=head2 Comment (Expat, Data) 573 574This event is generated when a comment is recognized. 575 576=head2 CdataStart (Expat) 577 578This is called at the start of a CDATA section. 579 580=head2 CdataEnd (Expat) 581 582This is called at the end of a CDATA section. 583 584=head2 Default (Expat, String) 585 586This is called for any characters that don't have a registered handler. 
587This includes both characters that are part of markup for which no 588events are generated (markup declarations) and characters that 589could generate events, but for which no handler has been registered. 590 591Whatever the encoding in the original document, the string is returned to 592the handler in UTF-8. 593 594=head2 Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation) 595 596This is called for a declaration of an unparsed entity. Entity is the name 597of the entity. Base is the base to be used for resolving a relative URI. 598Sysid is the system id. Pubid is the public id. Notation is the notation 599name. Base and Pubid may be undefined. 600 601=head2 Notation (Expat, Notation, Base, Sysid, Pubid) 602 603This is called for a declaration of notation. Notation is the notation name. 604Base is the base to be used for resolving a relative URI. Sysid is the system 605id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. 606 607=head2 ExternEnt (Expat, Base, Sysid, Pubid) 608 609This is called when an external entity is referenced. Base is the base to be 610used for resolving a relative URI. Sysid is the system id. Pubid is the public 611id. Base, and Pubid may be undefined. 612 613This handler should either return a string, which represents the contents of 614the external entity, or return an open filehandle that can be read to obtain 615the contents of the external entity, or return undef, which indicates the 616external entity couldn't be found and will generate a parse error. 617 618If an open filehandle is returned, it must be returned as either a glob 619(*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). 620 621A default handler is installed for this event. The default handler is 622XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with 623a true value, otherwise XML::Parser::file_ext_ent_handler is the default 624handler for external entities. 
Even without the NoLWP option, if the
URI or LWP modules are missing, the file based handler ends up being used
after giving a warning on the first external entity reference.

The LWP external entity handler will use proxies defined in the environment
(http_proxy, ftp_proxy, etc.).

Please note that the LWP external entity handler reads the entire
entity into a string and returns it, whereas the file handler opens a
filehandle.

Also note that the file external entity handler will likely choke on
absolute URIs or file names that don't fit the conventions of the local
operating system.

The expat base method can be used to set a basename for
relative pathnames. If no basename is given, or if the basename is itself
a relative name, then it is relative to the current working directory.

=head2 ExternEntFin (Expat)

This is called after parsing an external entity. It's not called unless
an ExternEnt handler is also set. There is a default handler installed
that pairs with the default ExternEnt handler.

If you're going to install your own ExternEnt handler, then you should
set (or unset) this handler too.

=head2 Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam)

This is called when an entity is declared. For internal entities, the Val
parameter will contain the value and the remaining three parameters will be
undefined. For external entities, the Val parameter will be undefined, the
Sysid parameter will have the system id, the Pubid parameter will have the
public id if it was provided (it will be undefined otherwise), the Ndata
parameter will contain the notation for unparsed entities. If this is a
parameter entity declaration, then the IsParam parameter is true.

Note that this handler and the Unparsed handler above overlap. If both are
set, then this handler will not be called for unparsed entities.

=head2 Element (Expat, Name, Model)

The element handler is called when an element declaration is found. Name
is the element name, and Model is the content model as an
XML::Parser::ContentModel object. See
L<XML::Parser::Expat/"XML::Parser::ContentModel Methods">
for methods available for this class.

=head2 Attlist (Expat, Elname, Attname, Type, Default, Fixed)

This handler is called for each attribute in an ATTLIST declaration.
So an ATTLIST declaration that has multiple attributes will generate multiple
calls to this handler. The Elname parameter is the name of the element with
which the attribute is being associated. The Attname parameter is the name
of the attribute. Type is the attribute type, given as a string. Default is
the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted
string (i.e. the returned string will begin and end with a quote character).
If Fixed is true, then this is a fixed attribute.

=head2 Doctype (Expat, Name, Sysid, Pubid, Internal)

This handler is called for DOCTYPE declarations. Name is the document type
name. Sysid is the system id of the document type, if it was provided,
otherwise it's undefined. Pubid is the public id of the document type,
which will be undefined if no public id was given. Internal is the internal
subset, given as a string. If there was no internal subset, it will be
undefined. Internal will contain all whitespace, comments, processing
instructions, and declarations seen in the internal subset. The declarations
will be there whether or not they have been processed by another handler
(except for unparsed entities processed by the Unparsed handler). However,
comments and processing instructions will not appear if they've been processed
by their respective handlers.

=head2 DoctypeFin (Expat)

This handler is called after parsing of the DOCTYPE declaration has finished,
including any internal or external DTD declarations.

=head2 XMLDecl (Expat, Version, Encoding, Standalone)

This handler is called for xml declarations. Version is a string containing
the version. Encoding is either undefined or contains an encoding string.
Standalone will be either true, false, or undefined if the standalone attribute
is yes, no, or not present, respectively.

=head1 STYLES

=head2 Debug

This just prints out the document in outline form. Nothing special is
returned by parse.

=head2 Subs

Each time an element starts, a sub by that name in the package specified
by the Pkg option is called with the same parameters that the Start
handler gets called with.

Each time an element ends, a sub with that name appended with an underscore
("_"), is called with the same parameters that the End handler gets called
with.

Nothing special is returned by parse.

=head2 Tree

Parse will return a parse tree for the document. Each node in the tree
takes the form of a tag, content pair. Text nodes are represented with
a pseudo-tag of "0" and the string that is their content. For elements,
the content is an array reference. The first item in the array is a
(possibly empty) hash reference containing attributes. The remainder of
the array is a sequence of tag-content pairs representing the content
of the element.

So for example the result of parsing:

  <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>

would be:

             Tag   Content
  ==================================================================
  [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]],
              bar, [         {}, 0, "Howdy",  ref, [{}]],
                0, "do"
        ]
  ]

The root element "foo" has 3 children: a "head" element, a "bar"
element and the text "do". After the empty attribute hash, these are
represented in its contents by 3 tag-content pairs.

=head2 Objects

This is similar to the Tree style, except that a hash object is created for
each element. The corresponding object will be in the class whose name
is created by appending "::" and the element name to the package set with
the Pkg option. Non-markup text will be in the ::Characters class. The
contents of the corresponding object will be in an anonymous array that
is the value of the Kids property for that object.

=head2 Stream

This style also uses the Pkg package. If none of the subs that this
style looks for is there, then the effect of parsing with this style is
to print a canonical copy of the document without comments or declarations.
All the subs receive as their 1st parameter the Expat instance for the
document they're parsing.

It looks for the following routines:

=over 4

=item * StartDocument

Called at the start of the parse.

=item * StartTag

Called for every start tag with a second parameter of the element type. The $_
variable will contain a copy of the tag and the %_ variable will contain
attribute values supplied for that element.

=item * EndTag

Called for every end tag with a second parameter of the element type. The $_
variable will contain a copy of the end tag.
791 792=item * Text 793 794Called just before start or end tags with accumulated non-markup text in 795the $_ variable. 796 797=item * PI 798 799Called for processing instructions. The $_ variable will contain a copy of 800the PI and the target and data are sent as 2nd and 3rd parameters 801respectively. 802 803=item * EndDocument 804 805Called at conclusion of the parse. 806 807=back 808 809=head1 ENCODINGS 810 811XML documents may be encoded in character sets other than Unicode as 812long as they may be mapped into the Unicode character set. Expat has 813further restrictions on encodings. Read the xmlparse.h header file in 814the expat distribution to see details on these restrictions. 815 816Expat has built-in encodings for: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and 817C<US-ASCII>. Encodings are set either through the XML declaration 818encoding attribute or through the ProtocolEncoding option to XML::Parser 819or XML::Parser::Expat. 820 821For encodings other than the built-ins, expat calls the function 822load_encoding in the Expat package with the encoding name. This function 823looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that 824matches the lower-cased name with a '.enc' extension. The first one it 825finds, it loads. 826 827If you wish to build your own encoding maps, check out the XML::Encoding 828module from CPAN. 829 830=head1 AUTHORS 831 832Larry Wall <F<larry@wall.org>> wrote version 1.0. 833 834Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API 835for this version (2.x), provided documentation, 836and added some standard package features. 837 838Matt Sergeant <F<matt@sergeant.org>> is now maintaining XML::Parser 839 840=cut 841