# $Id: DOMHandler.pm,v 1.1 2002/08/20 18:06:48 eray Exp eray $

package XML::DOMHandler;
use strict;
use warnings;

require Exporter;
our @ISA     = qw( Exporter );
our @EXPORT  = qw( Version );
our @EXPORT_OK;
our $VERSION = '1.0';

# Return the module version string.
sub Version { return $VERSION; }

#
# Node type codes, as returned by a node's nodeType() method.
# Declared before the dispatch table so they can be used as plain
# constants (no &NAME call syntax needed).
#
use constant {
    XML_ELEMENT_NODE        => 1,
    XML_ATTRIBUTE_NODE      => 2,
    XML_TEXT_NODE           => 3,
    XML_CDATA_SECTION_NODE  => 4,
    XML_ENTITY_REF_NODE     => 5,
    XML_ENTITY_NODE         => 6,
    XML_PI_NODE             => 7,
    XML_COMMENT_NODE        => 8,
    XML_DOCUMENT_NODE       => 9,
    XML_DOCUMENT_TYPE_NODE  => 10,
    XML_DOCUMENT_FRAG_NODE  => 11,
    XML_NOTATION_NODE       => 12,
    XML_HTML_DOCUMENT_NODE  => 13,
    XML_DTD_NODE            => 14,
    XML_ELEMENT_DECL_NODE   => 15,
    XML_ATTRIBUTE_DECL_NODE => 16,
    XML_ENTITY_DECL_NODE    => 17,
    XML_NAMESPACE_DECL_NODE => 18,
    XML_XINCLUDE_START      => 19,
    XML_XINCLUDE_END        => 20,
};

#
# table of node types and internal handler methods
#
# An empty string means "no internal handler": traverse() ignores such
# nodes entirely (no generic_node / else_generic_node call either).
#
# NOTE(review): '_handle_ns_decl' is not defined anywhere in this file;
# traversing a namespace-declaration node will die unless a subclass
# provides that method.
#
my %dispatch_table = (
    XML_ELEMENT_NODE()        => '_handle_element',
    XML_ATTRIBUTE_NODE()      => '_handle_attribute',
    XML_TEXT_NODE()           => '_handle_text',
    XML_CDATA_SECTION_NODE()  => '_handle_cdata',
    XML_ENTITY_REF_NODE()     => '_handle_entity_ref',
    XML_ENTITY_NODE()         => '',
    XML_PI_NODE()             => '_handle_pi',
    XML_COMMENT_NODE()        => '_handle_comment',
    XML_DOCUMENT_NODE()       => '_handle_doc_node',
    XML_DOCUMENT_TYPE_NODE()  => '_handle_doc_type',
    XML_DOCUMENT_FRAG_NODE()  => '',
    XML_NOTATION_NODE()       => '',
    XML_HTML_DOCUMENT_NODE()  => '',
    XML_DTD_NODE()            => '',
    XML_ELEMENT_DECL_NODE()   => '',
    XML_ATTRIBUTE_DECL_NODE() => '',
    XML_ENTITY_DECL_NODE()    => '',
    XML_NAMESPACE_DECL_NODE() => '_handle_ns_decl',
    XML_XINCLUDE_START()      => '',
    XML_XINCLUDE_END()        => '',
);

#
# Traversal state.  These are file-scoped, so they are shared by every
# XML::DOMHandler object; nested or interleaved traversals will see each
# other's state.  None of them is read back anywhere in this file.
#
my $level;          # depth in the tree
my $position;       # position in parent's content list
my @pstack;         # position stack
my $root;           # first node handed to traverse() since the last reset
my $rootset = 0;    # true once $root has been recorded


sub new {
#
# initialize object with options
#
# Arguments are stored verbatim as key/value pairs in the object hash;
# the only key this file looks at is 'handler_package'.
#
    my $class = shift;
    my $self  = {@_};

    # A bare "reset()" here would be resolved to Perl's builtin
    # CORE::reset, not to the sub below, so call the private helper.
    _reset_state();

    return bless( $self, $class );
}


sub reset {
#
# set globals back to zero
#
# Works both as a method ($dh->reset) and as a fully qualified function
# (XML::DOMHandler::reset()).
#
    _reset_state();
    return;
}


sub _reset_state {
#
# clear all shared traversal state, including the remembered root node
# (so that a previously traversed document is not kept alive)
#
    $level    = 0;
    $position = 0;
    @pstack   = (0);
    $root     = undef;
    $rootset  = 0;
    return;
}


sub traverse {
#
# dispatch node to handler, recurse
#
# Returns a true value if any user handler was applied to the node (or,
# for elements and documents, to anything below it), false otherwise.
#
    my( $self, $node ) = @_;

    my $handled_flag = 0;
    my $fun = $dispatch_table{ $node->nodeType };

    unless( $rootset ) {
        $root    = $node;
        $rootset = 1;
    }

    if( $fun ) {
        # type-specific handling (recurses for elements and documents)
        $handled_flag = $self->$fun( $node );

        # apply generic Node handler
        $handled_flag =
            $self->_apply_user_handler( $node, 'generic_node' )
                || $handled_flag;

        # apply generic "else" node handler if no handlers applied
        $handled_flag ||=
            $self->_apply_user_handler( $node, 'else_generic_node' );
    }

    return $handled_flag;
}


sub _handle_element {
#
# process an element, recurse if necessary
#
# Handler order: the handler named after the element, then
# 'generic_element', then 'generic_element_else' (only if neither of the
# first two was applied), then the element's children.
#
    my( $self, $node ) = @_;
    my $handled_flag = 0;

    # apply specific element handler
    my $name = $node->nodeName;
    $handled_flag = $self->_apply_user_handler( $node, $name );

    # apply generic element handler
    $handled_flag = $self->_apply_user_handler( $node, 'generic_element' )
        || $handled_flag;

    # apply generic "else" handler if no element handlers applied
    $handled_flag ||=
        $self->_apply_user_handler( $node, 'generic_element_else' );

    return $self->_handle_descendants( $node ) || $handled_flag;
}


#
# default handlers for node types
#
# Each one simply forwards the node to the matching 'generic_*' user
# handler and returns 1 if that handler exists, 0 otherwise.
#
sub _handle_attribute {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_attribute' );
}

sub _handle_text {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_text' );
}

sub _handle_cdata {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_CDATA' );
}

sub _handle_entity_ref {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_entity_ref' );
}

sub _handle_pi {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_PI' );
}

sub _handle_comment {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_comment' );
}

sub _handle_doc_type {
    my( $self, $node ) = @_;
    return $self->_apply_user_handler( $node, 'generic_doctype' );
}


sub _handle_doc_node {
#
# process the document node, recurse if necessary
#
# Only the document element is traversed; other children of the
# document node are not visited.
#
    my( $self, $node ) = @_;

    # Remember whether the document itself was handled, so that
    # traverse() does not also fire 'else_generic_node' on it.
    my $handled_flag = $self->_apply_user_handler( $node, 'generic_document' );

    $level++;
    $pstack[ $level ] = 1;
    $position = 1;

    # A document without a root element has nothing to recurse into.
    my $doc_element = $node->getDocumentElement;
    if( defined $doc_element ) {
        $handled_flag = $self->traverse( $doc_element ) || $handled_flag;
    }

    $level--;
    return $handled_flag;
}


sub _handle_descendants {
#
# recurse through descendants
#
# Returns the sum of the children's handled flags, i.e. a true value if
# at least one child (or something below it) was handled.
#
# NOTES:
# 1. Removing a node that follows the current node is dangerous!
# 2. Nodes inserted before or after the current node won't be processed.
#
    my( $self, $node ) = @_;
    my $handled_flag = 0;
    $level++;
    $pstack[ $level ] = 0;
    foreach my $child ( $node->getChildnodes ) {
        $pstack[ $level ]++;
        $position = $pstack[ $level ];

        # "+=" rather than "||=": every child must be visited, even
        # after one of them has reported success.
        $handled_flag += $self->traverse( $child );
    }
    $level--;
    return $handled_flag;
}


sub _apply_user_handler {
#
# send reference to self and node to a handler method
#
# Returns 1 if the handler package provides a method called $handler
# (which is then invoked as $package->$handler( $self, $node )), else 0.
# The handler's own return value is ignored.
#
    my( $self, $node, $handler ) = @_;

    return 0 unless exists $self->{ handler_package };

    # Function-call form of can() on purpose: handler_package may be an
    # object or a plain class name.
    my $package = $self->{ handler_package };
    return 0 unless UNIVERSAL::can( $package, $handler );

    $package->$handler( $self, $node );
    return 1;
}


1;
__END__
########################################################################
=pod

=head1 NAME

XML::DOMHandler - Implements a call-back interface to DOM.

=head1 SYNOPSIS

  use XML::DOMHandler;
  use XML::LibXML;
  $p = XML::LibXML->new;
  $doc = $p->parse_file( 'data.xml' );
  $dh = XML::DOMHandler->new( handler_package => testhandler->new );
  $dh->traverse( $doc );

  package testhandler;
  sub new {
      return bless {};
  }
  sub A {
      my( $self, $agent, $node ) = @_;
      my $par = $node->parentNode->nodeName;
      print "I'm in an A element and my parent is $par.\n";
  }
  sub generic_element {
      my( $self, $agent, $node ) = @_;
      my $name = $node->nodeName;
      print "I'm in an element named '$name'.\n";
  }
  sub generic_text {
      print "Here's some text.\n";
  }
  sub generic_PI {
      print "Here's a processing instruction.\n";
  }
  sub generic_CDATA {
      print "Here's a CDATA Section.\n";
  }

=head1 DESCRIPTION

This module creates a layer on top of DOM that allows you to program
in a "push" style rather than "pull". Once the document has been
parsed and you have a DOM object, you can call the DOMHandler's
traverse() method to apply a set of call-back routines to all the
nodes in a tree. You supply the routines in a handler package when
initializing the DOMHandler.

In your handler package, the names of routines determine which will be
called for a given node. There are routines for node types, named
"generic_" plus the node type. For elements, you can also name a routine
after the element name, and it will be called only for elements with
that name. A list of supported handlers follows:

=over 4

=item else_generic_node()

Applied only to nodes that have not been handled by another routine.

=item generic_attribute()

Applied to attribute nodes given to traverse().

=item generic_CDATA()

Applied to CDATA sections.

=item generic_comment()

Applied to XML comments.

=item generic_doctype()

Applied to DOCTYPE declarations.

=item generic_document()

Applied to the document node, before its root element is traversed.

=item generic_element()

Applied to all elements.

=item generic_element_else()

Applied only to elements for which neither an element-name routine nor
generic_element() was applied.

=item generic_entity_ref()

Applied to entity references.

=item generic_node()

Applied to all nodes.

=item generic_PI()

Applied to processing instructions.

=item generic_text()

Applied to text nodes.

=back

A handler routine takes three arguments: the $self reference, a
reference to the DOMHandler object, and a reference to a node in the
document being traversed. You can use DOM routines on that node to do
any processing you want. At the moment, this module only supports
XML::LibXML documents.

IMPORTANT NOTE: Some DOM operations may cause unwanted results. For
example, if you delete the current node's parent, the program will
likely crash.

=head1 METHODS

=head2 traverse( $doc )

Visits each node in a document, in order, applying the appropriate
handler routines.

=head1 AUTHOR

Erik Ray (eray@oreilly.com), Production Tools Dept.,
O'Reilly and Associates Inc.

=head1 COPYRIGHT

Copyright (c) 2002 Erik Ray and O'Reilly & Associates.

=cut