1# $Id: DOMHandler.pm,v 1.1 2002/08/20 18:06:48 eray Exp eray $
2
3package XML::DOMHandler;
4use strict;
5use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
6require Exporter;
7@ISA = qw( Exporter );
8@EXPORT = qw( Version );
9$VERSION = '1.0';
# Report the version string of this module (exported by default).
sub Version {
    return $VERSION;
}
11
#
# Table mapping each node type code (the value a node's nodeType method
# returns) to the name of the internal method that processes that kind
# of node.  An empty string means there is no type-specific handler;
# traverse() does nothing for such nodes.
#
# Every non-empty name must be a method defined in this module, because
# traverse() calls it unconditionally.  The doctype entry therefore
# names _handle_doc_type, and namespace declarations have no entry
# because no handler method exists for them.
#
my %dispatch_table = (
    XML_ELEMENT_NODE()        => '_handle_element',
    XML_ATTRIBUTE_NODE()      => '_handle_attribute',
    XML_TEXT_NODE()           => '_handle_text',
    XML_CDATA_SECTION_NODE()  => '_handle_cdata',
    XML_ENTITY_REF_NODE()     => '_handle_entity_ref',
    XML_ENTITY_NODE()         => '',
    XML_PI_NODE()             => '_handle_pi',
    XML_COMMENT_NODE()        => '_handle_comment',
    XML_DOCUMENT_NODE()       => '_handle_doc_node',
    XML_DOCUMENT_TYPE_NODE()  => '_handle_doc_type',
    XML_DOCUMENT_FRAG_NODE()  => '',
    XML_NOTATION_NODE()       => '',
    XML_HTML_DOCUMENT_NODE()  => '',
    XML_DTD_NODE()            => '',
    XML_ELEMENT_DECL_NODE()   => '',
    XML_ATTRIBUTE_DECL_NODE() => '',
    XML_ENTITY_DECL_NODE()    => '',
    XML_NAMESPACE_DECL_NODE() => '',
    XML_XINCLUDE_START()      => '',
    XML_XINCLUDE_END()        => '',
);
37
# Traversal bookkeeping.  These are file-scoped lexicals, so all handler
# objects created from this module share them.
my( $level, $position );    # depth in the tree; 1-based position of the
                            # current node in its parent's content list
my @pstack;                 # position counter for each depth
my( $root, $rootset ) = ( undef, 0 );   # first node passed to traverse()
                                        # and whether it has been recorded
43
44
sub new {
#
# Construct a handler object and initialize the traversal state.
#
# Arguments: the class name, followed by a list of key/value options
# that become the fields of the object.  'handler_package' is the
# option consulted when call-backs are dispatched.
#
# Returns: the new blessed object.
#
    my( $class, @options ) = @_;
    my $self = bless( { @options }, $class );

    # Call reset as a method.  A bare "reset()" is parsed as Perl's
    # built-in reset operator rather than the reset routine of this
    # package, so the traversal state would never be initialized.
    $self->reset;

    return $self;
}
54
55
sub reset {
#
# Return the traversal bookkeeping to its initial state: depth and
# position back to zero, and a position stack holding a single zero.
#
# Takes no arguments (any that are passed are ignored).
#
    $level    = 0;
    $position = 0;
    @pstack   = (0);

    # Also forget the root recorded by an earlier traversal; otherwise
    # the reference to that node is kept for the life of the program.
    $root    = undef;
    $rootset = 0;

    return 1;
}
64
65
sub traverse {
#
# Dispatch a node to the internal handler for its type, then offer it
# to the user's generic node call-backs.
#
# Arguments: the handler object and the node to process.
#
# Returns: a true value if any handler was applied, otherwise 0.  Node
# types without an internal handler, and an undefined node, return 0.
#
    my( $self, $node ) = @_;

    # nothing to visit, e.g. a document that has no root element
    return 0 unless defined $node;

    my $handled_flag = 0;
    my $fun = $dispatch_table{ $node->nodeType };

    # remember the first node passed in
    unless( $rootset ) {
        $root    = $node;
        $rootset = 1;
    }

    if( $fun ) {
        # type-specific processing; element and document handlers
        # recurse into the tree from here
        $handled_flag = $self->$fun( $node );

        # apply generic Node handler
        $handled_flag =
            $self->_apply_user_handler( $node, 'generic_node' )
                || $handled_flag;

        # apply generic "else" node handler if no handlers applied
        $handled_flag ||=
            $self->_apply_user_handler( $node, 'else_generic_node' );
    }

    return $handled_flag;
}
94
95
sub _handle_element {
#
# Offer an element to the user's call-backs, then walk its children.
#
# Call-backs are tried in this order: the routine named after the
# element, then 'generic_element', and finally 'generic_element_else'
# when neither of the first two exists.
#
# Returns a true value if a handler was applied to this element or to
# anything beneath it.
#
    my $self    = shift;
    my $element = shift;

    # both of these are always attempted
    my $by_name = $self->_apply_user_handler( $element, $element->nodeName );
    my $by_type = $self->_apply_user_handler( $element, 'generic_element' );

    my $applied = $by_type || $by_name;

    # fallback only when nothing above was applied
    unless( $applied ) {
        $applied =
            $self->_apply_user_handler( $element, 'generic_element_else' );
    }

    my $below = $self->_handle_descendants( $element );
    return $below || $applied;
}
117
118
119#
120# default handlers for node types
121#
sub _handle_attribute {
    # Offer the node to the user's 'generic_attribute' call-back and
    # report whether one was applied.
    my $self      = shift;
    my $attribute = shift;
    return $self->_apply_user_handler( $attribute, 'generic_attribute' );
}
126
sub _handle_text {
    # Offer the node to the user's 'generic_text' call-back and report
    # whether one was applied.
    my $self = shift;
    my $text = shift;
    return $self->_apply_user_handler( $text, 'generic_text' );
}
131
sub _handle_cdata {
    # Offer the node to the user's 'generic_CDATA' call-back and report
    # whether one was applied.
    my $self  = shift;
    my $cdata = shift;
    return $self->_apply_user_handler( $cdata, 'generic_CDATA' );
}
136
sub _handle_entity_ref {
    # Offer the node to the user's 'generic_entity_ref' call-back and
    # report whether one was applied.
    my $self       = shift;
    my $entity_ref = shift;
    return $self->_apply_user_handler( $entity_ref, 'generic_entity_ref' );
}
141
sub _handle_pi {
    # Offer the node to the user's 'generic_PI' call-back and report
    # whether one was applied.
    my $self = shift;
    my $pi   = shift;
    return $self->_apply_user_handler( $pi, 'generic_PI' );
}
146
sub _handle_comment {
    # Offer the node to the user's 'generic_comment' call-back and
    # report whether one was applied.
    my $self    = shift;
    my $comment = shift;
    return $self->_apply_user_handler( $comment, 'generic_comment' );
}
151
sub _handle_doc_type {
    # Offer the node to the user's 'generic_doctype' call-back and
    # report whether one was applied.
    my $self    = shift;
    my $doctype = shift;
    return $self->_apply_user_handler( $doctype, 'generic_doctype' );
}
156
157
sub _handle_doc_node {
#
# Process the document node: offer it to the user's 'generic_document'
# call-back, then traverse the document element one level down.
#
# Returns a true value if the document call-back was applied or if
# anything was handled beneath the document element.  A document with
# no document element is not an error; only the call-back result is
# returned in that case.
#
    my( $self, $node ) = @_;

    my $handled_flag =
        $self->_apply_user_handler( $node, 'generic_document' );

    # an empty document has nothing to descend into
    my $top = $node->getDocumentElement;
    return $handled_flag unless defined $top;

    # the document element is the first node at depth 1
    $level++;
    $pstack[1] = 1;
    $position = 1;
    my $below = $self->traverse( $top );
    $level--;

    return $below || $handled_flag;
}
171
172
sub _handle_descendants {
#
# Visit every child of a node, one level deeper in the tree, keeping
# the depth and position bookkeeping up to date while doing so.
#
# Returns the sum of the values traverse() returned for the children,
# which is 0 when nothing was handled.
#
# NOTES:
# 1. Removing a node that follows the current node is dangerous!
# 2. Nodes inserted before or after the current node won't be processed.
#
    my $self   = shift;
    my $parent = shift;
    my $tally  = 0;

    # go down one level and start counting positions from zero
    $pstack[ ++$level ] = 0;

    for my $kid ( $parent->getChildnodes ) {
        # positions are 1-based within the parent's content list
        $position = ++$pstack[ $level ];
        $tally += $self->traverse( $kid );
    }

    $level--;
    return $tally;
}
193
194
sub _apply_user_handler {
#
# Invoke one call-back of the user's handler package, if it exists.
#
# Arguments: the handler object, the node being processed, and the
# name of the call-back routine to look for.
#
# The call-back is invoked as a method of the 'handler_package' option
# and receives this object and the node as its arguments.
#
# Returns 1 if a call-back was invoked, otherwise 0.
#
    my( $self, $node, $handler ) = @_;

    my $package = $self->{ handler_package };
    return 0 unless defined $package;

    # function form of can(), so a plain class name works as well as
    # an object
    my $code = UNIVERSAL::can( $package, $handler )
        or return 0;

    # Element names come from the document.  A name such as 'isa',
    # 'can' or 'VERSION' resolves to a method every class inherits from
    # UNIVERSAL; that is not a user call-back and must not be invoked.
    my $inherited = UNIVERSAL->can( $handler );
    return 0 if $inherited and $code == $inherited;

    $package->$code( $self, $node );
    return 1;
}
210
211
#
# Node type codes, used as keys of the dispatch table.
# NOTE(review): the values are presumably those returned by nodeType
# in XML::LibXML -- confirm against its documentation.
#
sub XML_ELEMENT_NODE        ()  {  1 }
sub XML_ATTRIBUTE_NODE      ()  {  2 }
sub XML_TEXT_NODE           ()  {  3 }
sub XML_CDATA_SECTION_NODE  ()  {  4 }
sub XML_ENTITY_REF_NODE     ()  {  5 }
sub XML_ENTITY_NODE         ()  {  6 }
sub XML_PI_NODE             ()  {  7 }
sub XML_COMMENT_NODE        ()  {  8 }
sub XML_DOCUMENT_NODE       ()  {  9 }
sub XML_DOCUMENT_TYPE_NODE  ()  { 10 }
sub XML_DOCUMENT_FRAG_NODE  ()  { 11 }
sub XML_NOTATION_NODE       ()  { 12 }
sub XML_HTML_DOCUMENT_NODE  ()  { 13 }
sub XML_DTD_NODE            ()  { 14 }
sub XML_ELEMENT_DECL_NODE   ()  { 15 }
sub XML_ATTRIBUTE_DECL_NODE ()  { 16 }
sub XML_ENTITY_DECL_NODE    ()  { 17 }
sub XML_NAMESPACE_DECL_NODE ()  { 18 }
sub XML_XINCLUDE_START      ()  { 19 }
sub XML_XINCLUDE_END        ()  { 20 }
235
236
2371;
238__END__
239########################################################################
240=pod
241
242=head1 NAME
243
XML::DOMHandler - Implements a call-back interface to DOM.
245
246=head1 SYNOPSIS
247
  use XML::DOMHandler;
  use XML::LibXML;
  $p = XML::LibXML->new;
  $doc = $p->parse_file( 'data.xml' );
  $dh = XML::DOMHandler->new( handler_package => testhandler->new );
  $dh->traverse( $doc );
254
255  package testhandler;
256  sub new {
257      return bless {};
258  }
259  sub A {
260      my( $self, $agent, $node ) = @_;
261      my $par = $node->parentNode->nodeName;
262      print "I'm in an A element and my parent is $par.\n";
263  }
264  sub generic_element {
265      my( $self, $agent, $node ) = @_;
266      my $name = $node->nodeName;
267      print "I'm in an element named '$name'.\n";
268  }
269  sub generic_text {
270      print "Here's some text.\n";
271  }
272  sub generic_PI {
273      print "Here's a processing instruction.\n";
274  }
275  sub generic_CDATA {
276      print "Here's a CDATA Section.\n";
277  }
278
279=head1 DESCRIPTION
280
281This module creates a layer on top of DOM that allows you to program
282in a "push" style rather than "pull". Once the document has been
283parsed and you have a DOM object, you can call on the DOMHandler's
284traverse() method to apply a set of call-back routines to all the
285nodes in a tree. You supply the routines in a handler package when
286initializing the DOMHandler.
287
288In your handler package, the names of routines determine which will be
289called for a given node. There are routines for node types, named
290"generic_" plus the node type. For elements, you can name routines
291after the element name and these will only be called for that type of
292element. A list of supported handlers follows:
293
294=over 4
295
296=item else_generic_node()
297
298Applied only to nodes that have not been handled by another routine.
299
300=item generic_CDATA()
301
302Applied to CDATA sections.
303
304=item generic_comment()
305
306Applied to XML comments.
307
308=item generic_doctype()
309
310Applied to DOCTYPE declarations.
311
312=item generic_element()
313
314Applied to all elements.
315
316=item generic_node()
317
318Applied to all nodes.
319
320=item generic_PI()
321
Applied to processing instructions.
323
324=item generic_text()
325
326Applied to text nodes.
327
=back
329
330A handler routine takes three arguments: the $self reference, a
331reference to the DOMHandler object, and a reference to a node in the
332document being traversed. You can use DOM routines on that node to do
333any processing you want. At the moment, this module only supports
334XML::LibXML documents.
335
336IMPORTANT NOTE: Some DOM operations may cause unwanted results. For
337example, if you delete the current node's parent, the program will
338likely crash.
339
340=head1 METHODS
341
342=head2 traverse( $doc )
343
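Applies the appropriate handler routines to the given node. For a
document node the traversal continues with the document element, and
for an element it continues with each of its child nodes, in order.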
346
347=head1 AUTHOR
348
349Erik Ray (eray@oreilly.com), Production Tools Dept.,
350O'Reilly and Associates Inc.
351
352=head1 COPYRIGHT
353
354Copyright (c) 2002 Erik Ray and O'Reilly & Associates.
355
356=cut
357