1# $Id: XPath.pm,v 1.11 2005/10/18 08:39:04 mrodrigu Exp $
2
3package XML::DOM::XPath;
4
5use strict;
6
7use XML::XPathEngine;
8use XML::DOM;
9
use vars qw($VERSION);
$VERSION= "0.14";

# Slot numbers used to stash extra data inside XML::DOM's array-based objects:
#   $xp_field     - slot of a document that holds its XML::XPathEngine object
#   $parent_field - slot of an attribute that holds its parent element
my( $xp_field, $parent_field);

BEGIN
  { # this is probably quite wrong, I have to figure out the internal structure of nodes better
    # NOTE(review): 11 and 12 are assumed not to clash with slots XML::DOM uses itself - TODO confirm
    ( $xp_field, $parent_field)= ( 11, 12);
  }
21
22package XML::DOM::Document;
23
# XPath entry points for a whole document. Each one forwards the expression
# to the XML::XPathEngine object returned by $dom->xp, using the document
# itself as the context node.

sub findnodes
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->findnodes( $_[0], $dom);
  }

sub findnodes_as_string
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->findnodes_as_string( $_[0], $dom);
  }

sub findnodes_as_strings
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->findnodes_as_strings( $_[0], $dom);
  }

sub findvalue
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->findvalue( $_[0], $dom);
  }

sub exists
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->exists( $_[0], $dom);
  }

sub find
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->find( $_[0], $dom);
  }

# matches: the document is passed both as the node to test and as the context
sub matches
  { my $dom= shift;
    my $engine= $dom->xp;
    return $engine->matches( $dom, $_[0], $dom);
  }

# set_namespace: all arguments after the document are handed to the engine untouched
sub set_namespace
  { my( $dom, @ns_args)= @_;
    return $dom->xp->set_namespace( @ns_args);
  }
32
# Order of a document relative to another node:
# 0 when the other node is a document too, 1 otherwise.
sub cmp
  { my( $dom, $other)= @_;
    return 0 if( $other->isa( 'XML::DOM::Document'));
    return 1;
  }

# A document is its own root node.
sub getRootNode
  { my $dom= shift;
    return $dom;
  }
# Returns whatever is stored in the document's $xp_field slot
# (the constructor in this file puts the XML::XPathEngine object there).
sub xp
  { my $dom= shift;
    return $dom->[$xp_field];
  }
37
{ no warnings; # presumably silences the warning about redefining XML::DOM's own constructor
  # Constructor copied from the original DOM package, with the addition of
  # the creation of the XML::XPathEngine object.
  # Takes the class name and returns a new, empty document object.
  sub new
    { my ($class) = @_;
      my $self = bless [], $class;

      # keep Doc pointer, even though getOwnerDocument returns undef
      $self->[_Doc] = $self;

      # Arrow call instead of the indirect-object form "new XML::DOM::NodeList":
      # this package defines a sub called "new" itself, which is exactly the
      # situation where the indirect syntax relies on parser guesswork.
      $self->[_C] = XML::DOM::NodeList->new;

      # every document carries its own XPath engine
      $self->[$xp_field]= XML::XPathEngine->new();

      return $self;
    }
}
51
52package XML::DOM::Node;
53
# XPath entry points for a single node. Each one forwards the expression to
# the XML::XPathEngine object returned by $node->xp, using the node as the
# context node. $path is the XPath expression; the engine's result is
# returned unchanged.
sub findnodes            { my( $node, $path)= @_; return $node->xp->findnodes(            $path, $node); }
sub findnodes_as_string  { my( $node, $path)= @_; return $node->xp->findnodes_as_string(  $path, $node); }
# added so that nodes offer the same set of finders as the document package does
sub findnodes_as_strings { my( $node, $path)= @_; return $node->xp->findnodes_as_strings( $path, $node); }
sub findvalue            { my( $node, $path)= @_; return $node->xp->findvalue(            $path, $node); }
sub exists               { my( $node, $path)= @_; return $node->xp->exists(               $path, $node); }
sub find                 { my( $node, $path)= @_; return $node->xp->find(                 $path, $node); }
# matches: the node is the candidate, its owner document is passed as the first argument
sub matches              { my( $node, $path)= @_; return $node->xp->matches( $node->getOwnerDocument, $path, $node); }
60
# Node-type predicates: a generic node is neither a comment nor a processing
# instruction (the Comment and ProcessingInstruction packages override these).
sub isCommentNode
  { return 0; }

sub isPINode
  { return 0; }
63
# Wraps the node's string value in an XML::XPathEngine::Number object.
sub to_number
  { my $node= shift;
    return XML::XPathEngine::Number->new( $node->string_value);
  }
65
# Aliases mapping the method names used with the XPath engine onto the
# corresponding DOM calls.

# getParent: same as getParentNode
sub getParent
  { my $node= shift;
    return $node->getParentNode;
  }

# getRootNode: the owner document
sub getRootNode
  { my $node= shift;
    return $node->getOwnerDocument;
  }

# xp: the result of calling xp on the owner document
sub xp
  { my $node= shift;
    my $doc= $node->getOwnerDocument;
    return $doc->xp;
  }
70
# this method exists in XML::DOM but it returns undef, while
# XML::XPathEngine needs it, but wants an array... bother!
# This method is actually redefined for XML::DOM::Element, but needs
# to be here for other types of nodes.
#
# The answer depends on who is asking: callers whose package name starts
# with XML::XPathEngine get an empty list (or a reference to an empty array
# in scalar context), everybody else gets undef.
{ no warnings;
  sub getAttributes
    { my $calling_package= caller(0);

      # XML::DOM behaviour
      return undef unless( $calling_package=~ m{^XML::XPathEngine});

      # XML::XPathEngine behaviour
      my @no_atts= ();
      return \@no_atts unless( wantarray);
      return @no_atts;
    }
}
81
# Compares the positions of two nodes.
# Returns -1, 0 or 1 (first node before, same node, first node after), or
# undef when the two nodes do not share a topmost ancestor.
sub cmp
  { my( $left, $right)= @_;

    # easy cases
    # NOTE(review): the signs below are only right if $x->isAncestor( $y) is
    # true when $x contains $y - confirm against XML::DOM's isAncestor
    return  0 if( $left == $right);
    return -1 if( $left->isAncestor( $right)); # left starts before right
    return  1 if( $right->isAncestor( $left)); # left starts after right

    # special case for 2 attributes: within one element the order is the
    # dictionary order of the attribute names, otherwise the owning
    # elements are compared
    if( $left->isa( 'XML::DOM::Attr') && $right->isa( 'XML::DOM::Attr'))
      { my $left_owner=  $left->getParent;
        my $right_owner= $right->getParent;
        return $left->getName cmp $right->getName if( $left_owner == $right_owner);
        return $left_owner->cmp( $right_owner);
      }

    # each chain starts with the node itself and ends with its topmost ancestor
    my @left_chain=  ($left->ancestors_or_self);
    my @right_chain= ($right->ancestors_or_self);

    # the 2 nodes are not in the same tree
    return undef unless( $left_chain[-1] == $right_chain[-1]);

    # strip the common ancestors from the top: what remains on each side is
    # the first non common ancestor (the two are siblings)
    my( $left_anc, $right_anc);
    do
      { $left_anc=  pop @left_chain;
        $right_anc= pop @right_chain;
      } while( $left_anc == $right_anc);

    # from there move left and right and figure out the order: walk away from
    # both siblings in both directions, one step at a time, until a cursor
    # meets one coming from the other side or falls off the sibling list
    my $left_back=  $left_anc;
    my $left_fwd=   $left_anc;
    my $right_back= $right_anc;
    my $right_fwd=  $right_anc;
    while( 1)
      { $left_back= $left_back->getPreviousSibling || return( -1);
        return 1 if( $left_back == $right_fwd);
        $left_fwd= $left_fwd->getNextSibling || return( 1);
        return -1 if( $left_fwd == $right_back);
        $right_back= $right_back->getPreviousSibling || return( 1);
        return -1 if( $right_back == $left_fwd);
        $right_fwd= $right_fwd->getNextSibling || return( -1);
        return 1 if( $right_fwd == $left_back);
      }
  }
128
# Returns the node followed by its parent, grand-parent and so on, as long
# as getParent keeps returning a true value.
sub ancestors_or_self
  { my $current= shift;
    my @chain= ($current);
    push @chain, $current while( $current= $current->getParent);
    return @chain;
  }
136
# Builds an XML::DOM::Namespace object for a prefix.
# The prefix is the optional argument, or the node's own prefix when the
# argument is missing or false. The expanded name comes from get_namespace;
# when that yields nothing the namespace object gets an empty string.
sub getNamespace
  { my( $node, $prefix)= @_;
    $prefix ||= $node->ns_prefix;
    my $expanded= $node->get_namespace( $prefix) || '';
    return XML::DOM::Namespace->new( $prefix, $expanded);
  }
145
# Returns the node name without its prefix, i.e. with everything up to and
# including the first colon removed. Names without a colon come back as is.
sub getLocalName
  { my $node= shift;
    my $name= $node->getName;
    $name=~ s{^[^:]*:}{};
    return $name;
  }
151
# Returns the part of the node name before the first colon, or the marker
# '#default' when the name has no colon.
sub ns_prefix
  { my $node= shift;
    return $1 if( $node->getName=~ m{^([^:]*):});
    return( '#default'); # should it be '' ?
  }
159
BEGIN
  { # prefixes that are resolved without looking at the document
    my %DEFAULT_NS= ( xml   => "http://www.w3.org/XML/1998/namespace",
                      xmlns => "http://www.w3.org/2000/xmlns/",
                    );

    # Returns the expanded name bound to a prefix, or '' when none is found.
    # The prefix is the optional argument (the node's own prefix when it is
    # not defined). Built-in prefixes win, otherwise the matching xmlns
    # attribute is looked up through inherit_att.
    sub get_namespace
      { my( $node, @args)= @_;
        my $prefix= defined $args[0] ? $args[0] : $node->ns_prefix;
        $prefix= '' if( $prefix eq "#default");

        # the default namespace is declared by a plain "xmlns" attribute
        my $ns_att= "xmlns";
        $ns_att .= ":$prefix" if( $prefix);

        return $DEFAULT_NS{$prefix} || $node->inherit_att( $ns_att) || '';
      }
  }
174
# Looks for an attribute on the node or on the closest node above it.
# Walks up through getParentNode and returns the first true value that an
# element node has for $att, or undef when the top is reached without one.
sub inherit_att
  { my( $node, $att)= @_;

    while( 1)
      { if( $node->getNodeType == ELEMENT_NODE)
          { my $value= $node->getAttribute( $att);
            return $value if( $value);
          }
        $node= $node->getParentNode or last;
      }
    return undef;
  }
185
186package XML::DOM::Element;
187
# The name of an element is its tag name.
sub getName
  { my $elt= shift;
    return $elt->getTagName;
  }
189
{ no warnings;

# this method exists in XML::DOM but it returns a NamedNodeMap object
# XML::XPathEngine needs it, but wants an array... bother!
#
# The answer depends on who is asking: callers whose package name starts
# with XML::XPathEngine get the attribute nodes as a list (or as an array
# reference in scalar context), everybody else gets the attribute map.
sub getAttributes
  { my $elt= $_[0];

    # in any case we need the attribute map to be filled
    $elt->[_A] ||= XML::DOM::NamedNodeMap->new( Doc => $elt->[_Doc], Parent => $elt);

    # the original XML::DOM value
    return $elt->[_A] unless( caller(0)=~ m{^XML::XPathEngine});

    # this is what XML::XPathEngine needs
    # NOTE(review): slot 1 is presumably the same slot as _A - confirm
    my @atts= grep { ref $_ eq 'XML::DOM::Attr' } values %{$elt->[1]};

    # record the owning element in each attribute, so that the attribute's
    # getParent can find it later
    for my $att (@atts)
      { $att->[$parent_field]= $elt; }

    return wantarray ? @atts : \@atts;
  }

}
212
# nearly straight from XML::XPathEngine
# The string value of an element is the concatenation of the string values
# of its element and text children, in the order getChildNodes returns them;
# other kinds of children are skipped.
sub string_value
  { my $elt= shift;
    my @pieces= map  { scalar $_->string_value }
                grep { my $type= $_->getNodeType;
                       $type == ELEMENT_NODE || $type == TEXT_NODE
                     }
                $elt->getChildNodes;
    return join '', @pieces;
  }
223
224
225
226package XML::DOM::Attr;
227
# needed for the sort
# Looks an attribute value up starting from the element that owns this
# attribute node. The arguments after the invocant (the attribute name) are
# passed on to the owner's inherit_att and its result is returned.
# Returns undef when no owning element has been recorded for the attribute.
sub inherit_att
  { my $attr= shift; # take the invocant off, so that it is not forwarded as the attribute name
    my $owner= $attr->getParent or return undef;
    return $owner->inherit_att( @_);
  }
230
# Returns whatever is stored in the attribute's $parent_field slot
# (the element's getAttributes in this file puts the owning element there).
sub getParent
  { my $attr= shift;
    return $attr->[$parent_field];
  }
# For an attribute both the string value and the data are its value.
sub string_value
  { my $attr= shift;
    return $attr->getValue;
  }

sub getData
  { my $attr= shift;
    return $attr->getValue;
  }
234
235
236package XML::DOM::Text;
# The string value of a text node is its data.
sub string_value
  { my $text= shift;
    return $text->getData;
  }
238
239
240package XML::DOM::Comment;
# A comment identifies itself as such, and its string value is its data.
sub isCommentNode
  { return 1; }

sub string_value
  { my $comment= shift;
    return $comment->getData;
  }
243
244
245package XML::DOM::ProcessingInstruction;
246
# A processing instruction answers true to both spellings of the type
# predicate; its string value and its value are both its data.
sub isPINode
  { return 1; }

sub isProcessingInstructionNode
  { return 1; }

sub string_value
  { my $pi= shift;
    return $pi->getData;
  }

sub value
  { my $pi= shift;
    return $pi->getData;
  }
251
252
253package XML::DOM::Namespace;
254
# Minimal namespace node: a hash holding a prefix and the expanded name
# bound to it.

# new( $prefix, $expanded): creates the namespace object
sub new
  { my( $class, @parts)= @_;
    my $self= { prefix => $parts[0], expanded => $parts[1] };
    return bless $self, $class;
  }

sub isNamespaceNode
  { return 1; }

# the prefix given to the constructor
sub getPrefix
  { my $ns= shift;
    return $ns->{prefix};
  }

# the expanded name is available under three names
sub getExpanded
  { my $ns= shift;
    return $ns->{expanded};
  }

sub getValue
  { my $ns= shift;
    return $ns->{expanded};
  }

sub getData
  { my $ns= shift;
    return $ns->{expanded};
  }
266
267
2681;
269__END__
270
271=head1 NAME
272
XML::DOM::XPath - Perl extension to add XPath support to XML::DOM, using XML::XPathEngine
274
275=head1 SYNOPSIS
276
277  use XML::DOM::XPath;
278
279  my $parser= XML::DOM::Parser->new();
280  my $doc = $parser->parsefile ("file.xml");
281
282  # print all HREF attributes of all CODEBASE elements
283  # compare with the XML::DOM version to see how much easier it is to use
284  my @nodes = $doc->findnodes( '//CODEBASE[@HREF]/@HREF');
285  print $_->getValue, "\n" foreach (@nodes);
286
287=head1 DESCRIPTION
288
XML::DOM::XPath allows you to use XML::XPathEngine methods to query
a DOM. This is often much easier than relying only on getElementsByTagName.
291
292It lets you use all of the XML::DOM methods.
293
294=head1 METHODS
295
These methods can be applied to a whole DOM document or to a single node.
297
298=head2 findnodes($path)
299
300return a list of nodes found by $path.
301
302=head2 findnodes_as_string($path)
303
304return the nodes found reproduced as XML. The result is not guaranteed
305to be valid XML though.
306
307=head2 findvalue($path)
308
return the concatenation of the text content of the result nodes.
310
311=head2 exists($path)
312
313return true if the given path exists.
314
315=head2 matches($path)
316
317return true if the node matches the path.
318
319
320=head1 SEE ALSO
321
322  XML::DOM
323
324  XML::XPathEngine
325
326=head1 AUTHOR
327
328Michel Rodriguez, mirod@cpan.org
329
330=head1 COPYRIGHT AND LICENSE
331
332Copyright 2003 by Michel Rodriguez
333
334This library is free software; you can redistribute it and/or modify
335it under the same terms as Perl itself.
336
337=cut
338