1 /* Copyright 2002-2005 Elliotte Rusty Harold
2 
3    This library is free software; you can redistribute it and/or modify
4    it under the terms of version 2.1 of the GNU Lesser General Public
5    License as published by the Free Software Foundation.
6 
7    This library is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU Lesser General Public License for more details.
11 
12    You should have received a copy of the GNU Lesser General Public
13    License along with this library; if not, write to the
14    Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15    Boston, MA 02111-1307  USA
16 
17    You can contact Elliotte Rusty Harold by sending e-mail to
18    elharo@ibiblio.org. Please include the word "XOM" in the
19    subject line. The XOM home page is located at http://www.xom.nu/
20 */
21 
22 package nu.xom;
23 
24 import java.util.Iterator;
25 import java.util.List;
26 
27 import org.jaxen.NamespaceContext;
28 
29 /**
30  *
31  * <p>
32  *  The generic superclass for all the contents
33  *  of an XML document. There are exactly eight kinds of
34  *  nodes in XOM:
35  * </p>
36  *
37  * <ul>
38  *   <li><code>Element</code></li>
39  *   <li><code>Document</code></li>
40  *   <li><code>Text</code></li>
41  *   <li><code>Comment</code></li>
42  *   <li><code>Attribute</code></li>
43  *   <li><code>ProcessingInstruction</code></li>
44  *   <li><code>DocType</code></li>
45  *   <li><code>Namespace</code></li>
46  * </ul>
47  *
48  * <p>
49  *   Every instance of <code>Node</code> is an
50  *   instance of one of these eight classes
51  *   (including, possibly, one of their subclasses).
52  * </p>
53  *
54  *
55  * @author Elliotte Rusty Harold
56  * @version 1.1b4
57  *
58  */
59 public abstract class Node {
60 
61 
62     private ParentNode parent = null;
63 
64     /**
65      * <p>
66      * Creates a new <code>Node</code> object.
67      * Can only be invoked by other members of
68      * the <code>nu.xom</code> package.
69      * </p>
70      */
Node()71     Node() {}
72 
73 
74     /**
75      * <p>
76      * Returns the XPath 1.0 string-value of this node.
77      * </p>
78      *
79      * @return the XPath 1.0 string-value of this node
80      */
getValue()81     public abstract String getValue();
82 
83 
84     /**
85      *
86      * <p>
87      * Returns the document that contains this node,
88      * or null if this node is not currently part of a document.
89      * Each node belongs to no more than one document at a time.
90      * If this node is a <code>Document</code>, then it returns
91      * this node.
92      * </p>
93      *
94      * @return the document this node is a part of
95      */
getDocument()96     public final Document getDocument() {
97         Node parent = this;
98         while (parent != null && !(parent.isDocument())) {
99             parent = parent.getParent();
100         }
101         return (Document) parent;
102     }
103 
104 
105     /**
106      *
107      * <p>
108      * Returns the root of the subtree in which this node is found,
109      * whether that's a document or an element.
110      * </p>
111      *
112      * @return the document this node is a part of
113      */
getRoot()114     final Node getRoot() {
115 
116         Node parent = this.getParent();
117         if (parent == null) {
118             return this;
119         }
120         while (parent.getParent() != null) {
121             parent = parent.getParent();
122         }
123         return parent;
124 
125     }
126 
127 
128     /**
129      *
130      * <p>
131      * Returns the base URI of this node as specified by
132      * <a href="http://www.w3.org/TR/xmlbase/" target="_top">XML
133      * Base</a>, or the empty string if this is not known. In most
134      * cases, this is the URL against which relative URLs in this node
135      * should be resolved.
136      * </p>
137      *
138      * <p>
139      *  The base URI of a non-parent node is the base URI of the
140      *  element containing the node. The base URI of a document
141      *  node is the URI from which the document was parsed,
142      *  or which was set by calling <code>setBaseURI</code> on
143      *  on the document.
144      * </p>
145      *
146      * <p>
147      * The base URI of an element is determined as follows:
148      * </p>
149      *
150      * <ul>
151      *   <li>
152      *     If the element has an <code>xml:base</code> attribute,
153      *     then the value of that attribute is
154      *     converted from an IRI to a URI, absolutized if possible,
155      *     and returned.
156      *   </li>
157      *   <li>
158      *      Otherwise, if any ancestor element of the element loaded
159      *      from the same entity has an <code>xml:base</code>
160      *      attribute, then the value of that attribute from the
161      *      nearest such ancestor is converted from an IRI to a URI,
162      *      absolutized if possible, and returned.
163      *      <em><code>xml:base</code> attributes from other entities are
164      *      not considered.</em>
165      *    </li>
166      *    <li>
167      *      Otherwise, if <code>setBaseURI()</code> has been invoked on
168      *      this element, then the URI most recently passed to that method
169      *      is absolutized if possible and returned.
170      *    </li>
171      *    <li>
172      *      Otherwise, if the element comes from an externally
173      *      parsed entity or the document entity, and the
174      *      original base URI has not been changed by invoking
175      *      <code>setBaseURI()</code>, then the URI of that entity is
176      *      returned.
177      *    </li>
178      *    <li>
179      *      Otherwise, (the element was created by a constructor
180      *      rather then being parsed from an existing document), the
181      *      base URI of the nearest ancestor that does have a base URI
182      *      is returned. If no ancestors have a base URI, then the
183      *      empty string is returned.
184      *    </li>
185      * </ul>
186      *
187      * <p>
188      *  Absolutization takes place as specified by the
189      *  <a target="_top" href="http://www.w3.org/TR/xmlbase/">XML
190      *  Base specification</a>. However, it is not always possible to
191      *  absolutize a relative URI, in which case the empty string will
192      *  be returned.
193      * </p>
194      *
195      * @return the base URI of this node
196      */
getBaseURI()197     public String getBaseURI() {
198         if (parent == null) return "";
199         return parent.getBaseURI();
200     }
201 
202 
203     /**
204      *
205      * <p>
206      * Returns the node that contains this node,
207      * or null if this node does not have a parent.
208      * </p>
209      *
210      * @return the element or document that most immediately
211      *     contains this node
212      */
getParent()213     public final ParentNode getParent() {
214         return this.parent;
215     }
216 
217 
setParent(ParentNode parent)218     final void setParent(ParentNode parent) {
219         this.parent = parent;
220     }
221 
222 
223     /**
224      * <p>
225      * Removes this node from its parent so that it can be added
226      * to a different parent node or document. This method does nothing
227      * if the node does not have a parent.
228      * </p>
229      *
230      * @throws XMLException if the parent refuses to detach this node
231      */
detach()232     public void detach() {
233 
234         if (parent == null) return;
235         else if (this.isAttribute()) {
236             Element element = (Element) parent;
237             element.removeAttribute((Attribute) this);
238         }
239         else {
240             parent.removeChild(this);
241         }
242 
243     }
244 
245 
246     /**
247      * <p>
248      *  Returns the child of this node at the specified position.
249      * </p>
250      *
251      * @param position the index of the child node to return
252      *
253      * @return the position<sup>th</sup> child node of this node
254      *
255      * @throws IndexOutOfBoundsException if this node does not have children
256      */
getChild(int position)257     public abstract Node getChild(int position);
258 
259 
260     /**
261      * <p>
262      * Returns the number of children of this node.
263      * This is always non-negative (greater than or equal to zero).
264      * </p>
265      *
266      * @return the number of children of this node
267      */
getChildCount()268     public abstract int getChildCount();
269 
270 
271     /**
272      * <p>
273      * Returns a deep copy of this node with no parent,
274      * that can be added to the current document or a different one.
275      * </p>
276      *
277      * <p>
278      * Per Bloch, the <code>Cloneable</code>
279      * interface is just a mess and should
280      * be avoided. However, I do not follow his suggestion of a copy
281      * constructor exclusively because it is useful to be able to
282      * copy a node without knowing its more specific type.
283      * Ken Arnold agrees with this. It's more effective for
284      * subclasses that can return an instance of the subclass.
285      * </p>
286      *
287      * @return a copy of this node without a parent
288      */
copy()289     public abstract Node copy();
290 
291 
292     /**
293      * <p>
294      * Returns the actual XML form of this node, such as might be
295      * copied and pasted from the original document. However, this
296      * does not preserve semantically insignificant details such as
297      * white space inside tags or the use of empty-element tags vs.
298      * start-tag end-tag pairs.
299      * </p>
300      *
301      * @return an XML representation of this node
302      */
toXML()303     public abstract String toXML();
304 
305 
306     /**
307      * <p>
308      * Tests for node identity. That is, two
309      * <code>Node</code> objects are equal
310      * if and only if they are the same object.
311      * </p>
312      *
313      * @param o the object compared for equality to this node
314      *
315      * @return true if <code>o</code> is this node; false otherwise
316      *
317      * @see java.lang.Object#equals(Object)
318      */
equals(Object o)319     public final boolean equals(Object o) {
320         return this == o;
321     }
322 
323 
324     /**
325      * <p>
326      * Returns a unique identifier for this node.
327      * The value returned is the same as returned by
328      * <code>super.hashCode()</code>
329      * because nodes use identity semantics.
330      * </p>
331      *
332      * @return a probably unique identifier for this node
333      *
334      * @see java.lang.Object#hashCode()
335      */
hashCode()336     public final int hashCode() {
337         return super.hashCode();
338     }
339 
340 
341     /**
342      * <p>
343      * Returns the nodes selected by the XPath expression in the
344      * context of this node in document order as defined in XSLT.
345      * All namespace prefixes used in the
346      * expression should be bound to namespace URIs by the
347      * second argument.
348      * </p>
349      *
350      * <p>
351      * Note that XPath expressions operate on the XPath data model,
352      * not the XOM data model. XPath counts all adjacent
353      * <code>Text</code> objects as a single text node, and does not
354      * consider empty <code>Text</code> objects. For instance, an
355      * element that has exactly three text children in XOM, will
356      * have exactly one text child in XPath, whose value is the
357      * concatenation of all three XOM <code>Text</code> objects.
358      * </p>
359      *
360      * <p>
361      * You can use XPath expressions that use the namespace axis.
362      * However, namespace nodes are never returned. If an XPath
363      * expression only selects namespace nodes, then this method will
364      * return an empty list.
365      * </p>
366      *
367      * <p>
368      * No variables are bound.
369      * </p>
370      *
371      * <p>
372      * The context position is the index of this node among its parents
373      * children, counting adjacent text nodes as one. The context size
374      * is the number of children this node's parent has, again counting
375      * adjacent text nodes as one node. If the parent is a
376      * <code>Document</code>, then the <code>DocType</code> (if any) is
377      * not counted. If the node has no parent, then the context position
378      * is 1, and the context size is 1.
379      * </p>
380      *
381      * <p>
382      * Queries such as /&#x2A;, //, and /&#x2A;//p that refer to the
383      * root node do work when operating with a context node that is not
384      * part of a document. However, the query / (return the root node)
385      * throws an <code>XPathException</code> when applied to a node
386      * that is not part of the document. Furthermore the top-level
387      * node in the tree is treated as the first and only child of the
388      * root node, not as the root node itself. For instance, this
389      * query stores <code>parent</code> in the <code>result</code>
390      * variable, not <code>child</code>:
391      * </p>
392      *
393      * <pre><code>  Element parent = new Element("parent");
394      *   Element child = new Element("child");
395      *   parent.appendChild(child);
396      *   Nodes results = child.query("/*");
397      *   Node result = result.get(0);</code></pre>
398      *
399      * @param xpath the XPath expression to evaluate
400      * @param namespaces a collection of namespace prefix bindings
401      *     used in the XPath expression
402      *
403      * @return a list of all matched nodes; possibly empty
404      *
405      * @throws XPathException if there's a syntax error in the
406      *     expression, the query returns something other than
407      *     a node-set
408      *
409      */
query(String xpath, XPathContext namespaces)410     public final Nodes query(String xpath, XPathContext namespaces) {
411 
412         if (this.isDocType()) {
413             throw new XPathException("Can't use XPath on a DocType");
414         }
415         DocumentFragment frag = null;
416 
417         Node root = getRoot();
418         if (! root.isDocument()) {
419             frag = new DocumentFragment();
420             frag.appendChild(root);
421         }
422 
423         try {
424             JaxenConnector connector = new JaxenConnector(xpath);
425             if (namespaces == null) {
426                 connector.setNamespaceContext(emptyContext);
427             }
428             else {
429                 connector.setNamespaceContext(namespaces.getJaxenContext());
430             }
431 
432             List queryResults = connector.selectNodes(this);
433             return new Nodes(queryResults);
434         }
435         catch (XPathException ex) {
436             ex.setXPath(xpath);
437             throw ex;
438         }
439         catch (Exception ex) { // JaxenException and RuntimeException
440             // I can't trigger a RuntimeException with the current Jaxen
441             // code base; but it's been an issue in the past, and I'm
442             // not convinced it's fully fixed now.
443             XPathException xpe = new XPathException("XPath error: " + ex.getMessage(), ex);
444             xpe.setXPath(xpath);
445             throw xpe;
446         }
447         finally {
448             if (frag != null) frag.removeChild(0);
449         }
450 
451     }
452 
453 
454     private static NamespaceContext emptyContext = new EmptyNamespaceContext();
455 
456     private static class EmptyNamespaceContext implements NamespaceContext {
457 
translateNamespacePrefixToUri(String prefix)458         public String translateNamespacePrefixToUri(String prefix) {
459             // XML prefix is recognized automatically in Jaxen without
460             // calling this method.
461             // if ("xml".equals(prefix)) return Namespace.XML_NAMESPACE;
462             return null;
463         }
464 
465 
466     }
467 
468 
469     /**
470      * <p>
471      * Returns the nodes selected by the XPath expression in the
472      * context of this node in document order as defined by XSLT.
473      * This XPath expression must not contain
474      * any namespace prefixes.
475      * </p>
476      *
477      * <p>
478      * No variables are bound. No namespace prefixes are bound.
479      * </p>
480      *
481      * @param xpath the XPath expression to evaluate
482      *
483      * @return a list of all matched nodes; possibly empty
484      *
485      * @throws XPathException if there's a syntax error in the
486      *     expression; or the query returns something other than
487      *     a node-set
488      */
query(String xpath)489     public final Nodes query(String xpath) {
490         return query(xpath, null);
491     }
492 
493 
494     // Methods to replace instanceof tests to improve performance
isElement()495     boolean isElement() {
496         return false;
497     }
498 
isText()499     boolean isText() {
500         return false;
501     }
502 
isComment()503     boolean isComment() {
504         return false;
505     }
506 
isProcessingInstruction()507     boolean isProcessingInstruction() {
508         return false;
509     }
510 
isAttribute()511     boolean isAttribute() {
512         return false;
513     }
514 
isDocument()515     boolean isDocument() {
516         return false;
517     }
518 
isDocType()519     boolean isDocType() {
520         return false;
521     }
522 
isDocumentFragment()523     boolean isDocumentFragment() {
524         return false;
525     }
526 
527 
528 }
529