1 /* Copyright 2002-2005 Elliotte Rusty Harold 2 3 This library is free software; you can redistribute it and/or modify 4 it under the terms of version 2.1 of the GNU Lesser General Public 5 License as published by the Free Software Foundation. 6 7 This library is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 GNU Lesser General Public License for more details. 11 12 You should have received a copy of the GNU Lesser General Public 13 License along with this library; if not, write to the 14 Free Software Foundation, Inc., 59 Temple Place, Suite 330, 15 Boston, MA 02111-1307 USA 16 17 You can contact Elliotte Rusty Harold by sending e-mail to 18 elharo@ibiblio.org. Please include the word "XOM" in the 19 subject line. The XOM home page is located at http://www.xom.nu/ 20 */ 21 22 package nu.xom; 23 24 import java.util.Iterator; 25 import java.util.List; 26 27 import org.jaxen.NamespaceContext; 28 29 /** 30 * 31 * <p> 32 * The generic superclass for all the contents 33 * of an XML document. There are exactly eight kinds of 34 * nodes in XOM: 35 * </p> 36 * 37 * <ul> 38 * <li><code>Element</code></li> 39 * <li><code>Document</code></li> 40 * <li><code>Text</code></li> 41 * <li><code>Comment</code></li> 42 * <li><code>Attribute</code></li> 43 * <li><code>ProcessingInstruction</code></li> 44 * <li><code>DocType</code></li> 45 * <li><code>Namespace</code></li> 46 * </ul> 47 * 48 * <p> 49 * Every instance of <code>Node</code> is an 50 * instance of one of these eight classes 51 * (including, possibly, one of their subclasses). 52 * </p> 53 * 54 * 55 * @author Elliotte Rusty Harold 56 * @version 1.1b4 57 * 58 */ 59 public abstract class Node { 60 61 62 private ParentNode parent = null; 63 64 /** 65 * <p> 66 * Creates a new <code>Node</code> object. 67 * Can only be invoked by other members of 68 * the <code>nu.xom</code> package. 69 * </p> 70 */ Node()71 Node() {} 72 73 74 /** 75 * <p> 76 * Returns the XPath 1.0 string-value of this node. 77 * </p> 78 * 79 * @return the XPath 1.0 string-value of this node 80 */ getValue()81 public abstract String getValue(); 82 83 84 /** 85 * 86 * <p> 87 * Returns the document that contains this node, 88 * or null if this node is not currently part of a document. 89 * Each node belongs to no more than one document at a time. 90 * If this node is a <code>Document</code>, then it returns 91 * this node. 92 * </p> 93 * 94 * @return the document this node is a part of 95 */ getDocument()96 public final Document getDocument() { 97 Node parent = this; 98 while (parent != null && !(parent.isDocument())) { 99 parent = parent.getParent(); 100 } 101 return (Document) parent; 102 } 103 104 105 /** 106 * 107 * <p> 108 * Returns the root of the subtree in which this node is found, 109 * whether that's a document or an element. 110 * </p> 111 * 112 * @return the document this node is a part of 113 */ getRoot()114 final Node getRoot() { 115 116 Node parent = this.getParent(); 117 if (parent == null) { 118 return this; 119 } 120 while (parent.getParent() != null) { 121 parent = parent.getParent(); 122 } 123 return parent; 124 125 } 126 127 128 /** 129 * 130 * <p> 131 * Returns the base URI of this node as specified by 132 * <a href="http://www.w3.org/TR/xmlbase/" target="_top">XML 133 * Base</a>, or the empty string if this is not known. In most 134 * cases, this is the URL against which relative URLs in this node 135 * should be resolved. 136 * </p> 137 * 138 * <p> 139 * The base URI of a non-parent node is the base URI of the 140 * element containing the node. The base URI of a document 141 * node is the URI from which the document was parsed, 142 * or which was set by calling <code>setBaseURI</code> on 143 * on the document. 144 * </p> 145 * 146 * <p> 147 * The base URI of an element is determined as follows: 148 * </p> 149 * 150 * <ul> 151 * <li> 152 * If the element has an <code>xml:base</code> attribute, 153 * then the value of that attribute is 154 * converted from an IRI to a URI, absolutized if possible, 155 * and returned. 156 * </li> 157 * <li> 158 * Otherwise, if any ancestor element of the element loaded 159 * from the same entity has an <code>xml:base</code> 160 * attribute, then the value of that attribute from the 161 * nearest such ancestor is converted from an IRI to a URI, 162 * absolutized if possible, and returned. 163 * <em><code>xml:base</code> attributes from other entities are 164 * not considered.</em> 165 * </li> 166 * <li> 167 * Otherwise, if <code>setBaseURI()</code> has been invoked on 168 * this element, then the URI most recently passed to that method 169 * is absolutized if possible and returned. 170 * </li> 171 * <li> 172 * Otherwise, if the element comes from an externally 173 * parsed entity or the document entity, and the 174 * original base URI has not been changed by invoking 175 * <code>setBaseURI()</code>, then the URI of that entity is 176 * returned. 177 * </li> 178 * <li> 179 * Otherwise, (the element was created by a constructor 180 * rather then being parsed from an existing document), the 181 * base URI of the nearest ancestor that does have a base URI 182 * is returned. If no ancestors have a base URI, then the 183 * empty string is returned. 184 * </li> 185 * </ul> 186 * 187 * <p> 188 * Absolutization takes place as specified by the 189 * <a target="_top" href="http://www.w3.org/TR/xmlbase/">XML 190 * Base specification</a>. However, it is not always possible to 191 * absolutize a relative URI, in which case the empty string will 192 * be returned. 193 * </p> 194 * 195 * @return the base URI of this node 196 */ getBaseURI()197 public String getBaseURI() { 198 if (parent == null) return ""; 199 return parent.getBaseURI(); 200 } 201 202 203 /** 204 * 205 * <p> 206 * Returns the node that contains this node, 207 * or null if this node does not have a parent. 208 * </p> 209 * 210 * @return the element or document that most immediately 211 * contains this node 212 */ getParent()213 public final ParentNode getParent() { 214 return this.parent; 215 } 216 217 setParent(ParentNode parent)218 final void setParent(ParentNode parent) { 219 this.parent = parent; 220 } 221 222 223 /** 224 * <p> 225 * Removes this node from its parent so that it can be added 226 * to a different parent node or document. This method does nothing 227 * if the node does not have a parent. 228 * </p> 229 * 230 * @throws XMLException if the parent refuses to detach this node 231 */ detach()232 public void detach() { 233 234 if (parent == null) return; 235 else if (this.isAttribute()) { 236 Element element = (Element) parent; 237 element.removeAttribute((Attribute) this); 238 } 239 else { 240 parent.removeChild(this); 241 } 242 243 } 244 245 246 /** 247 * <p> 248 * Returns the child of this node at the specified position. 249 * </p> 250 * 251 * @param position the index of the child node to return 252 * 253 * @return the position<sup>th</sup> child node of this node 254 * 255 * @throws IndexOutOfBoundsException if this node does not have children 256 */ getChild(int position)257 public abstract Node getChild(int position); 258 259 260 /** 261 * <p> 262 * Returns the number of children of this node. 263 * This is always non-negative (greater than or equal to zero). 264 * </p> 265 * 266 * @return the number of children of this node 267 */ getChildCount()268 public abstract int getChildCount(); 269 270 271 /** 272 * <p> 273 * Returns a deep copy of this node with no parent, 274 * that can be added to the current document or a different one. 275 * </p> 276 * 277 * <p> 278 * Per Bloch, the <code>Cloneable</code> 279 * interface is just a mess and should 280 * be avoided. However, I do not follow his suggestion of a copy 281 * constructor exclusively because it is useful to be able to 282 * copy a node without knowing its more specific type. 283 * Ken Arnold agrees with this. It's more effective for 284 * subclasses that can return an instance of the subclass. 285 * </p> 286 * 287 * @return a copy of this node without a parent 288 */ copy()289 public abstract Node copy(); 290 291 292 /** 293 * <p> 294 * Returns the actual XML form of this node, such as might be 295 * copied and pasted from the original document. However, this 296 * does not preserve semantically insignificant details such as 297 * white space inside tags or the use of empty-element tags vs. 298 * start-tag end-tag pairs. 299 * </p> 300 * 301 * @return an XML representation of this node 302 */ toXML()303 public abstract String toXML(); 304 305 306 /** 307 * <p> 308 * Tests for node identity. That is, two 309 * <code>Node</code> objects are equal 310 * if and only if they are the same object. 311 * </p> 312 * 313 * @param o the object compared for equality to this node 314 * 315 * @return true if <code>o</code> is this node; false otherwise 316 * 317 * @see java.lang.Object#equals(Object) 318 */ equals(Object o)319 public final boolean equals(Object o) { 320 return this == o; 321 } 322 323 324 /** 325 * <p> 326 * Returns a unique identifier for this node. 327 * The value returned is the same as returned by 328 * <code>super.hashCode()</code> 329 * because nodes use identity semantics. 330 * </p> 331 * 332 * @return a probably unique identifier for this node 333 * 334 * @see java.lang.Object#hashCode() 335 */ hashCode()336 public final int hashCode() { 337 return super.hashCode(); 338 } 339 340 341 /** 342 * <p> 343 * Returns the nodes selected by the XPath expression in the 344 * context of this node in document order as defined in XSLT. 345 * All namespace prefixes used in the 346 * expression should be bound to namespace URIs by the 347 * second argument. 348 * </p> 349 * 350 * <p> 351 * Note that XPath expressions operate on the XPath data model, 352 * not the XOM data model. XPath counts all adjacent 353 * <code>Text</code> objects as a single text node, and does not 354 * consider empty <code>Text</code> objects. For instance, an 355 * element that has exactly three text children in XOM, will 356 * have exactly one text child in XPath, whose value is the 357 * concatenation of all three XOM <code>Text</code> objects. 358 * </p> 359 * 360 * <p> 361 * You can use XPath expressions that use the namespace axis. 362 * However, namespace nodes are never returned. If an XPath 363 * expression only selects namespace nodes, then this method will 364 * return an empty list. 365 * </p> 366 * 367 * <p> 368 * No variables are bound. 369 * </p> 370 * 371 * <p> 372 * The context position is the index of this node among its parents 373 * children, counting adjacent text nodes as one. The context size 374 * is the number of children this node's parent has, again counting 375 * adjacent text nodes as one node. If the parent is a 376 * <code>Document</code>, then the <code>DocType</code> (if any) is 377 * not counted. If the node has no parent, then the context position 378 * is 1, and the context size is 1. 379 * </p> 380 * 381 * <p> 382 * Queries such as /*, //, and /*//p that refer to the 383 * root node do work when operating with a context node that is not 384 * part of a document. However, the query / (return the root node) 385 * throws an <code>XPathException</code> when applied to a node 386 * that is not part of the document. Furthermore the top-level 387 * node in the tree is treated as the first and only child of the 388 * root node, not as the root node itself. For instance, this 389 * query stores <code>parent</code> in the <code>result</code> 390 * variable, not <code>child</code>: 391 * </p> 392 * 393 * <pre><code> Element parent = new Element("parent"); 394 * Element child = new Element("child"); 395 * parent.appendChild(child); 396 * Nodes results = child.query("/*"); 397 * Node result = result.get(0);</code></pre> 398 * 399 * @param xpath the XPath expression to evaluate 400 * @param namespaces a collection of namespace prefix bindings 401 * used in the XPath expression 402 * 403 * @return a list of all matched nodes; possibly empty 404 * 405 * @throws XPathException if there's a syntax error in the 406 * expression, the query returns something other than 407 * a node-set 408 * 409 */ query(String xpath, XPathContext namespaces)410 public final Nodes query(String xpath, XPathContext namespaces) { 411 412 if (this.isDocType()) { 413 throw new XPathException("Can't use XPath on a DocType"); 414 } 415 DocumentFragment frag = null; 416 417 Node root = getRoot(); 418 if (! root.isDocument()) { 419 frag = new DocumentFragment(); 420 frag.appendChild(root); 421 } 422 423 try { 424 JaxenConnector connector = new JaxenConnector(xpath); 425 if (namespaces == null) { 426 connector.setNamespaceContext(emptyContext); 427 } 428 else { 429 connector.setNamespaceContext(namespaces.getJaxenContext()); 430 } 431 432 List queryResults = connector.selectNodes(this); 433 return new Nodes(queryResults); 434 } 435 catch (XPathException ex) { 436 ex.setXPath(xpath); 437 throw ex; 438 } 439 catch (Exception ex) { // JaxenException and RuntimeException 440 // I can't trigger a RuntimeException with the current Jaxen 441 // code base; but it's been an issue in the past, and I'm 442 // not convinced it's fully fixed now. 443 XPathException xpe = new XPathException("XPath error: " + ex.getMessage(), ex); 444 xpe.setXPath(xpath); 445 throw xpe; 446 } 447 finally { 448 if (frag != null) frag.removeChild(0); 449 } 450 451 } 452 453 454 private static NamespaceContext emptyContext = new EmptyNamespaceContext(); 455 456 private static class EmptyNamespaceContext implements NamespaceContext { 457 translateNamespacePrefixToUri(String prefix)458 public String translateNamespacePrefixToUri(String prefix) { 459 // XML prefix is recognized automatically in Jaxen without 460 // calling this method. 461 // if ("xml".equals(prefix)) return Namespace.XML_NAMESPACE; 462 return null; 463 } 464 465 466 } 467 468 469 /** 470 * <p> 471 * Returns the nodes selected by the XPath expression in the 472 * context of this node in document order as defined by XSLT. 473 * This XPath expression must not contain 474 * any namespace prefixes. 475 * </p> 476 * 477 * <p> 478 * No variables are bound. No namespace prefixes are bound. 479 * </p> 480 * 481 * @param xpath the XPath expression to evaluate 482 * 483 * @return a list of all matched nodes; possibly empty 484 * 485 * @throws XPathException if there's a syntax error in the 486 * expression; or the query returns something other than 487 * a node-set 488 */ query(String xpath)489 public final Nodes query(String xpath) { 490 return query(xpath, null); 491 } 492 493 494 // Methods to replace instanceof tests to improve performance isElement()495 boolean isElement() { 496 return false; 497 } 498 isText()499 boolean isText() { 500 return false; 501 } 502 isComment()503 boolean isComment() { 504 return false; 505 } 506 isProcessingInstruction()507 boolean isProcessingInstruction() { 508 return false; 509 } 510 isAttribute()511 boolean isAttribute() { 512 return false; 513 } 514 isDocument()515 boolean isDocument() { 516 return false; 517 } 518 isDocType()519 boolean isDocType() { 520 return false; 521 } 522 isDocumentFragment()523 boolean isDocumentFragment() { 524 return false; 525 } 526 527 528 } 529