1"""Simple implementation of the Level 1 DOM. 2 3Namespaces and other minor Level 2 features are also supported. 4 5parse("foo.xml") 6 7parseString("<foo><bar/></foo>") 8 9Todo: 10===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16""" 17 18import io 19import xml.dom 20 21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 22from xml.dom.minicompat import * 23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 24 25# This is used by the ID-cache invalidation checks; the list isn't 26# actually complete, since the nodes being checked will never be the 27# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 28# the node being added or removed, not the node being modified.) 29# 30_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 31 xml.dom.Node.ENTITY_REFERENCE_NODE) 32 33 34class Node(xml.dom.Node): 35 namespaceURI = None # this is non-null only for elements and attributes 36 parentNode = None 37 ownerDocument = None 38 nextSibling = None 39 previousSibling = None 40 41 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 42 43 def __bool__(self): 44 return True 45 46 def toxml(self, encoding=None): 47 return self.toprettyxml("", "", encoding) 48 49 def toprettyxml(self, indent="\t", newl="\n", encoding=None): 50 if encoding is None: 51 writer = io.StringIO() 52 else: 53 writer = io.TextIOWrapper(io.BytesIO(), 54 encoding=encoding, 55 errors="xmlcharrefreplace", 56 newline='\n') 57 if self.nodeType == Node.DOCUMENT_NODE: 58 # Can pass encoding only to document, to put it into XML header 59 self.writexml(writer, "", indent, newl, encoding) 60 else: 61 self.writexml(writer, "", indent, newl) 62 if encoding is None: 63 return writer.getvalue() 64 else: 65 return writer.detach().getvalue() 66 67 def hasChildNodes(self): 68 return bool(self.childNodes) 69 70 def 
_get_childNodes(self): 71 return self.childNodes 72 73 def _get_firstChild(self): 74 if self.childNodes: 75 return self.childNodes[0] 76 77 def _get_lastChild(self): 78 if self.childNodes: 79 return self.childNodes[-1] 80 81 def insertBefore(self, newChild, refChild): 82 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 83 for c in tuple(newChild.childNodes): 84 self.insertBefore(c, refChild) 85 ### The DOM does not clearly specify what to return in this case 86 return newChild 87 if newChild.nodeType not in self._child_node_types: 88 raise xml.dom.HierarchyRequestErr( 89 "%s cannot be child of %s" % (repr(newChild), repr(self))) 90 if newChild.parentNode is not None: 91 newChild.parentNode.removeChild(newChild) 92 if refChild is None: 93 self.appendChild(newChild) 94 else: 95 try: 96 index = self.childNodes.index(refChild) 97 except ValueError: 98 raise xml.dom.NotFoundErr() 99 if newChild.nodeType in _nodeTypes_with_children: 100 _clear_id_cache(self) 101 self.childNodes.insert(index, newChild) 102 newChild.nextSibling = refChild 103 refChild.previousSibling = newChild 104 if index: 105 node = self.childNodes[index-1] 106 node.nextSibling = newChild 107 newChild.previousSibling = node 108 else: 109 newChild.previousSibling = None 110 newChild.parentNode = self 111 return newChild 112 113 def appendChild(self, node): 114 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 115 for c in tuple(node.childNodes): 116 self.appendChild(c) 117 ### The DOM does not clearly specify what to return in this case 118 return node 119 if node.nodeType not in self._child_node_types: 120 raise xml.dom.HierarchyRequestErr( 121 "%s cannot be child of %s" % (repr(node), repr(self))) 122 elif node.nodeType in _nodeTypes_with_children: 123 _clear_id_cache(self) 124 if node.parentNode is not None: 125 node.parentNode.removeChild(node) 126 _append_child(self, node) 127 node.nextSibling = None 128 return node 129 130 def replaceChild(self, newChild, oldChild): 131 if newChild.nodeType == 
self.DOCUMENT_FRAGMENT_NODE: 132 refChild = oldChild.nextSibling 133 self.removeChild(oldChild) 134 return self.insertBefore(newChild, refChild) 135 if newChild.nodeType not in self._child_node_types: 136 raise xml.dom.HierarchyRequestErr( 137 "%s cannot be child of %s" % (repr(newChild), repr(self))) 138 if newChild is oldChild: 139 return 140 if newChild.parentNode is not None: 141 newChild.parentNode.removeChild(newChild) 142 try: 143 index = self.childNodes.index(oldChild) 144 except ValueError: 145 raise xml.dom.NotFoundErr() 146 self.childNodes[index] = newChild 147 newChild.parentNode = self 148 oldChild.parentNode = None 149 if (newChild.nodeType in _nodeTypes_with_children 150 or oldChild.nodeType in _nodeTypes_with_children): 151 _clear_id_cache(self) 152 newChild.nextSibling = oldChild.nextSibling 153 newChild.previousSibling = oldChild.previousSibling 154 oldChild.nextSibling = None 155 oldChild.previousSibling = None 156 if newChild.previousSibling: 157 newChild.previousSibling.nextSibling = newChild 158 if newChild.nextSibling: 159 newChild.nextSibling.previousSibling = newChild 160 return oldChild 161 162 def removeChild(self, oldChild): 163 try: 164 self.childNodes.remove(oldChild) 165 except ValueError: 166 raise xml.dom.NotFoundErr() 167 if oldChild.nextSibling is not None: 168 oldChild.nextSibling.previousSibling = oldChild.previousSibling 169 if oldChild.previousSibling is not None: 170 oldChild.previousSibling.nextSibling = oldChild.nextSibling 171 oldChild.nextSibling = oldChild.previousSibling = None 172 if oldChild.nodeType in _nodeTypes_with_children: 173 _clear_id_cache(self) 174 175 oldChild.parentNode = None 176 return oldChild 177 178 def normalize(self): 179 L = [] 180 for child in self.childNodes: 181 if child.nodeType == Node.TEXT_NODE: 182 if not child.data: 183 # empty text node; discard 184 if L: 185 L[-1].nextSibling = child.nextSibling 186 if child.nextSibling: 187 child.nextSibling.previousSibling = child.previousSibling 188 
child.unlink() 189 elif L and L[-1].nodeType == child.nodeType: 190 # collapse text node 191 node = L[-1] 192 node.data = node.data + child.data 193 node.nextSibling = child.nextSibling 194 if child.nextSibling: 195 child.nextSibling.previousSibling = node 196 child.unlink() 197 else: 198 L.append(child) 199 else: 200 L.append(child) 201 if child.nodeType == Node.ELEMENT_NODE: 202 child.normalize() 203 self.childNodes[:] = L 204 205 def cloneNode(self, deep): 206 return _clone_node(self, deep, self.ownerDocument or self) 207 208 def isSupported(self, feature, version): 209 return self.ownerDocument.implementation.hasFeature(feature, version) 210 211 def _get_localName(self): 212 # Overridden in Element and Attr where localName can be Non-Null 213 return None 214 215 # Node interfaces from Level 3 (WD 9 April 2002) 216 217 def isSameNode(self, other): 218 return self is other 219 220 def getInterface(self, feature): 221 if self.isSupported(feature, None): 222 return self 223 else: 224 return None 225 226 # The "user data" functions use a dictionary that is only present 227 # if some user data has been set, so be careful not to assume it 228 # exists. 
229 230 def getUserData(self, key): 231 try: 232 return self._user_data[key][0] 233 except (AttributeError, KeyError): 234 return None 235 236 def setUserData(self, key, data, handler): 237 old = None 238 try: 239 d = self._user_data 240 except AttributeError: 241 d = {} 242 self._user_data = d 243 if key in d: 244 old = d[key][0] 245 if data is None: 246 # ignore handlers passed for None 247 handler = None 248 if old is not None: 249 del d[key] 250 else: 251 d[key] = (data, handler) 252 return old 253 254 def _call_user_data_handler(self, operation, src, dst): 255 if hasattr(self, "_user_data"): 256 for key, (data, handler) in list(self._user_data.items()): 257 if handler is not None: 258 handler.handle(operation, key, data, src, dst) 259 260 # minidom-specific API: 261 262 def unlink(self): 263 self.parentNode = self.ownerDocument = None 264 if self.childNodes: 265 for child in self.childNodes: 266 child.unlink() 267 self.childNodes = NodeList() 268 self.previousSibling = None 269 self.nextSibling = None 270 271 # A Node is its own context manager, to ensure that an unlink() call occurs. 272 # This is similar to how a file object works. 
273 def __enter__(self): 274 return self 275 276 def __exit__(self, et, ev, tb): 277 self.unlink() 278 279defproperty(Node, "firstChild", doc="First child node, or None.") 280defproperty(Node, "lastChild", doc="Last child node, or None.") 281defproperty(Node, "localName", doc="Namespace-local name of this node.") 282 283 284def _append_child(self, node): 285 # fast path with less checks; usable by DOM builders if careful 286 childNodes = self.childNodes 287 if childNodes: 288 last = childNodes[-1] 289 node.previousSibling = last 290 last.nextSibling = node 291 childNodes.append(node) 292 node.parentNode = self 293 294def _in_document(node): 295 # return True iff node is part of a document tree 296 while node is not None: 297 if node.nodeType == Node.DOCUMENT_NODE: 298 return True 299 node = node.parentNode 300 return False 301 302def _write_data(writer, data): 303 "Writes datachars to writer." 304 if data: 305 data = data.replace("&", "&").replace("<", "<"). \ 306 replace("\"", """).replace(">", ">") 307 writer.write(data) 308 309def _get_elements_by_tagName_helper(parent, name, rc): 310 for node in parent.childNodes: 311 if node.nodeType == Node.ELEMENT_NODE and \ 312 (name == "*" or node.tagName == name): 313 rc.append(node) 314 _get_elements_by_tagName_helper(node, name, rc) 315 return rc 316 317def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 318 for node in parent.childNodes: 319 if node.nodeType == Node.ELEMENT_NODE: 320 if ((localName == "*" or node.localName == localName) and 321 (nsURI == "*" or node.namespaceURI == nsURI)): 322 rc.append(node) 323 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 324 return rc 325 326class DocumentFragment(Node): 327 nodeType = Node.DOCUMENT_FRAGMENT_NODE 328 nodeName = "#document-fragment" 329 nodeValue = None 330 attributes = None 331 parentNode = None 332 _child_node_types = (Node.ELEMENT_NODE, 333 Node.TEXT_NODE, 334 Node.CDATA_SECTION_NODE, 335 Node.ENTITY_REFERENCE_NODE, 336 
Node.PROCESSING_INSTRUCTION_NODE, 337 Node.COMMENT_NODE, 338 Node.NOTATION_NODE) 339 340 def __init__(self): 341 self.childNodes = NodeList() 342 343 344class Attr(Node): 345 __slots__=('_name', '_value', 'namespaceURI', 346 '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') 347 nodeType = Node.ATTRIBUTE_NODE 348 attributes = None 349 specified = False 350 _is_id = False 351 352 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 353 354 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 355 prefix=None): 356 self.ownerElement = None 357 self._name = qName 358 self.namespaceURI = namespaceURI 359 self._prefix = prefix 360 self.childNodes = NodeList() 361 362 # Add the single child node that represents the value of the attr 363 self.childNodes.append(Text()) 364 365 # nodeValue and value are set elsewhere 366 367 def _get_localName(self): 368 try: 369 return self._localName 370 except AttributeError: 371 return self.nodeName.split(":", 1)[-1] 372 373 def _get_specified(self): 374 return self.specified 375 376 def _get_name(self): 377 return self._name 378 379 def _set_name(self, value): 380 self._name = value 381 if self.ownerElement is not None: 382 _clear_id_cache(self.ownerElement) 383 384 nodeName = name = property(_get_name, _set_name) 385 386 def _get_value(self): 387 return self._value 388 389 def _set_value(self, value): 390 self._value = value 391 self.childNodes[0].data = value 392 if self.ownerElement is not None: 393 _clear_id_cache(self.ownerElement) 394 self.childNodes[0].data = value 395 396 nodeValue = value = property(_get_value, _set_value) 397 398 def _get_prefix(self): 399 return self._prefix 400 401 def _set_prefix(self, prefix): 402 nsuri = self.namespaceURI 403 if prefix == "xmlns": 404 if nsuri and nsuri != XMLNS_NAMESPACE: 405 raise xml.dom.NamespaceErr( 406 "illegal use of 'xmlns' prefix for the wrong namespace") 407 self._prefix = prefix 408 if prefix is None: 409 newName = 
self.localName 410 else: 411 newName = "%s:%s" % (prefix, self.localName) 412 if self.ownerElement: 413 _clear_id_cache(self.ownerElement) 414 self.name = newName 415 416 prefix = property(_get_prefix, _set_prefix) 417 418 def unlink(self): 419 # This implementation does not call the base implementation 420 # since most of that is not needed, and the expense of the 421 # method call is not warranted. We duplicate the removal of 422 # children, but that's all we needed from the base class. 423 elem = self.ownerElement 424 if elem is not None: 425 del elem._attrs[self.nodeName] 426 del elem._attrsNS[(self.namespaceURI, self.localName)] 427 if self._is_id: 428 self._is_id = False 429 elem._magic_id_nodes -= 1 430 self.ownerDocument._magic_id_count -= 1 431 for child in self.childNodes: 432 child.unlink() 433 del self.childNodes[:] 434 435 def _get_isId(self): 436 if self._is_id: 437 return True 438 doc = self.ownerDocument 439 elem = self.ownerElement 440 if doc is None or elem is None: 441 return False 442 443 info = doc._get_elem_info(elem) 444 if info is None: 445 return False 446 if self.namespaceURI: 447 return info.isIdNS(self.namespaceURI, self.localName) 448 else: 449 return info.isId(self.nodeName) 450 451 def _get_schemaType(self): 452 doc = self.ownerDocument 453 elem = self.ownerElement 454 if doc is None or elem is None: 455 return _no_type 456 457 info = doc._get_elem_info(elem) 458 if info is None: 459 return _no_type 460 if self.namespaceURI: 461 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 462 else: 463 return info.getAttributeType(self.nodeName) 464 465defproperty(Attr, "isId", doc="True if this attribute is an ID.") 466defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 467defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 468 469 470class NamedNodeMap(object): 471 """The attribute list is a transient interface to the underlying 472 dictionaries. 
Mutations here will change the underlying element's 473 dictionary. 474 475 Ordering is imposed artificially and does not reflect the order of 476 attributes as found in an input document. 477 """ 478 479 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 480 481 def __init__(self, attrs, attrsNS, ownerElement): 482 self._attrs = attrs 483 self._attrsNS = attrsNS 484 self._ownerElement = ownerElement 485 486 def _get_length(self): 487 return len(self._attrs) 488 489 def item(self, index): 490 try: 491 return self[list(self._attrs.keys())[index]] 492 except IndexError: 493 return None 494 495 def items(self): 496 L = [] 497 for node in self._attrs.values(): 498 L.append((node.nodeName, node.value)) 499 return L 500 501 def itemsNS(self): 502 L = [] 503 for node in self._attrs.values(): 504 L.append(((node.namespaceURI, node.localName), node.value)) 505 return L 506 507 def __contains__(self, key): 508 if isinstance(key, str): 509 return key in self._attrs 510 else: 511 return key in self._attrsNS 512 513 def keys(self): 514 return self._attrs.keys() 515 516 def keysNS(self): 517 return self._attrsNS.keys() 518 519 def values(self): 520 return self._attrs.values() 521 522 def get(self, name, value=None): 523 return self._attrs.get(name, value) 524 525 __len__ = _get_length 526 527 def _cmp(self, other): 528 if self._attrs is getattr(other, "_attrs", None): 529 return 0 530 else: 531 return (id(self) > id(other)) - (id(self) < id(other)) 532 533 def __eq__(self, other): 534 return self._cmp(other) == 0 535 536 def __ge__(self, other): 537 return self._cmp(other) >= 0 538 539 def __gt__(self, other): 540 return self._cmp(other) > 0 541 542 def __le__(self, other): 543 return self._cmp(other) <= 0 544 545 def __lt__(self, other): 546 return self._cmp(other) < 0 547 548 def __getitem__(self, attname_or_tuple): 549 if isinstance(attname_or_tuple, tuple): 550 return self._attrsNS[attname_or_tuple] 551 else: 552 return self._attrs[attname_or_tuple] 553 554 # same as set 
555 def __setitem__(self, attname, value): 556 if isinstance(value, str): 557 try: 558 node = self._attrs[attname] 559 except KeyError: 560 node = Attr(attname) 561 node.ownerDocument = self._ownerElement.ownerDocument 562 self.setNamedItem(node) 563 node.value = value 564 else: 565 if not isinstance(value, Attr): 566 raise TypeError("value must be a string or Attr object") 567 node = value 568 self.setNamedItem(node) 569 570 def getNamedItem(self, name): 571 try: 572 return self._attrs[name] 573 except KeyError: 574 return None 575 576 def getNamedItemNS(self, namespaceURI, localName): 577 try: 578 return self._attrsNS[(namespaceURI, localName)] 579 except KeyError: 580 return None 581 582 def removeNamedItem(self, name): 583 n = self.getNamedItem(name) 584 if n is not None: 585 _clear_id_cache(self._ownerElement) 586 del self._attrs[n.nodeName] 587 del self._attrsNS[(n.namespaceURI, n.localName)] 588 if hasattr(n, 'ownerElement'): 589 n.ownerElement = None 590 return n 591 else: 592 raise xml.dom.NotFoundErr() 593 594 def removeNamedItemNS(self, namespaceURI, localName): 595 n = self.getNamedItemNS(namespaceURI, localName) 596 if n is not None: 597 _clear_id_cache(self._ownerElement) 598 del self._attrsNS[(n.namespaceURI, n.localName)] 599 del self._attrs[n.nodeName] 600 if hasattr(n, 'ownerElement'): 601 n.ownerElement = None 602 return n 603 else: 604 raise xml.dom.NotFoundErr() 605 606 def setNamedItem(self, node): 607 if not isinstance(node, Attr): 608 raise xml.dom.HierarchyRequestErr( 609 "%s cannot be child of %s" % (repr(node), repr(self))) 610 old = self._attrs.get(node.name) 611 if old: 612 old.unlink() 613 self._attrs[node.name] = node 614 self._attrsNS[(node.namespaceURI, node.localName)] = node 615 node.ownerElement = self._ownerElement 616 _clear_id_cache(node.ownerElement) 617 return old 618 619 def setNamedItemNS(self, node): 620 return self.setNamedItem(node) 621 622 def __delitem__(self, attname_or_tuple): 623 node = self[attname_or_tuple] 624 
_clear_id_cache(node.ownerElement) 625 node.unlink() 626 627 def __getstate__(self): 628 return self._attrs, self._attrsNS, self._ownerElement 629 630 def __setstate__(self, state): 631 self._attrs, self._attrsNS, self._ownerElement = state 632 633defproperty(NamedNodeMap, "length", 634 doc="Number of nodes in the NamedNodeMap.") 635 636AttributeList = NamedNodeMap 637 638 639class TypeInfo(object): 640 __slots__ = 'namespace', 'name' 641 642 def __init__(self, namespace, name): 643 self.namespace = namespace 644 self.name = name 645 646 def __repr__(self): 647 if self.namespace: 648 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name, 649 self.namespace) 650 else: 651 return "<%s %r>" % (self.__class__.__name__, self.name) 652 653 def _get_name(self): 654 return self.name 655 656 def _get_namespace(self): 657 return self.namespace 658 659_no_type = TypeInfo(None, None) 660 661class Element(Node): 662 __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', 663 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', 664 'nextSibling', 'previousSibling') 665 nodeType = Node.ELEMENT_NODE 666 nodeValue = None 667 schemaType = _no_type 668 669 _magic_id_nodes = 0 670 671 _child_node_types = (Node.ELEMENT_NODE, 672 Node.PROCESSING_INSTRUCTION_NODE, 673 Node.COMMENT_NODE, 674 Node.TEXT_NODE, 675 Node.CDATA_SECTION_NODE, 676 Node.ENTITY_REFERENCE_NODE) 677 678 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 679 localName=None): 680 self.parentNode = None 681 self.tagName = self.nodeName = tagName 682 self.prefix = prefix 683 self.namespaceURI = namespaceURI 684 self.childNodes = NodeList() 685 self.nextSibling = self.previousSibling = None 686 687 # Attribute dictionaries are lazily created 688 # attributes are double-indexed: 689 # tagName -> Attribute 690 # URI,localName -> Attribute 691 # in the future: consider lazy generation 692 # of attribute objects this is too tricky 693 # for now because of 
headaches with 694 # namespaces. 695 self._attrs = None 696 self._attrsNS = None 697 698 def _ensure_attributes(self): 699 if self._attrs is None: 700 self._attrs = {} 701 self._attrsNS = {} 702 703 def _get_localName(self): 704 try: 705 return self._localName 706 except AttributeError: 707 return self.tagName.split(":", 1)[-1] 708 709 def _get_tagName(self): 710 return self.tagName 711 712 def unlink(self): 713 if self._attrs is not None: 714 for attr in list(self._attrs.values()): 715 attr.unlink() 716 self._attrs = None 717 self._attrsNS = None 718 Node.unlink(self) 719 720 def getAttribute(self, attname): 721 if self._attrs is None: 722 return "" 723 try: 724 return self._attrs[attname].value 725 except KeyError: 726 return "" 727 728 def getAttributeNS(self, namespaceURI, localName): 729 if self._attrsNS is None: 730 return "" 731 try: 732 return self._attrsNS[(namespaceURI, localName)].value 733 except KeyError: 734 return "" 735 736 def setAttribute(self, attname, value): 737 attr = self.getAttributeNode(attname) 738 if attr is None: 739 attr = Attr(attname) 740 attr.value = value # also sets nodeValue 741 attr.ownerDocument = self.ownerDocument 742 self.setAttributeNode(attr) 743 elif value != attr.value: 744 attr.value = value 745 if attr.isId: 746 _clear_id_cache(self) 747 748 def setAttributeNS(self, namespaceURI, qualifiedName, value): 749 prefix, localname = _nssplit(qualifiedName) 750 attr = self.getAttributeNodeNS(namespaceURI, localname) 751 if attr is None: 752 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 753 attr.value = value 754 attr.ownerDocument = self.ownerDocument 755 self.setAttributeNode(attr) 756 else: 757 if value != attr.value: 758 attr.value = value 759 if attr.isId: 760 _clear_id_cache(self) 761 if attr.prefix != prefix: 762 attr.prefix = prefix 763 attr.nodeName = qualifiedName 764 765 def getAttributeNode(self, attrname): 766 if self._attrs is None: 767 return None 768 return self._attrs.get(attrname) 769 770 def 
getAttributeNodeNS(self, namespaceURI, localName): 771 if self._attrsNS is None: 772 return None 773 return self._attrsNS.get((namespaceURI, localName)) 774 775 def setAttributeNode(self, attr): 776 if attr.ownerElement not in (None, self): 777 raise xml.dom.InuseAttributeErr("attribute node already owned") 778 self._ensure_attributes() 779 old1 = self._attrs.get(attr.name, None) 780 if old1 is not None: 781 self.removeAttributeNode(old1) 782 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 783 if old2 is not None and old2 is not old1: 784 self.removeAttributeNode(old2) 785 _set_attribute_node(self, attr) 786 787 if old1 is not attr: 788 # It might have already been part of this node, in which case 789 # it doesn't represent a change, and should not be returned. 790 return old1 791 if old2 is not attr: 792 return old2 793 794 setAttributeNodeNS = setAttributeNode 795 796 def removeAttribute(self, name): 797 if self._attrsNS is None: 798 raise xml.dom.NotFoundErr() 799 try: 800 attr = self._attrs[name] 801 except KeyError: 802 raise xml.dom.NotFoundErr() 803 self.removeAttributeNode(attr) 804 805 def removeAttributeNS(self, namespaceURI, localName): 806 if self._attrsNS is None: 807 raise xml.dom.NotFoundErr() 808 try: 809 attr = self._attrsNS[(namespaceURI, localName)] 810 except KeyError: 811 raise xml.dom.NotFoundErr() 812 self.removeAttributeNode(attr) 813 814 def removeAttributeNode(self, node): 815 if node is None: 816 raise xml.dom.NotFoundErr() 817 try: 818 self._attrs[node.name] 819 except KeyError: 820 raise xml.dom.NotFoundErr() 821 _clear_id_cache(self) 822 node.unlink() 823 # Restore this since the node is still useful and otherwise 824 # unlinked 825 node.ownerDocument = self.ownerDocument 826 return node 827 828 removeAttributeNodeNS = removeAttributeNode 829 830 def hasAttribute(self, name): 831 if self._attrs is None: 832 return False 833 return name in self._attrs 834 835 def hasAttributeNS(self, namespaceURI, localName): 836 if 
self._attrsNS is None: 837 return False 838 return (namespaceURI, localName) in self._attrsNS 839 840 def getElementsByTagName(self, name): 841 return _get_elements_by_tagName_helper(self, name, NodeList()) 842 843 def getElementsByTagNameNS(self, namespaceURI, localName): 844 return _get_elements_by_tagName_ns_helper( 845 self, namespaceURI, localName, NodeList()) 846 847 def __repr__(self): 848 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 849 850 def writexml(self, writer, indent="", addindent="", newl=""): 851 # indent = current indentation 852 # addindent = indentation to add to higher levels 853 # newl = newline string 854 writer.write(indent+"<" + self.tagName) 855 856 attrs = self._get_attributes() 857 858 for a_name in attrs.keys(): 859 writer.write(" %s=\"" % a_name) 860 _write_data(writer, attrs[a_name].value) 861 writer.write("\"") 862 if self.childNodes: 863 writer.write(">") 864 if (len(self.childNodes) == 1 and 865 self.childNodes[0].nodeType in ( 866 Node.TEXT_NODE, Node.CDATA_SECTION_NODE)): 867 self.childNodes[0].writexml(writer, '', '', '') 868 else: 869 writer.write(newl) 870 for node in self.childNodes: 871 node.writexml(writer, indent+addindent, addindent, newl) 872 writer.write(indent) 873 writer.write("</%s>%s" % (self.tagName, newl)) 874 else: 875 writer.write("/>%s"%(newl)) 876 877 def _get_attributes(self): 878 self._ensure_attributes() 879 return NamedNodeMap(self._attrs, self._attrsNS, self) 880 881 def hasAttributes(self): 882 if self._attrs: 883 return True 884 else: 885 return False 886 887 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 888 889 def setIdAttribute(self, name): 890 idAttr = self.getAttributeNode(name) 891 self.setIdAttributeNode(idAttr) 892 893 def setIdAttributeNS(self, namespaceURI, localName): 894 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 895 self.setIdAttributeNode(idAttr) 896 897 def setIdAttributeNode(self, idAttr): 898 if idAttr is None or not 
self.isSameNode(idAttr.ownerElement): 899 raise xml.dom.NotFoundErr() 900 if _get_containing_entref(self) is not None: 901 raise xml.dom.NoModificationAllowedErr() 902 if not idAttr._is_id: 903 idAttr._is_id = True 904 self._magic_id_nodes += 1 905 self.ownerDocument._magic_id_count += 1 906 _clear_id_cache(self) 907 908defproperty(Element, "attributes", 909 doc="NamedNodeMap of attributes on the element.") 910defproperty(Element, "localName", 911 doc="Namespace-local name of this element.") 912 913 914def _set_attribute_node(element, attr): 915 _clear_id_cache(element) 916 element._ensure_attributes() 917 element._attrs[attr.name] = attr 918 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 919 920 # This creates a circular reference, but Element.unlink() 921 # breaks the cycle since the references to the attribute 922 # dictionaries are tossed. 923 attr.ownerElement = element 924 925class Childless: 926 """Mixin that makes childless-ness easy to implement and avoids 927 the complexity of the Node methods that deal with children. 928 """ 929 __slots__ = () 930 931 attributes = None 932 childNodes = EmptyNodeList() 933 firstChild = None 934 lastChild = None 935 936 def _get_firstChild(self): 937 return None 938 939 def _get_lastChild(self): 940 return None 941 942 def appendChild(self, node): 943 raise xml.dom.HierarchyRequestErr( 944 self.nodeName + " nodes cannot have children") 945 946 def hasChildNodes(self): 947 return False 948 949 def insertBefore(self, newChild, refChild): 950 raise xml.dom.HierarchyRequestErr( 951 self.nodeName + " nodes do not have children") 952 953 def removeChild(self, oldChild): 954 raise xml.dom.NotFoundErr( 955 self.nodeName + " nodes do not have children") 956 957 def normalize(self): 958 # For childless nodes, normalize() has nothing to do. 
959 pass 960 961 def replaceChild(self, newChild, oldChild): 962 raise xml.dom.HierarchyRequestErr( 963 self.nodeName + " nodes do not have children") 964 965 966class ProcessingInstruction(Childless, Node): 967 nodeType = Node.PROCESSING_INSTRUCTION_NODE 968 __slots__ = ('target', 'data') 969 970 def __init__(self, target, data): 971 self.target = target 972 self.data = data 973 974 # nodeValue is an alias for data 975 def _get_nodeValue(self): 976 return self.data 977 def _set_nodeValue(self, value): 978 self.data = value 979 nodeValue = property(_get_nodeValue, _set_nodeValue) 980 981 # nodeName is an alias for target 982 def _get_nodeName(self): 983 return self.target 984 def _set_nodeName(self, value): 985 self.target = value 986 nodeName = property(_get_nodeName, _set_nodeName) 987 988 def writexml(self, writer, indent="", addindent="", newl=""): 989 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 990 991 992class CharacterData(Childless, Node): 993 __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') 994 995 def __init__(self): 996 self.ownerDocument = self.parentNode = None 997 self.previousSibling = self.nextSibling = None 998 self._data = '' 999 Node.__init__(self) 1000 1001 def _get_length(self): 1002 return len(self.data) 1003 __len__ = _get_length 1004 1005 def _get_data(self): 1006 return self._data 1007 def _set_data(self, data): 1008 self._data = data 1009 1010 data = nodeValue = property(_get_data, _set_data) 1011 1012 def __repr__(self): 1013 data = self.data 1014 if len(data) > 10: 1015 dotdotdot = "..." 
1016 else: 1017 dotdotdot = "" 1018 return '<DOM %s node "%r%s">' % ( 1019 self.__class__.__name__, data[0:10], dotdotdot) 1020 1021 def substringData(self, offset, count): 1022 if offset < 0: 1023 raise xml.dom.IndexSizeErr("offset cannot be negative") 1024 if offset >= len(self.data): 1025 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1026 if count < 0: 1027 raise xml.dom.IndexSizeErr("count cannot be negative") 1028 return self.data[offset:offset+count] 1029 1030 def appendData(self, arg): 1031 self.data = self.data + arg 1032 1033 def insertData(self, offset, arg): 1034 if offset < 0: 1035 raise xml.dom.IndexSizeErr("offset cannot be negative") 1036 if offset >= len(self.data): 1037 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1038 if arg: 1039 self.data = "%s%s%s" % ( 1040 self.data[:offset], arg, self.data[offset:]) 1041 1042 def deleteData(self, offset, count): 1043 if offset < 0: 1044 raise xml.dom.IndexSizeErr("offset cannot be negative") 1045 if offset >= len(self.data): 1046 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1047 if count < 0: 1048 raise xml.dom.IndexSizeErr("count cannot be negative") 1049 if count: 1050 self.data = self.data[:offset] + self.data[offset+count:] 1051 1052 def replaceData(self, offset, count, arg): 1053 if offset < 0: 1054 raise xml.dom.IndexSizeErr("offset cannot be negative") 1055 if offset >= len(self.data): 1056 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1057 if count < 0: 1058 raise xml.dom.IndexSizeErr("count cannot be negative") 1059 if count: 1060 self.data = "%s%s%s" % ( 1061 self.data[:offset], arg, self.data[offset+count:]) 1062 1063defproperty(CharacterData, "length", doc="Length of the string data.") 1064 1065 1066class Text(CharacterData): 1067 __slots__ = () 1068 1069 nodeType = Node.TEXT_NODE 1070 nodeName = "#text" 1071 attributes = None 1072 1073 def splitText(self, offset): 1074 if offset < 0 or offset > len(self.data): 1075 
def replaceWholeText(self, content):
    """Text method: replace this node and its adjacent text with ``content``.

    Every TEXT_NODE / CDATA_SECTION_NODE sibling directly before or after
    this node is removed from the parent.  If ``content`` is non-empty it
    becomes this node's data and the node itself is returned.  If
    ``content`` is empty this node is removed from its parent as well (when
    it has one) and None is returned.

    A node without a parent has no siblings to remove, so only its own
    data is affected.
    """
    # XXX This needs to be seriously changed if minidom ever
    # supports EntityReference nodes.
    parent = self.parentNode
    text_like = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

    # Walk backwards over the run of text-like siblings, remembering the
    # next candidate before each node is detached.
    n = self.previousSibling
    while n is not None and n.nodeType in text_like:
        before = n.previousSibling
        parent.removeChild(n)
        n = before

    # Read the following sibling first: removing this node may reset its
    # own sibling links (Document.removeChild does).
    n = self.nextSibling
    if not content and parent is not None:
        parent.removeChild(self)
    while n is not None and n.nodeType in text_like:
        after = n.nextSibling
        parent.removeChild(n)
        n = after

    if content:
        self.data = content
        return self
    return None
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap look-alike backed by a sequence of nodes.

    Lookups scan the sequence in order and return the first match; every
    mutating method raises xml.dom.NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals ``name``, else None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, else None."""
        return next(
            (n for n in self._seq
             if n.namespaceURI == namespaceURI and n.localName == localName),
            None)

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when no node matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the node at position ``index``, or None if out of range."""
        if index < 0:
            # Negative positions are out of range here, not "from the end".
            return None
        try:
            entry = self._seq[index]
        except IndexError:
            return None
        return entry

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # The state is a one-element list holding the backing sequence.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
def cloneNode(self, deep):
    """DocumentType method: clone a doctype that no document owns.

    Returns None when this doctype already belongs to a document
    (``ownerDocument`` is set).  Otherwise returns a new DocumentType with
    the same name, publicId and systemId.  With ``deep`` true, the
    notations and entities are copied as well, and the NODE_CLONED user
    data handler is invoked for each copied notation and entity.  The
    handler is always invoked for the doctype itself.
    """
    if self.ownerDocument is not None:
        return None

    # it's ok
    clone = DocumentType(None)
    clone.name = self.name
    clone.nodeName = self.name
    # Carry the external identifiers over as well; _clone_node() passes
    # both through when it copies a doctype, and a clone that silently
    # loses them would serialize to a different DOCTYPE declaration.
    clone.publicId = self.publicId
    clone.systemId = self.systemId
    operation = xml.dom.UserDataHandler.NODE_CLONED
    if deep:
        # Give the clone its own lists instead of the shared defaults.
        clone.entities._seq = []
        clone.notations._seq = []
        for n in self.notations._seq:
            notation = Notation(n.nodeName, n.publicId, n.systemId)
            clone.notations._seq.append(notation)
            n._call_user_data_handler(operation, n, notation)
        for e in self.entities._seq:
            entity = Entity(e.nodeName, e.publicId, e.systemId,
                            e.notationName)
            entity.actualEncoding = e.actualEncoding
            entity.encoding = e.encoding
            entity.version = e.version
            clone.entities._seq.append(entity)
            e._call_user_data_handler(operation, e, entity)
    self._call_user_data_handler(operation, self, clone)
    return clone
def writexml(self, writer, indent="", addindent="", newl=""):
    """DocumentType method: write the ``<!DOCTYPE ...>`` declaration.

    ``writer`` only needs a ``write(str)`` method.  ``newl`` is written
    before the PUBLIC/SYSTEM keyword, between the two identifiers, and
    after the closing ``>``; ``indent`` and ``addindent`` are unused.

    Identifiers are wrapped in single quotes unless they contain an
    apostrophe, in which case double quotes are used so the literal stays
    well-formed.

    Raises ValueError if an identifier contains both kinds of quote, since
    it cannot then be written as a quoted literal.
    """
    def quoted(literal):
        # "%s" keeps the exact text the declaration has always contained
        # for any value, including non-string ones.
        text = "%s" % (literal,)
        if "'" not in text:
            return "'%s'" % text
        if '"' not in text:
            return '"%s"' % text
        raise ValueError(
            "identifier containing both ' and \" cannot be quoted")

    writer.write("<!DOCTYPE ")
    writer.write(self.name)
    if self.publicId:
        writer.write("%s PUBLIC %s%s %s"
                     % (newl, quoted(self.publicId),
                        newl, quoted(self.systemId)))
    elif self.systemId:
        writer.write("%s SYSTEM %s" % (newl, quoted(self.systemId)))
    if self.internalSubset is not None:
        writer.write(" [")
        writer.write(self.internalSubset)
        writer.write("]")
    writer.write(">"+newl)
"2.0"), 1394 ("core", None), 1395 ("xml", "1.0"), 1396 ("xml", "2.0"), 1397 ("xml", None), 1398 ("ls-load", "3.0"), 1399 ("ls-load", None), 1400 ] 1401 1402 def hasFeature(self, feature, version): 1403 if version == "": 1404 version = None 1405 return (feature.lower(), version) in self._features 1406 1407 def createDocument(self, namespaceURI, qualifiedName, doctype): 1408 if doctype and doctype.parentNode is not None: 1409 raise xml.dom.WrongDocumentErr( 1410 "doctype object owned by another DOM tree") 1411 doc = self._create_document() 1412 1413 add_root_element = not (namespaceURI is None 1414 and qualifiedName is None 1415 and doctype is None) 1416 1417 if not qualifiedName and add_root_element: 1418 # The spec is unclear what to raise here; SyntaxErr 1419 # would be the other obvious candidate. Since Xerces raises 1420 # InvalidCharacterErr, and since SyntaxErr is not listed 1421 # for createDocument, that seems to be the better choice. 1422 # XXX: need to check for illegal characters here and in 1423 # createElement. 1424 1425 # DOM Level III clears this up when talking about the return value 1426 # of this function. 
class ElementInfo(object):
    """Content-model information for one element type.

    This default implementation knows nothing about the element: every
    query answers False or "no type".  It is not expected to be used in
    practice; DOM builders should supply their own implementation based
    on the information they have available.
    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    # -- attribute typing: no type information is available -------------

    def getAttributeType(self, aname):
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        return _no_type

    # -- content model ---------------------------------------------------

    def isElementContent(self):
        return False

    def isEmpty(self):
        """Tell whether the element is declared with an EMPTY content
        model; this implementation always answers False."""
        return False

    # -- ID attributes ---------------------------------------------------

    def isId(self, aname):
        """Tell whether the named attribute is a DTD-style ID; this
        implementation always answers False."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Tell whether the identified attribute is a DTD-style ID; this
        implementation always answers False."""
        return False

    # -- pickling: the tag name is the entire state ----------------------

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
ElementInfo 1551 # and tagName -> ElementInfo 1552 self._elem_info = {} 1553 self._id_cache = {} 1554 self._id_search_stack = None 1555 1556 def _get_elem_info(self, element): 1557 if element.namespaceURI: 1558 key = element.namespaceURI, element.localName 1559 else: 1560 key = element.tagName 1561 return self._elem_info.get(key) 1562 1563 def _get_actualEncoding(self): 1564 return self.actualEncoding 1565 1566 def _get_doctype(self): 1567 return self.doctype 1568 1569 def _get_documentURI(self): 1570 return self.documentURI 1571 1572 def _get_encoding(self): 1573 return self.encoding 1574 1575 def _get_errorHandler(self): 1576 return self.errorHandler 1577 1578 def _get_standalone(self): 1579 return self.standalone 1580 1581 def _get_strictErrorChecking(self): 1582 return self.strictErrorChecking 1583 1584 def _get_version(self): 1585 return self.version 1586 1587 def appendChild(self, node): 1588 if node.nodeType not in self._child_node_types: 1589 raise xml.dom.HierarchyRequestErr( 1590 "%s cannot be child of %s" % (repr(node), repr(self))) 1591 if node.parentNode is not None: 1592 # This needs to be done before the next test since this 1593 # may *be* the document element, in which case it should 1594 # end up re-ordered to the end. 
def removeChild(self, oldChild):
    """Document method: detach ``oldChild`` from this document.

    The child's former neighbours are linked to each other, the child's
    own parent and sibling references are cleared, and the removed child
    is returned.  Removing an element also discards the getElementById()
    cache, because cached entries may point into the removed subtree.

    Raises xml.dom.NotFoundErr if ``oldChild`` is not a child of this
    document.
    """
    try:
        self.childNodes.remove(oldChild)
    except ValueError:
        raise xml.dom.NotFoundErr()

    # Close the gap: without this the neighbours would keep pointing at
    # a node that is no longer in the tree.
    following = oldChild.nextSibling
    preceding = oldChild.previousSibling
    if following is not None:
        following.previousSibling = preceding
    if preceding is not None:
        preceding.nextSibling = following

    oldChild.nextSibling = oldChild.previousSibling = None
    oldChild.parentNode = None

    if oldChild.nodeType == Node.ELEMENT_NODE:
        # Same reset _clear_id_cache() performs for a document node.
        self._id_cache.clear()
        self._id_search_stack = None

    # documentElement is computed from childNodes, so it already
    # reflects the removal; there is nothing to reset here.
    return oldChild
def getElementById(self, id):
    """Document method: return the element whose ID attribute equals ``id``.

    Returns None when no such element is found.  Results are memoized in
    ``self._id_cache``.  The depth-first search is resumable: the stack of
    elements still to be visited is kept in ``self._id_search_stack``, so
    a later call continues where an earlier one stopped.  An empty stack
    means the whole tree has already been indexed.

    A document without a document element has nothing to search and
    yields None.
    """
    if id in self._id_cache:
        return self._id_cache[id]
    if not (self._elem_info or self._magic_id_count):
        # No element info registered and no IDs set by hand: no
        # attribute anywhere can be an ID.
        return None

    stack = self._id_search_stack
    if stack is None:
        # we never searched before, or the cache has been cleared
        root = self.documentElement
        # An empty document gets an empty stack, i.e. an already
        # completed search, instead of a None entry to dereference.
        stack = [root] if root is not None else []
        self._id_search_stack = stack
    elif not stack:
        # Previous search was completed and cache is still valid;
        # no matching node.
        return None

    result = None
    while stack:
        node = stack.pop()
        # add child elements to stack for continued searching
        stack.extend([child for child in node.childNodes
                      if child.nodeType in _nodeTypes_with_children])
        # check this node
        info = self._get_elem_info(node)
        if info:
            # We have to process all ID attributes before
            # returning in order to get all the attributes set to
            # be IDs using Element.setIdAttribute*().
            for attr in node.attributes.values():
                if attr.namespaceURI:
                    if info.isIdNS(attr.namespaceURI, attr.localName):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                elif info.isId(attr.name):
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
                    elif not node._magic_id_nodes:
                        break
                elif attr._is_id:
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
                    elif node._magic_id_nodes == 1:
                        break
        elif node._magic_id_nodes:
            for attr in node.attributes.values():
                if attr._is_id:
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
        if result is not None:
            break
    return result
def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
             standalone=None):
    """Document method: write the XML declaration, then every child node.

    writer     -- object with a ``write(str)`` method
    indent, addindent, newl -- passed unchanged to each child's writexml();
                  ``newl`` is also written after the declaration
    encoding   -- when not None, written as the declaration's encoding
    standalone -- when not None, written as ``standalone="yes"`` if true
                  and ``standalone="no"`` if false

    With both ``encoding`` and ``standalone`` left as None the declaration
    is ``<?xml version="1.0" ?>``.
    """
    declarations = []
    if encoding is not None:
        declarations.append('encoding="%s"' % encoding)
    if standalone is not None:
        declarations.append(
            'standalone="%s"' % ("yes" if standalone else "no"))
    # With nothing to declare, the join is empty and the historical
    # '<?xml version="1.0" ?>' spelling (with its space) is preserved.
    writer.write('<?xml version="1.0" %s?>%s'
                 % (" ".join(declarations), newl))
    for node in self.childNodes:
        node.writexml(writer, indent, addindent, newl)
whether we should 1848 # call the user data handlers for the NODE_RENAMED event since 1849 # we're re-using the existing node. The draft spec has been 1850 # interpreted as meaning "no, don't call the handler unless a 1851 # new node is created." 1852 return n 1853 1854defproperty(Document, "documentElement", 1855 doc="Top-level element of this document.") 1856 1857 1858def _clone_node(node, deep, newOwnerDocument): 1859 """ 1860 Clone a node and give it the new owner document. 1861 Called by Node.cloneNode and Document.importNode 1862 """ 1863 if node.ownerDocument.isSameNode(newOwnerDocument): 1864 operation = xml.dom.UserDataHandler.NODE_CLONED 1865 else: 1866 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1867 if node.nodeType == Node.ELEMENT_NODE: 1868 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1869 node.nodeName) 1870 for attr in node.attributes.values(): 1871 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1872 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1873 a.specified = attr.specified 1874 1875 if deep: 1876 for child in node.childNodes: 1877 c = _clone_node(child, deep, newOwnerDocument) 1878 clone.appendChild(c) 1879 1880 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1881 clone = newOwnerDocument.createDocumentFragment() 1882 if deep: 1883 for child in node.childNodes: 1884 c = _clone_node(child, deep, newOwnerDocument) 1885 clone.appendChild(c) 1886 1887 elif node.nodeType == Node.TEXT_NODE: 1888 clone = newOwnerDocument.createTextNode(node.data) 1889 elif node.nodeType == Node.CDATA_SECTION_NODE: 1890 clone = newOwnerDocument.createCDATASection(node.data) 1891 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1892 clone = newOwnerDocument.createProcessingInstruction(node.target, 1893 node.data) 1894 elif node.nodeType == Node.COMMENT_NODE: 1895 clone = newOwnerDocument.createComment(node.data) 1896 elif node.nodeType == Node.ATTRIBUTE_NODE: 1897 clone = 
newOwnerDocument.createAttributeNS(node.namespaceURI, 1898 node.nodeName) 1899 clone.specified = True 1900 clone.value = node.value 1901 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1902 assert node.ownerDocument is not newOwnerDocument 1903 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1904 clone = newOwnerDocument.implementation.createDocumentType( 1905 node.name, node.publicId, node.systemId) 1906 clone.ownerDocument = newOwnerDocument 1907 if deep: 1908 clone.entities._seq = [] 1909 clone.notations._seq = [] 1910 for n in node.notations._seq: 1911 notation = Notation(n.nodeName, n.publicId, n.systemId) 1912 notation.ownerDocument = newOwnerDocument 1913 clone.notations._seq.append(notation) 1914 if hasattr(n, '_call_user_data_handler'): 1915 n._call_user_data_handler(operation, n, notation) 1916 for e in node.entities._seq: 1917 entity = Entity(e.nodeName, e.publicId, e.systemId, 1918 e.notationName) 1919 entity.actualEncoding = e.actualEncoding 1920 entity.encoding = e.encoding 1921 entity.version = e.version 1922 entity.ownerDocument = newOwnerDocument 1923 clone.entities._seq.append(entity) 1924 if hasattr(e, '_call_user_data_handler'): 1925 e._call_user_data_handler(operation, e, entity) 1926 else: 1927 # Note the cloning of Document and DocumentType nodes is 1928 # implementation specific. minidom handles those cases 1929 # directly in the cloneNode() methods. 1930 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1931 1932 # Check for _call_user_data_handler() since this could conceivably 1933 # used with other DOM implementations (one of the FourThought 1934 # DOMs, perhaps?). 
1935 if hasattr(node, '_call_user_data_handler'): 1936 node._call_user_data_handler(operation, node, clone) 1937 return clone 1938 1939 1940def _nssplit(qualifiedName): 1941 fields = qualifiedName.split(':', 1) 1942 if len(fields) == 2: 1943 return fields 1944 else: 1945 return (None, fields[0]) 1946 1947 1948def _do_pulldom_parse(func, args, kwargs): 1949 events = func(*args, **kwargs) 1950 toktype, rootNode = events.getEvent() 1951 events.expandNode(rootNode) 1952 events.clear() 1953 return rootNode 1954 1955def parse(file, parser=None, bufsize=None): 1956 """Parse a file into a DOM by filename or file object.""" 1957 if parser is None and not bufsize: 1958 from xml.dom import expatbuilder 1959 return expatbuilder.parse(file) 1960 else: 1961 from xml.dom import pulldom 1962 return _do_pulldom_parse(pulldom.parse, (file,), 1963 {'parser': parser, 'bufsize': bufsize}) 1964 1965def parseString(string, parser=None): 1966 """Parse a file into a DOM from a string.""" 1967 if parser is None: 1968 from xml.dom import expatbuilder 1969 return expatbuilder.parseString(string) 1970 else: 1971 from xml.dom import pulldom 1972 return _do_pulldom_parse(pulldom.parseString, (string,), 1973 {'parser': parser}) 1974 1975def getDOMImplementation(features=None): 1976 if features: 1977 if isinstance(features, str): 1978 features = domreg._parse_feature_string(features) 1979 for f, v in features: 1980 if not Document.implementation.hasFeature(f, v): 1981 return None 1982 return Document.implementation 1983