1"""Simple implementation of the Level 1 DOM. 2 3Namespaces and other minor Level 2 features are also supported. 4 5parse("foo.xml") 6 7parseString("<foo><bar/></foo>") 8 9Todo: 10===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16""" 17 18import io 19import xml.dom 20 21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 22from xml.dom.minicompat import * 23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 24 25# This is used by the ID-cache invalidation checks; the list isn't 26# actually complete, since the nodes being checked will never be the 27# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 28# the node being added or removed, not the node being modified.) 29# 30_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 31 xml.dom.Node.ENTITY_REFERENCE_NODE) 32 33 34class Node(xml.dom.Node): 35 namespaceURI = None # this is non-null only for elements and attributes 36 parentNode = None 37 ownerDocument = None 38 nextSibling = None 39 previousSibling = None 40 41 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 42 43 def __bool__(self): 44 return True 45 46 def toxml(self, encoding=None, standalone=None): 47 return self.toprettyxml("", "", encoding, standalone) 48 49 def toprettyxml(self, indent="\t", newl="\n", encoding=None, 50 standalone=None): 51 if encoding is None: 52 writer = io.StringIO() 53 else: 54 writer = io.TextIOWrapper(io.BytesIO(), 55 encoding=encoding, 56 errors="xmlcharrefreplace", 57 newline='\n') 58 if self.nodeType == Node.DOCUMENT_NODE: 59 # Can pass encoding only to document, to put it into XML header 60 self.writexml(writer, "", indent, newl, encoding, standalone) 61 else: 62 self.writexml(writer, "", indent, newl) 63 if encoding is None: 64 return writer.getvalue() 65 else: 66 return writer.detach().getvalue() 67 68 def hasChildNodes(self): 69 return bool(self.childNodes) 70 71 def _get_childNodes(self): 72 return self.childNodes 73 74 def _get_firstChild(self): 75 if self.childNodes: 76 return self.childNodes[0] 77 78 def _get_lastChild(self): 79 if self.childNodes: 80 return self.childNodes[-1] 81 82 def insertBefore(self, newChild, refChild): 83 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 84 for c in tuple(newChild.childNodes): 85 self.insertBefore(c, refChild) 86 ### The DOM does not clearly specify what to return in this case 87 return newChild 88 if newChild.nodeType not in self._child_node_types: 89 raise xml.dom.HierarchyRequestErr( 90 "%s cannot be child of %s" % (repr(newChild), repr(self))) 91 if newChild.parentNode is not None: 92 newChild.parentNode.removeChild(newChild) 93 if refChild is None: 94 self.appendChild(newChild) 95 else: 96 try: 97 index = self.childNodes.index(refChild) 98 except ValueError: 99 raise xml.dom.NotFoundErr() 100 if newChild.nodeType in _nodeTypes_with_children: 101 _clear_id_cache(self) 102 self.childNodes.insert(index, newChild) 103 newChild.nextSibling = refChild 104 refChild.previousSibling = newChild 105 if index: 106 node = self.childNodes[index-1] 107 node.nextSibling = newChild 108 newChild.previousSibling = node 109 else: 110 newChild.previousSibling = None 111 newChild.parentNode = self 112 return newChild 113 114 def appendChild(self, node): 115 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 116 for c in tuple(node.childNodes): 117 self.appendChild(c) 118 ### The DOM does not clearly specify what to return in this case 119 return node 120 if node.nodeType not in self._child_node_types: 121 raise xml.dom.HierarchyRequestErr( 122 "%s cannot be child of %s" % (repr(node), repr(self))) 123 elif node.nodeType in _nodeTypes_with_children: 124 _clear_id_cache(self) 125 if node.parentNode is not None: 126 node.parentNode.removeChild(node) 127 _append_child(self, node) 128 node.nextSibling = None 129 return node 130 131 def replaceChild(self, newChild, oldChild): 132 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 133 refChild = oldChild.nextSibling 134 self.removeChild(oldChild) 135 return self.insertBefore(newChild, refChild) 136 if newChild.nodeType not in self._child_node_types: 137 raise xml.dom.HierarchyRequestErr( 138 "%s cannot be child of %s" % (repr(newChild), repr(self))) 139 if newChild is oldChild: 140 return 141 if newChild.parentNode is not None: 142 newChild.parentNode.removeChild(newChild) 143 try: 144 index = self.childNodes.index(oldChild) 145 except ValueError: 146 raise xml.dom.NotFoundErr() 147 self.childNodes[index] = newChild 148 newChild.parentNode = self 149 oldChild.parentNode = None 150 if (newChild.nodeType in _nodeTypes_with_children 151 or oldChild.nodeType in _nodeTypes_with_children): 152 _clear_id_cache(self) 153 newChild.nextSibling = oldChild.nextSibling 154 newChild.previousSibling = oldChild.previousSibling 155 oldChild.nextSibling = None 156 oldChild.previousSibling = None 157 if newChild.previousSibling: 158 newChild.previousSibling.nextSibling = newChild 159 if newChild.nextSibling: 160 newChild.nextSibling.previousSibling = newChild 161 return oldChild 162 163 def removeChild(self, oldChild): 164 try: 165 self.childNodes.remove(oldChild) 166 except ValueError: 167 raise xml.dom.NotFoundErr() 168 if oldChild.nextSibling is not None: 169 oldChild.nextSibling.previousSibling = oldChild.previousSibling 170 if oldChild.previousSibling is not None: 171 oldChild.previousSibling.nextSibling = oldChild.nextSibling 172 oldChild.nextSibling = oldChild.previousSibling = None 173 if oldChild.nodeType in _nodeTypes_with_children: 174 _clear_id_cache(self) 175 176 oldChild.parentNode = None 177 return oldChild 178 179 def normalize(self): 180 L = [] 181 for child in self.childNodes: 182 if child.nodeType == Node.TEXT_NODE: 183 if not child.data: 184 # empty text node; discard 185 if L: 186 L[-1].nextSibling = child.nextSibling 187 if child.nextSibling: 188 child.nextSibling.previousSibling = child.previousSibling 189 child.unlink() 190 elif L and L[-1].nodeType == child.nodeType: 191 # collapse text node 192 node = L[-1] 193 node.data = node.data + child.data 194 node.nextSibling = child.nextSibling 195 if child.nextSibling: 196 child.nextSibling.previousSibling = node 197 child.unlink() 198 else: 199 L.append(child) 200 else: 201 L.append(child) 202 if child.nodeType == Node.ELEMENT_NODE: 203 child.normalize() 204 self.childNodes[:] = L 205 206 def cloneNode(self, deep): 207 return _clone_node(self, deep, self.ownerDocument or self) 208 209 def isSupported(self, feature, version): 210 return self.ownerDocument.implementation.hasFeature(feature, version) 211 212 def _get_localName(self): 213 # Overridden in Element and Attr where localName can be Non-Null 214 return None 215 216 # Node interfaces from Level 3 (WD 9 April 2002) 217 218 def isSameNode(self, other): 219 return self is other 220 221 def getInterface(self, feature): 222 if self.isSupported(feature, None): 223 return self 224 else: 225 return None 226 227 # The "user data" functions use a dictionary that is only present 228 # if some user data has been set, so be careful not to assume it 229 # exists. 230 231 def getUserData(self, key): 232 try: 233 return self._user_data[key][0] 234 except (AttributeError, KeyError): 235 return None 236 237 def setUserData(self, key, data, handler): 238 old = None 239 try: 240 d = self._user_data 241 except AttributeError: 242 d = {} 243 self._user_data = d 244 if key in d: 245 old = d[key][0] 246 if data is None: 247 # ignore handlers passed for None 248 handler = None 249 if old is not None: 250 del d[key] 251 else: 252 d[key] = (data, handler) 253 return old 254 255 def _call_user_data_handler(self, operation, src, dst): 256 if hasattr(self, "_user_data"): 257 for key, (data, handler) in list(self._user_data.items()): 258 if handler is not None: 259 handler.handle(operation, key, data, src, dst) 260 261 # minidom-specific API: 262 263 def unlink(self): 264 self.parentNode = self.ownerDocument = None 265 if self.childNodes: 266 for child in self.childNodes: 267 child.unlink() 268 self.childNodes = NodeList() 269 self.previousSibling = None 270 self.nextSibling = None 271 272 # A Node is its own context manager, to ensure that an unlink() call occurs. 273 # This is similar to how a file object works. 274 def __enter__(self): 275 return self 276 277 def __exit__(self, et, ev, tb): 278 self.unlink() 279 280defproperty(Node, "firstChild", doc="First child node, or None.") 281defproperty(Node, "lastChild", doc="Last child node, or None.") 282defproperty(Node, "localName", doc="Namespace-local name of this node.") 283 284 285def _append_child(self, node): 286 # fast path with less checks; usable by DOM builders if careful 287 childNodes = self.childNodes 288 if childNodes: 289 last = childNodes[-1] 290 node.previousSibling = last 291 last.nextSibling = node 292 childNodes.append(node) 293 node.parentNode = self 294 295def _in_document(node): 296 # return True iff node is part of a document tree 297 while node is not None: 298 if node.nodeType == Node.DOCUMENT_NODE: 299 return True 300 node = node.parentNode 301 return False 302 303def _write_data(writer, data): 304 "Writes datachars to writer." 305 if data: 306 data = data.replace("&", "&").replace("<", "<"). \ 307 replace("\"", """).replace(">", ">") 308 writer.write(data) 309 310def _get_elements_by_tagName_helper(parent, name, rc): 311 for node in parent.childNodes: 312 if node.nodeType == Node.ELEMENT_NODE and \ 313 (name == "*" or node.tagName == name): 314 rc.append(node) 315 _get_elements_by_tagName_helper(node, name, rc) 316 return rc 317 318def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 319 for node in parent.childNodes: 320 if node.nodeType == Node.ELEMENT_NODE: 321 if ((localName == "*" or node.localName == localName) and 322 (nsURI == "*" or node.namespaceURI == nsURI)): 323 rc.append(node) 324 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 325 return rc 326 327class DocumentFragment(Node): 328 nodeType = Node.DOCUMENT_FRAGMENT_NODE 329 nodeName = "#document-fragment" 330 nodeValue = None 331 attributes = None 332 parentNode = None 333 _child_node_types = (Node.ELEMENT_NODE, 334 Node.TEXT_NODE, 335 Node.CDATA_SECTION_NODE, 336 Node.ENTITY_REFERENCE_NODE, 337 Node.PROCESSING_INSTRUCTION_NODE, 338 Node.COMMENT_NODE, 339 Node.NOTATION_NODE) 340 341 def __init__(self): 342 self.childNodes = NodeList() 343 344 345class Attr(Node): 346 __slots__=('_name', '_value', 'namespaceURI', 347 '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') 348 nodeType = Node.ATTRIBUTE_NODE 349 attributes = None 350 specified = False 351 _is_id = False 352 353 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 354 355 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 356 prefix=None): 357 self.ownerElement = None 358 self._name = qName 359 self.namespaceURI = namespaceURI 360 self._prefix = prefix 361 self.childNodes = NodeList() 362 363 # Add the single child node that represents the value of the attr 364 self.childNodes.append(Text()) 365 366 # nodeValue and value are set elsewhere 367 368 def _get_localName(self): 369 try: 370 return self._localName 371 except AttributeError: 372 return self.nodeName.split(":", 1)[-1] 373 374 def _get_specified(self): 375 return self.specified 376 377 def _get_name(self): 378 return self._name 379 380 def _set_name(self, value): 381 self._name = value 382 if self.ownerElement is not None: 383 _clear_id_cache(self.ownerElement) 384 385 nodeName = name = property(_get_name, _set_name) 386 387 def _get_value(self): 388 return self._value 389 390 def _set_value(self, value): 391 self._value = value 392 self.childNodes[0].data = value 393 if self.ownerElement is not None: 394 _clear_id_cache(self.ownerElement) 395 self.childNodes[0].data = value 396 397 nodeValue = value = property(_get_value, _set_value) 398 399 def _get_prefix(self): 400 return self._prefix 401 402 def _set_prefix(self, prefix): 403 nsuri = self.namespaceURI 404 if prefix == "xmlns": 405 if nsuri and nsuri != XMLNS_NAMESPACE: 406 raise xml.dom.NamespaceErr( 407 "illegal use of 'xmlns' prefix for the wrong namespace") 408 self._prefix = prefix 409 if prefix is None: 410 newName = self.localName 411 else: 412 newName = "%s:%s" % (prefix, self.localName) 413 if self.ownerElement: 414 _clear_id_cache(self.ownerElement) 415 self.name = newName 416 417 prefix = property(_get_prefix, _set_prefix) 418 419 def unlink(self): 420 # This implementation does not call the base implementation 421 # since most of that is not needed, and the expense of the 422 # method call is not warranted. We duplicate the removal of 423 # children, but that's all we needed from the base class. 424 elem = self.ownerElement 425 if elem is not None: 426 del elem._attrs[self.nodeName] 427 del elem._attrsNS[(self.namespaceURI, self.localName)] 428 if self._is_id: 429 self._is_id = False 430 elem._magic_id_nodes -= 1 431 self.ownerDocument._magic_id_count -= 1 432 for child in self.childNodes: 433 child.unlink() 434 del self.childNodes[:] 435 436 def _get_isId(self): 437 if self._is_id: 438 return True 439 doc = self.ownerDocument 440 elem = self.ownerElement 441 if doc is None or elem is None: 442 return False 443 444 info = doc._get_elem_info(elem) 445 if info is None: 446 return False 447 if self.namespaceURI: 448 return info.isIdNS(self.namespaceURI, self.localName) 449 else: 450 return info.isId(self.nodeName) 451 452 def _get_schemaType(self): 453 doc = self.ownerDocument 454 elem = self.ownerElement 455 if doc is None or elem is None: 456 return _no_type 457 458 info = doc._get_elem_info(elem) 459 if info is None: 460 return _no_type 461 if self.namespaceURI: 462 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 463 else: 464 return info.getAttributeType(self.nodeName) 465 466defproperty(Attr, "isId", doc="True if this attribute is an ID.") 467defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 468defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 469 470 471class NamedNodeMap(object): 472 """The attribute list is a transient interface to the underlying 473 dictionaries. Mutations here will change the underlying element's 474 dictionary. 475 476 Ordering is imposed artificially and does not reflect the order of 477 attributes as found in an input document. 478 """ 479 480 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 481 482 def __init__(self, attrs, attrsNS, ownerElement): 483 self._attrs = attrs 484 self._attrsNS = attrsNS 485 self._ownerElement = ownerElement 486 487 def _get_length(self): 488 return len(self._attrs) 489 490 def item(self, index): 491 try: 492 return self[list(self._attrs.keys())[index]] 493 except IndexError: 494 return None 495 496 def items(self): 497 L = [] 498 for node in self._attrs.values(): 499 L.append((node.nodeName, node.value)) 500 return L 501 502 def itemsNS(self): 503 L = [] 504 for node in self._attrs.values(): 505 L.append(((node.namespaceURI, node.localName), node.value)) 506 return L 507 508 def __contains__(self, key): 509 if isinstance(key, str): 510 return key in self._attrs 511 else: 512 return key in self._attrsNS 513 514 def keys(self): 515 return self._attrs.keys() 516 517 def keysNS(self): 518 return self._attrsNS.keys() 519 520 def values(self): 521 return self._attrs.values() 522 523 def get(self, name, value=None): 524 return self._attrs.get(name, value) 525 526 __len__ = _get_length 527 528 def _cmp(self, other): 529 if self._attrs is getattr(other, "_attrs", None): 530 return 0 531 else: 532 return (id(self) > id(other)) - (id(self) < id(other)) 533 534 def __eq__(self, other): 535 return self._cmp(other) == 0 536 537 def __ge__(self, other): 538 return self._cmp(other) >= 0 539 540 def __gt__(self, other): 541 return self._cmp(other) > 0 542 543 def __le__(self, other): 544 return self._cmp(other) <= 0 545 546 def __lt__(self, other): 547 return self._cmp(other) < 0 548 549 def __getitem__(self, attname_or_tuple): 550 if isinstance(attname_or_tuple, tuple): 551 return self._attrsNS[attname_or_tuple] 552 else: 553 return self._attrs[attname_or_tuple] 554 555 # same as set 556 def __setitem__(self, attname, value): 557 if isinstance(value, str): 558 try: 559 node = self._attrs[attname] 560 except KeyError: 561 node = Attr(attname) 562 node.ownerDocument = self._ownerElement.ownerDocument 563 self.setNamedItem(node) 564 node.value = value 565 else: 566 if not isinstance(value, Attr): 567 raise TypeError("value must be a string or Attr object") 568 node = value 569 self.setNamedItem(node) 570 571 def getNamedItem(self, name): 572 try: 573 return self._attrs[name] 574 except KeyError: 575 return None 576 577 def getNamedItemNS(self, namespaceURI, localName): 578 try: 579 return self._attrsNS[(namespaceURI, localName)] 580 except KeyError: 581 return None 582 583 def removeNamedItem(self, name): 584 n = self.getNamedItem(name) 585 if n is not None: 586 _clear_id_cache(self._ownerElement) 587 del self._attrs[n.nodeName] 588 del self._attrsNS[(n.namespaceURI, n.localName)] 589 if hasattr(n, 'ownerElement'): 590 n.ownerElement = None 591 return n 592 else: 593 raise xml.dom.NotFoundErr() 594 595 def removeNamedItemNS(self, namespaceURI, localName): 596 n = self.getNamedItemNS(namespaceURI, localName) 597 if n is not None: 598 _clear_id_cache(self._ownerElement) 599 del self._attrsNS[(n.namespaceURI, n.localName)] 600 del self._attrs[n.nodeName] 601 if hasattr(n, 'ownerElement'): 602 n.ownerElement = None 603 return n 604 else: 605 raise xml.dom.NotFoundErr() 606 607 def setNamedItem(self, node): 608 if not isinstance(node, Attr): 609 raise xml.dom.HierarchyRequestErr( 610 "%s cannot be child of %s" % (repr(node), repr(self))) 611 old = self._attrs.get(node.name) 612 if old: 613 old.unlink() 614 self._attrs[node.name] = node 615 self._attrsNS[(node.namespaceURI, node.localName)] = node 616 node.ownerElement = self._ownerElement 617 _clear_id_cache(node.ownerElement) 618 return old 619 620 def setNamedItemNS(self, node): 621 return self.setNamedItem(node) 622 623 def __delitem__(self, attname_or_tuple): 624 node = self[attname_or_tuple] 625 _clear_id_cache(node.ownerElement) 626 node.unlink() 627 628 def __getstate__(self): 629 return self._attrs, self._attrsNS, self._ownerElement 630 631 def __setstate__(self, state): 632 self._attrs, self._attrsNS, self._ownerElement = state 633 634defproperty(NamedNodeMap, "length", 635 doc="Number of nodes in the NamedNodeMap.") 636 637AttributeList = NamedNodeMap 638 639 640class TypeInfo(object): 641 __slots__ = 'namespace', 'name' 642 643 def __init__(self, namespace, name): 644 self.namespace = namespace 645 self.name = name 646 647 def __repr__(self): 648 if self.namespace: 649 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name, 650 self.namespace) 651 else: 652 return "<%s %r>" % (self.__class__.__name__, self.name) 653 654 def _get_name(self): 655 return self.name 656 657 def _get_namespace(self): 658 return self.namespace 659 660_no_type = TypeInfo(None, None) 661 662class Element(Node): 663 __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', 664 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', 665 'nextSibling', 'previousSibling') 666 nodeType = Node.ELEMENT_NODE 667 nodeValue = None 668 schemaType = _no_type 669 670 _magic_id_nodes = 0 671 672 _child_node_types = (Node.ELEMENT_NODE, 673 Node.PROCESSING_INSTRUCTION_NODE, 674 Node.COMMENT_NODE, 675 Node.TEXT_NODE, 676 Node.CDATA_SECTION_NODE, 677 Node.ENTITY_REFERENCE_NODE) 678 679 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 680 localName=None): 681 self.parentNode = None 682 self.tagName = self.nodeName = tagName 683 self.prefix = prefix 684 self.namespaceURI = namespaceURI 685 self.childNodes = NodeList() 686 self.nextSibling = self.previousSibling = None 687 688 # Attribute dictionaries are lazily created 689 # attributes are double-indexed: 690 # tagName -> Attribute 691 # URI,localName -> Attribute 692 # in the future: consider lazy generation 693 # of attribute objects this is too tricky 694 # for now because of headaches with 695 # namespaces. 696 self._attrs = None 697 self._attrsNS = None 698 699 def _ensure_attributes(self): 700 if self._attrs is None: 701 self._attrs = {} 702 self._attrsNS = {} 703 704 def _get_localName(self): 705 try: 706 return self._localName 707 except AttributeError: 708 return self.tagName.split(":", 1)[-1] 709 710 def _get_tagName(self): 711 return self.tagName 712 713 def unlink(self): 714 if self._attrs is not None: 715 for attr in list(self._attrs.values()): 716 attr.unlink() 717 self._attrs = None 718 self._attrsNS = None 719 Node.unlink(self) 720 721 def getAttribute(self, attname): 722 """Returns the value of the specified attribute. 723 724 Returns the value of the element's attribute named attname as 725 a string. An empty string is returned if the element does not 726 have such an attribute. Note that an empty string may also be 727 returned as an explicitly given attribute value, use the 728 hasAttribute method to distinguish these two cases. 729 """ 730 if self._attrs is None: 731 return "" 732 try: 733 return self._attrs[attname].value 734 except KeyError: 735 return "" 736 737 def getAttributeNS(self, namespaceURI, localName): 738 if self._attrsNS is None: 739 return "" 740 try: 741 return self._attrsNS[(namespaceURI, localName)].value 742 except KeyError: 743 return "" 744 745 def setAttribute(self, attname, value): 746 attr = self.getAttributeNode(attname) 747 if attr is None: 748 attr = Attr(attname) 749 attr.value = value # also sets nodeValue 750 attr.ownerDocument = self.ownerDocument 751 self.setAttributeNode(attr) 752 elif value != attr.value: 753 attr.value = value 754 if attr.isId: 755 _clear_id_cache(self) 756 757 def setAttributeNS(self, namespaceURI, qualifiedName, value): 758 prefix, localname = _nssplit(qualifiedName) 759 attr = self.getAttributeNodeNS(namespaceURI, localname) 760 if attr is None: 761 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 762 attr.value = value 763 attr.ownerDocument = self.ownerDocument 764 self.setAttributeNode(attr) 765 else: 766 if value != attr.value: 767 attr.value = value 768 if attr.isId: 769 _clear_id_cache(self) 770 if attr.prefix != prefix: 771 attr.prefix = prefix 772 attr.nodeName = qualifiedName 773 774 def getAttributeNode(self, attrname): 775 if self._attrs is None: 776 return None 777 return self._attrs.get(attrname) 778 779 def getAttributeNodeNS(self, namespaceURI, localName): 780 if self._attrsNS is None: 781 return None 782 return self._attrsNS.get((namespaceURI, localName)) 783 784 def setAttributeNode(self, attr): 785 if attr.ownerElement not in (None, self): 786 raise xml.dom.InuseAttributeErr("attribute node already owned") 787 self._ensure_attributes() 788 old1 = self._attrs.get(attr.name, None) 789 if old1 is not None: 790 self.removeAttributeNode(old1) 791 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 792 if old2 is not None and old2 is not old1: 793 self.removeAttributeNode(old2) 794 _set_attribute_node(self, attr) 795 796 if old1 is not attr: 797 # It might have already been part of this node, in which case 798 # it doesn't represent a change, and should not be returned. 799 return old1 800 if old2 is not attr: 801 return old2 802 803 setAttributeNodeNS = setAttributeNode 804 805 def removeAttribute(self, name): 806 if self._attrsNS is None: 807 raise xml.dom.NotFoundErr() 808 try: 809 attr = self._attrs[name] 810 except KeyError: 811 raise xml.dom.NotFoundErr() 812 self.removeAttributeNode(attr) 813 814 def removeAttributeNS(self, namespaceURI, localName): 815 if self._attrsNS is None: 816 raise xml.dom.NotFoundErr() 817 try: 818 attr = self._attrsNS[(namespaceURI, localName)] 819 except KeyError: 820 raise xml.dom.NotFoundErr() 821 self.removeAttributeNode(attr) 822 823 def removeAttributeNode(self, node): 824 if node is None: 825 raise xml.dom.NotFoundErr() 826 try: 827 self._attrs[node.name] 828 except KeyError: 829 raise xml.dom.NotFoundErr() 830 _clear_id_cache(self) 831 node.unlink() 832 # Restore this since the node is still useful and otherwise 833 # unlinked 834 node.ownerDocument = self.ownerDocument 835 return node 836 837 removeAttributeNodeNS = removeAttributeNode 838 839 def hasAttribute(self, name): 840 """Checks whether the element has an attribute with the specified name. 841 842 Returns True if the element has an attribute with the specified name. 843 Otherwise, returns False. 844 """ 845 if self._attrs is None: 846 return False 847 return name in self._attrs 848 849 def hasAttributeNS(self, namespaceURI, localName): 850 if self._attrsNS is None: 851 return False 852 return (namespaceURI, localName) in self._attrsNS 853 854 def getElementsByTagName(self, name): 855 """Returns all descendant elements with the given tag name. 856 857 Returns the list of all descendant elements (not direct children 858 only) with the specified tag name. 859 """ 860 return _get_elements_by_tagName_helper(self, name, NodeList()) 861 862 def getElementsByTagNameNS(self, namespaceURI, localName): 863 return _get_elements_by_tagName_ns_helper( 864 self, namespaceURI, localName, NodeList()) 865 866 def __repr__(self): 867 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 868 869 def writexml(self, writer, indent="", addindent="", newl=""): 870 """Write an XML element to a file-like object 871 872 Write the element to the writer object that must provide 873 a write method (e.g. a file or StringIO object). 874 """ 875 # indent = current indentation 876 # addindent = indentation to add to higher levels 877 # newl = newline string 878 writer.write(indent+"<" + self.tagName) 879 880 attrs = self._get_attributes() 881 882 for a_name in attrs.keys(): 883 writer.write(" %s=\"" % a_name) 884 _write_data(writer, attrs[a_name].value) 885 writer.write("\"") 886 if self.childNodes: 887 writer.write(">") 888 if (len(self.childNodes) == 1 and 889 self.childNodes[0].nodeType in ( 890 Node.TEXT_NODE, Node.CDATA_SECTION_NODE)): 891 self.childNodes[0].writexml(writer, '', '', '') 892 else: 893 writer.write(newl) 894 for node in self.childNodes: 895 node.writexml(writer, indent+addindent, addindent, newl) 896 writer.write(indent) 897 writer.write("</%s>%s" % (self.tagName, newl)) 898 else: 899 writer.write("/>%s"%(newl)) 900 901 def _get_attributes(self): 902 self._ensure_attributes() 903 return NamedNodeMap(self._attrs, self._attrsNS, self) 904 905 def hasAttributes(self): 906 if self._attrs: 907 return True 908 else: 909 return False 910 911 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 912 913 def setIdAttribute(self, name): 914 idAttr = self.getAttributeNode(name) 915 self.setIdAttributeNode(idAttr) 916 917 def setIdAttributeNS(self, namespaceURI, localName): 918 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 919 self.setIdAttributeNode(idAttr) 920 921 def setIdAttributeNode(self, idAttr): 922 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 923 raise xml.dom.NotFoundErr() 924 if _get_containing_entref(self) is not None: 925 raise xml.dom.NoModificationAllowedErr() 926 if not idAttr._is_id: 927 idAttr._is_id = True 928 self._magic_id_nodes += 1 929 self.ownerDocument._magic_id_count += 1 930 _clear_id_cache(self) 931 932defproperty(Element, "attributes", 933 doc="NamedNodeMap of attributes on the element.") 934defproperty(Element, "localName", 935 doc="Namespace-local name of this element.") 936 937 938def _set_attribute_node(element, attr): 939 _clear_id_cache(element) 940 element._ensure_attributes() 941 element._attrs[attr.name] = attr 942 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 943 944 # This creates a circular reference, but Element.unlink() 945 # breaks the cycle since the references to the attribute 946 # dictionaries are tossed. 947 attr.ownerElement = element 948 949class Childless: 950 """Mixin that makes childless-ness easy to implement and avoids 951 the complexity of the Node methods that deal with children. 952 """ 953 __slots__ = () 954 955 attributes = None 956 childNodes = EmptyNodeList() 957 firstChild = None 958 lastChild = None 959 960 def _get_firstChild(self): 961 return None 962 963 def _get_lastChild(self): 964 return None 965 966 def appendChild(self, node): 967 raise xml.dom.HierarchyRequestErr( 968 self.nodeName + " nodes cannot have children") 969 970 def hasChildNodes(self): 971 return False 972 973 def insertBefore(self, newChild, refChild): 974 raise xml.dom.HierarchyRequestErr( 975 self.nodeName + " nodes do not have children") 976 977 def removeChild(self, oldChild): 978 raise xml.dom.NotFoundErr( 979 self.nodeName + " nodes do not have children") 980 981 def normalize(self): 982 # For childless nodes, normalize() has nothing to do. 983 pass 984 985 def replaceChild(self, newChild, oldChild): 986 raise xml.dom.HierarchyRequestErr( 987 self.nodeName + " nodes do not have children") 988 989 990class ProcessingInstruction(Childless, Node): 991 nodeType = Node.PROCESSING_INSTRUCTION_NODE 992 __slots__ = ('target', 'data') 993 994 def __init__(self, target, data): 995 self.target = target 996 self.data = data 997 998 # nodeValue is an alias for data 999 def _get_nodeValue(self): 1000 return self.data 1001 def _set_nodeValue(self, value): 1002 self.data = value 1003 nodeValue = property(_get_nodeValue, _set_nodeValue) 1004 1005 # nodeName is an alias for target 1006 def _get_nodeName(self): 1007 return self.target 1008 def _set_nodeName(self, value): 1009 self.target = value 1010 nodeName = property(_get_nodeName, _set_nodeName) 1011 1012 def writexml(self, writer, indent="", addindent="", newl=""): 1013 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 1014 1015 1016class CharacterData(Childless, Node): 1017 __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') 1018 1019 def __init__(self): 1020 self.ownerDocument = self.parentNode = None 1021 self.previousSibling = self.nextSibling = None 1022 self._data = '' 1023 Node.__init__(self) 1024 1025 def _get_length(self): 1026 return len(self.data) 1027 __len__ = _get_length 1028 1029 def _get_data(self): 1030 return self._data 1031 def _set_data(self, data): 1032 self._data = data 1033 1034 data = nodeValue = property(_get_data, _set_data) 1035 1036 def __repr__(self): 1037 data = self.data 1038 if len(data) > 10: 1039 dotdotdot = "..." 1040 else: 1041 dotdotdot = "" 1042 return '<DOM %s node "%r%s">' % ( 1043 self.__class__.__name__, data[0:10], dotdotdot) 1044 1045 def substringData(self, offset, count): 1046 if offset < 0: 1047 raise xml.dom.IndexSizeErr("offset cannot be negative") 1048 if offset >= len(self.data): 1049 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1050 if count < 0: 1051 raise xml.dom.IndexSizeErr("count cannot be negative") 1052 return self.data[offset:offset+count] 1053 1054 def appendData(self, arg): 1055 self.data = self.data + arg 1056 1057 def insertData(self, offset, arg): 1058 if offset < 0: 1059 raise xml.dom.IndexSizeErr("offset cannot be negative") 1060 if offset >= len(self.data): 1061 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1062 if arg: 1063 self.data = "%s%s%s" % ( 1064 self.data[:offset], arg, self.data[offset:]) 1065 1066 def deleteData(self, offset, count): 1067 if offset < 0: 1068 raise xml.dom.IndexSizeErr("offset cannot be negative") 1069 if offset >= len(self.data): 1070 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1071 if count < 0: 1072 raise xml.dom.IndexSizeErr("count cannot be negative") 1073 if count: 1074 self.data = self.data[:offset] + self.data[offset+count:] 1075 1076 def replaceData(self, offset, count, arg): 1077 if offset < 0: 1078 raise xml.dom.IndexSizeErr("offset cannot be negative") 1079 if offset >= len(self.data): 1080 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1081 if count < 0: 1082 raise xml.dom.IndexSizeErr("count cannot be negative") 1083 if count: 1084 self.data = "%s%s%s" % ( 1085 self.data[:offset], arg, self.data[offset+count:]) 1086 1087defproperty(CharacterData, "length", doc="Length of the string data.") 1088 1089 1090class Text(CharacterData): 1091 __slots__ = () 1092 1093 nodeType = Node.TEXT_NODE 1094 nodeName = "#text" 1095 attributes = None 1096 1097 def splitText(self, offset): 1098 if offset < 0 or offset > len(self.data): 1099 raise xml.dom.IndexSizeErr("illegal offset value") 1100 newText = self.__class__() 1101 newText.data = self.data[offset:] 1102 newText.ownerDocument = self.ownerDocument 1103 next = self.nextSibling 1104 if self.parentNode and self in self.parentNode.childNodes: 1105 if next is None: 1106 self.parentNode.appendChild(newText) 1107 else: 1108 self.parentNode.insertBefore(newText, next) 1109 self.data = self.data[:offset] 1110 return newText 1111 1112 def writexml(self, writer, indent="", addindent="", newl=""): 1113 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1114 1115 # DOM Level 3 (WD 9 April 2002) 1116 1117 def _get_wholeText(self): 1118 L = [self.data] 1119 n = self.previousSibling 1120 while n is not None: 1121 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1122 L.insert(0, n.data) 1123 n = n.previousSibling 1124 else: 1125 break 1126 n = self.nextSibling 1127 while n is not None: 1128 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1129 L.append(n.data) 1130 n = n.nextSibling 1131 else: 1132 break 1133 return ''.join(L) 1134 1135 def replaceWholeText(self, content): 1136 # XXX This needs to be seriously changed if minidom ever 1137 # supports EntityReference nodes. 1138 parent = self.parentNode 1139 n = self.previousSibling 1140 while n is not None: 1141 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1142 next = n.previousSibling 1143 parent.removeChild(n) 1144 n = next 1145 else: 1146 break 1147 n = self.nextSibling 1148 if not content: 1149 parent.removeChild(self) 1150 while n is not None: 1151 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1152 next = n.nextSibling 1153 parent.removeChild(n) 1154 n = next 1155 else: 1156 break 1157 if content: 1158 self.data = content 1159 return self 1160 else: 1161 return None 1162 1163 def _get_isWhitespaceInElementContent(self): 1164 if self.data.strip(): 1165 return False 1166 elem = _get_containing_element(self) 1167 if elem is None: 1168 return False 1169 info = self.ownerDocument._get_elem_info(elem) 1170 if info is None: 1171 return False 1172 else: 1173 return info.isElementContent() 1174 1175defproperty(Text, "isWhitespaceInElementContent", 1176 doc="True iff this text node contains only whitespace" 1177 " and is in element content.") 1178defproperty(Text, "wholeText", 1179 doc="The text of all logically-adjacent text nodes.") 1180 1181 1182def _get_containing_element(node): 1183 c = node.parentNode 1184 while c is not None: 1185 if c.nodeType == Node.ELEMENT_NODE: 1186 return c 1187 c = c.parentNode 1188 return None 1189 1190def _get_containing_entref(node): 1191 c = node.parentNode 1192 while c is not None: 1193 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1194 return c 1195 c = c.parentNode 1196 return None 1197 1198 1199class Comment(CharacterData): 1200 nodeType = Node.COMMENT_NODE 1201 nodeName = "#comment" 1202 1203 def __init__(self, data): 1204 CharacterData.__init__(self) 1205 self._data = data 1206 1207 def writexml(self, writer, indent="", addindent="", newl=""): 1208 if "--" in self.data: 1209 raise ValueError("'--' is not allowed in a comment node") 1210 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1211 1212 1213class CDATASection(Text): 1214 __slots__ = () 1215 1216 nodeType = Node.CDATA_SECTION_NODE 1217 nodeName = "#cdata-section" 1218 1219 def writexml(self, writer, indent="", addindent="", newl=""): 1220 if self.data.find("]]>") >= 0: 1221 raise ValueError("']]>' not allowed in a CDATA section") 1222 writer.write("<![CDATA[%s]]>" % self.data) 1223 1224 1225class ReadOnlySequentialNamedNodeMap(object): 1226 __slots__ = '_seq', 1227 1228 def __init__(self, seq=()): 1229 # seq should be a list or tuple 1230 self._seq = seq 1231 1232 def __len__(self): 1233 return len(self._seq) 1234 1235 def _get_length(self): 1236 return len(self._seq) 1237 1238 def getNamedItem(self, name): 1239 for n in self._seq: 1240 if n.nodeName == name: 1241 return n 1242 1243 def getNamedItemNS(self, namespaceURI, localName): 1244 for n in self._seq: 1245 if n.namespaceURI == namespaceURI and n.localName == localName: 1246 return n 1247 1248 def __getitem__(self, name_or_tuple): 1249 if isinstance(name_or_tuple, tuple): 1250 node = self.getNamedItemNS(*name_or_tuple) 1251 else: 1252 node = self.getNamedItem(name_or_tuple) 1253 if node is None: 1254 raise KeyError(name_or_tuple) 1255 return node 1256 1257 def item(self, index): 1258 if index < 0: 1259 return None 1260 try: 1261 return self._seq[index] 1262 except IndexError: 1263 return None 1264 1265 def removeNamedItem(self, name): 1266 raise xml.dom.NoModificationAllowedErr( 1267 "NamedNodeMap instance is read-only") 1268 1269 def removeNamedItemNS(self, namespaceURI, localName): 1270 raise xml.dom.NoModificationAllowedErr( 1271 "NamedNodeMap instance is read-only") 1272 1273 def setNamedItem(self, node): 1274 raise xml.dom.NoModificationAllowedErr( 1275 "NamedNodeMap instance is read-only") 1276 1277 def setNamedItemNS(self, node): 1278 raise xml.dom.NoModificationAllowedErr( 1279 "NamedNodeMap instance is read-only") 1280 1281 def __getstate__(self): 1282 return [self._seq] 1283 1284 def __setstate__(self, state): 1285 self._seq = state[0] 1286 1287defproperty(ReadOnlySequentialNamedNodeMap, "length", 1288 doc="Number of entries in the NamedNodeMap.") 1289 1290 1291class Identified: 1292 """Mix-in class that supports the publicId and systemId attributes.""" 1293 1294 __slots__ = 'publicId', 'systemId' 1295 1296 def _identified_mixin_init(self, publicId, systemId): 1297 self.publicId = publicId 1298 self.systemId = systemId 1299 1300 def _get_publicId(self): 1301 return self.publicId 1302 1303 def _get_systemId(self): 1304 return self.systemId 1305 1306class DocumentType(Identified, Childless, Node): 1307 nodeType = Node.DOCUMENT_TYPE_NODE 1308 nodeValue = None 1309 name = None 1310 publicId = None 1311 systemId = None 1312 internalSubset = None 1313 1314 def __init__(self, qualifiedName): 1315 self.entities = ReadOnlySequentialNamedNodeMap() 1316 self.notations = ReadOnlySequentialNamedNodeMap() 1317 if qualifiedName: 1318 prefix, localname = _nssplit(qualifiedName) 1319 self.name = localname 1320 self.nodeName = self.name 1321 1322 def _get_internalSubset(self): 1323 return self.internalSubset 1324 1325 def cloneNode(self, deep): 1326 if self.ownerDocument is None: 1327 # it's ok 1328 clone = DocumentType(None) 1329 clone.name = self.name 1330 clone.nodeName = self.name 1331 operation = xml.dom.UserDataHandler.NODE_CLONED 1332 if deep: 1333 clone.entities._seq = [] 1334 clone.notations._seq = [] 1335 for n in self.notations._seq: 1336 notation = Notation(n.nodeName, n.publicId, n.systemId) 1337 clone.notations._seq.append(notation) 1338 n._call_user_data_handler(operation, n, notation) 1339 for e in self.entities._seq: 1340 entity = Entity(e.nodeName, e.publicId, e.systemId, 1341 e.notationName) 1342 entity.actualEncoding = e.actualEncoding 1343 entity.encoding = e.encoding 1344 entity.version = e.version 1345 clone.entities._seq.append(entity) 1346 e._call_user_data_handler(operation, e, entity) 1347 self._call_user_data_handler(operation, self, clone) 1348 return clone 1349 else: 1350 return None 1351 1352 def writexml(self, writer, indent="", addindent="", newl=""): 1353 writer.write("<!DOCTYPE ") 1354 writer.write(self.name) 1355 if self.publicId: 1356 writer.write("%s PUBLIC '%s'%s '%s'" 1357 % (newl, self.publicId, newl, self.systemId)) 1358 elif self.systemId: 1359 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1360 if self.internalSubset is not None: 1361 writer.write(" [") 1362 writer.write(self.internalSubset) 1363 writer.write("]") 1364 writer.write(">"+newl) 1365 1366class Entity(Identified, Node): 1367 attributes = None 1368 nodeType = Node.ENTITY_NODE 1369 nodeValue = None 1370 1371 actualEncoding = None 1372 encoding = None 1373 version = None 1374 1375 def __init__(self, name, publicId, systemId, notation): 1376 self.nodeName = name 1377 self.notationName = notation 1378 self.childNodes = NodeList() 1379 self._identified_mixin_init(publicId, systemId) 1380 1381 def _get_actualEncoding(self): 1382 return self.actualEncoding 1383 1384 def _get_encoding(self): 1385 return self.encoding 1386 1387 def _get_version(self): 1388 return self.version 1389 1390 def appendChild(self, newChild): 1391 raise xml.dom.HierarchyRequestErr( 1392 "cannot append children to an entity node") 1393 1394 def insertBefore(self, newChild, refChild): 1395 raise xml.dom.HierarchyRequestErr( 1396 "cannot insert children below an entity node") 1397 1398 def removeChild(self, oldChild): 1399 raise xml.dom.HierarchyRequestErr( 1400 "cannot remove children from an entity node") 1401 1402 def replaceChild(self, newChild, oldChild): 1403 raise xml.dom.HierarchyRequestErr( 1404 "cannot replace children of an entity node") 1405 1406class Notation(Identified, Childless, Node): 1407 nodeType = Node.NOTATION_NODE 1408 nodeValue = None 1409 1410 def __init__(self, name, publicId, systemId): 1411 self.nodeName = name 1412 self._identified_mixin_init(publicId, systemId) 1413 1414 1415class DOMImplementation(DOMImplementationLS): 1416 _features = [("core", "1.0"), 1417 ("core", "2.0"), 1418 ("core", None), 1419 ("xml", "1.0"), 1420 ("xml", "2.0"), 1421 ("xml", None), 1422 ("ls-load", "3.0"), 1423 ("ls-load", None), 1424 ] 1425 1426 def hasFeature(self, feature, version): 1427 if version == "": 1428 version = None 1429 return (feature.lower(), version) in self._features 1430 1431 def createDocument(self, namespaceURI, qualifiedName, doctype): 1432 if doctype and doctype.parentNode is not None: 1433 raise xml.dom.WrongDocumentErr( 1434 "doctype object owned by another DOM tree") 1435 doc = self._create_document() 1436 1437 add_root_element = not (namespaceURI is None 1438 and qualifiedName is None 1439 and doctype is None) 1440 1441 if not qualifiedName and add_root_element: 1442 # The spec is unclear what to raise here; SyntaxErr 1443 # would be the other obvious candidate. Since Xerces raises 1444 # InvalidCharacterErr, and since SyntaxErr is not listed 1445 # for createDocument, that seems to be the better choice. 1446 # XXX: need to check for illegal characters here and in 1447 # createElement. 1448 1449 # DOM Level III clears this up when talking about the return value 1450 # of this function. If namespaceURI, qName and DocType are 1451 # Null the document is returned without a document element 1452 # Otherwise if doctype or namespaceURI are not None 1453 # Then we go back to the above problem 1454 raise xml.dom.InvalidCharacterErr("Element with no name") 1455 1456 if add_root_element: 1457 prefix, localname = _nssplit(qualifiedName) 1458 if prefix == "xml" \ 1459 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1460 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1461 if prefix and not namespaceURI: 1462 raise xml.dom.NamespaceErr( 1463 "illegal use of prefix without namespaces") 1464 element = doc.createElementNS(namespaceURI, qualifiedName) 1465 if doctype: 1466 doc.appendChild(doctype) 1467 doc.appendChild(element) 1468 1469 if doctype: 1470 doctype.parentNode = doctype.ownerDocument = doc 1471 1472 doc.doctype = doctype 1473 doc.implementation = self 1474 return doc 1475 1476 def createDocumentType(self, qualifiedName, publicId, systemId): 1477 doctype = DocumentType(qualifiedName) 1478 doctype.publicId = publicId 1479 doctype.systemId = systemId 1480 return doctype 1481 1482 # DOM Level 3 (WD 9 April 2002) 1483 1484 def getInterface(self, feature): 1485 if self.hasFeature(feature, None): 1486 return self 1487 else: 1488 return None 1489 1490 # internal 1491 def _create_document(self): 1492 return Document() 1493 1494class ElementInfo(object): 1495 """Object that represents content-model information for an element. 1496 1497 This implementation is not expected to be used in practice; DOM 1498 builders should provide implementations which do the right thing 1499 using information available to it. 1500 1501 """ 1502 1503 __slots__ = 'tagName', 1504 1505 def __init__(self, name): 1506 self.tagName = name 1507 1508 def getAttributeType(self, aname): 1509 return _no_type 1510 1511 def getAttributeTypeNS(self, namespaceURI, localName): 1512 return _no_type 1513 1514 def isElementContent(self): 1515 return False 1516 1517 def isEmpty(self): 1518 """Returns true iff this element is declared to have an EMPTY 1519 content model.""" 1520 return False 1521 1522 def isId(self, aname): 1523 """Returns true iff the named attribute is a DTD-style ID.""" 1524 return False 1525 1526 def isIdNS(self, namespaceURI, localName): 1527 """Returns true iff the identified attribute is a DTD-style ID.""" 1528 return False 1529 1530 def __getstate__(self): 1531 return self.tagName 1532 1533 def __setstate__(self, state): 1534 self.tagName = state 1535 1536def _clear_id_cache(node): 1537 if node.nodeType == Node.DOCUMENT_NODE: 1538 node._id_cache.clear() 1539 node._id_search_stack = None 1540 elif _in_document(node): 1541 node.ownerDocument._id_cache.clear() 1542 node.ownerDocument._id_search_stack= None 1543 1544class Document(Node, DocumentLS): 1545 __slots__ = ('_elem_info', 'doctype', 1546 '_id_search_stack', 'childNodes', '_id_cache') 1547 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1548 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1549 1550 implementation = DOMImplementation() 1551 nodeType = Node.DOCUMENT_NODE 1552 nodeName = "#document" 1553 nodeValue = None 1554 attributes = None 1555 parentNode = None 1556 previousSibling = nextSibling = None 1557 1558 1559 # Document attributes from Level 3 (WD 9 April 2002) 1560 1561 actualEncoding = None 1562 encoding = None 1563 standalone = None 1564 version = None 1565 strictErrorChecking = False 1566 errorHandler = None 1567 documentURI = None 1568 1569 _magic_id_count = 0 1570 1571 def __init__(self): 1572 self.doctype = None 1573 self.childNodes = NodeList() 1574 # mapping of (namespaceURI, localName) -> ElementInfo 1575 # and tagName -> ElementInfo 1576 self._elem_info = {} 1577 self._id_cache = {} 1578 self._id_search_stack = None 1579 1580 def _get_elem_info(self, element): 1581 if element.namespaceURI: 1582 key = element.namespaceURI, element.localName 1583 else: 1584 key = element.tagName 1585 return self._elem_info.get(key) 1586 1587 def _get_actualEncoding(self): 1588 return self.actualEncoding 1589 1590 def _get_doctype(self): 1591 return self.doctype 1592 1593 def _get_documentURI(self): 1594 return self.documentURI 1595 1596 def _get_encoding(self): 1597 return self.encoding 1598 1599 def _get_errorHandler(self): 1600 return self.errorHandler 1601 1602 def _get_standalone(self): 1603 return self.standalone 1604 1605 def _get_strictErrorChecking(self): 1606 return self.strictErrorChecking 1607 1608 def _get_version(self): 1609 return self.version 1610 1611 def appendChild(self, node): 1612 if node.nodeType not in self._child_node_types: 1613 raise xml.dom.HierarchyRequestErr( 1614 "%s cannot be child of %s" % (repr(node), repr(self))) 1615 if node.parentNode is not None: 1616 # This needs to be done before the next test since this 1617 # may *be* the document element, in which case it should 1618 # end up re-ordered to the end. 1619 node.parentNode.removeChild(node) 1620 1621 if node.nodeType == Node.ELEMENT_NODE \ 1622 and self._get_documentElement(): 1623 raise xml.dom.HierarchyRequestErr( 1624 "two document elements disallowed") 1625 return Node.appendChild(self, node) 1626 1627 def removeChild(self, oldChild): 1628 try: 1629 self.childNodes.remove(oldChild) 1630 except ValueError: 1631 raise xml.dom.NotFoundErr() 1632 oldChild.nextSibling = oldChild.previousSibling = None 1633 oldChild.parentNode = None 1634 if self.documentElement is oldChild: 1635 self.documentElement = None 1636 1637 return oldChild 1638 1639 def _get_documentElement(self): 1640 for node in self.childNodes: 1641 if node.nodeType == Node.ELEMENT_NODE: 1642 return node 1643 1644 def unlink(self): 1645 if self.doctype is not None: 1646 self.doctype.unlink() 1647 self.doctype = None 1648 Node.unlink(self) 1649 1650 def cloneNode(self, deep): 1651 if not deep: 1652 return None 1653 clone = self.implementation.createDocument(None, None, None) 1654 clone.encoding = self.encoding 1655 clone.standalone = self.standalone 1656 clone.version = self.version 1657 for n in self.childNodes: 1658 childclone = _clone_node(n, deep, clone) 1659 assert childclone.ownerDocument.isSameNode(clone) 1660 clone.childNodes.append(childclone) 1661 if childclone.nodeType == Node.DOCUMENT_NODE: 1662 assert clone.documentElement is None 1663 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1664 assert clone.doctype is None 1665 clone.doctype = childclone 1666 childclone.parentNode = clone 1667 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1668 self, clone) 1669 return clone 1670 1671 def createDocumentFragment(self): 1672 d = DocumentFragment() 1673 d.ownerDocument = self 1674 return d 1675 1676 def createElement(self, tagName): 1677 e = Element(tagName) 1678 e.ownerDocument = self 1679 return e 1680 1681 def createTextNode(self, data): 1682 if not isinstance(data, str): 1683 raise TypeError("node contents must be a string") 1684 t = Text() 1685 t.data = data 1686 t.ownerDocument = self 1687 return t 1688 1689 def createCDATASection(self, data): 1690 if not isinstance(data, str): 1691 raise TypeError("node contents must be a string") 1692 c = CDATASection() 1693 c.data = data 1694 c.ownerDocument = self 1695 return c 1696 1697 def createComment(self, data): 1698 c = Comment(data) 1699 c.ownerDocument = self 1700 return c 1701 1702 def createProcessingInstruction(self, target, data): 1703 p = ProcessingInstruction(target, data) 1704 p.ownerDocument = self 1705 return p 1706 1707 def createAttribute(self, qName): 1708 a = Attr(qName) 1709 a.ownerDocument = self 1710 a.value = "" 1711 return a 1712 1713 def createElementNS(self, namespaceURI, qualifiedName): 1714 prefix, localName = _nssplit(qualifiedName) 1715 e = Element(qualifiedName, namespaceURI, prefix) 1716 e.ownerDocument = self 1717 return e 1718 1719 def createAttributeNS(self, namespaceURI, qualifiedName): 1720 prefix, localName = _nssplit(qualifiedName) 1721 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1722 a.ownerDocument = self 1723 a.value = "" 1724 return a 1725 1726 # A couple of implementation-specific helpers to create node types 1727 # not supported by the W3C DOM specs: 1728 1729 def _create_entity(self, name, publicId, systemId, notationName): 1730 e = Entity(name, publicId, systemId, notationName) 1731 e.ownerDocument = self 1732 return e 1733 1734 def _create_notation(self, name, publicId, systemId): 1735 n = Notation(name, publicId, systemId) 1736 n.ownerDocument = self 1737 return n 1738 1739 def getElementById(self, id): 1740 if id in self._id_cache: 1741 return self._id_cache[id] 1742 if not (self._elem_info or self._magic_id_count): 1743 return None 1744 1745 stack = self._id_search_stack 1746 if stack is None: 1747 # we never searched before, or the cache has been cleared 1748 stack = [self.documentElement] 1749 self._id_search_stack = stack 1750 elif not stack: 1751 # Previous search was completed and cache is still valid; 1752 # no matching node. 1753 return None 1754 1755 result = None 1756 while stack: 1757 node = stack.pop() 1758 # add child elements to stack for continued searching 1759 stack.extend([child for child in node.childNodes 1760 if child.nodeType in _nodeTypes_with_children]) 1761 # check this node 1762 info = self._get_elem_info(node) 1763 if info: 1764 # We have to process all ID attributes before 1765 # returning in order to get all the attributes set to 1766 # be IDs using Element.setIdAttribute*(). 1767 for attr in node.attributes.values(): 1768 if attr.namespaceURI: 1769 if info.isIdNS(attr.namespaceURI, attr.localName): 1770 self._id_cache[attr.value] = node 1771 if attr.value == id: 1772 result = node 1773 elif not node._magic_id_nodes: 1774 break 1775 elif info.isId(attr.name): 1776 self._id_cache[attr.value] = node 1777 if attr.value == id: 1778 result = node 1779 elif not node._magic_id_nodes: 1780 break 1781 elif attr._is_id: 1782 self._id_cache[attr.value] = node 1783 if attr.value == id: 1784 result = node 1785 elif node._magic_id_nodes == 1: 1786 break 1787 elif node._magic_id_nodes: 1788 for attr in node.attributes.values(): 1789 if attr._is_id: 1790 self._id_cache[attr.value] = node 1791 if attr.value == id: 1792 result = node 1793 if result is not None: 1794 break 1795 return result 1796 1797 def getElementsByTagName(self, name): 1798 return _get_elements_by_tagName_helper(self, name, NodeList()) 1799 1800 def getElementsByTagNameNS(self, namespaceURI, localName): 1801 return _get_elements_by_tagName_ns_helper( 1802 self, namespaceURI, localName, NodeList()) 1803 1804 def isSupported(self, feature, version): 1805 return self.implementation.hasFeature(feature, version) 1806 1807 def importNode(self, node, deep): 1808 if node.nodeType == Node.DOCUMENT_NODE: 1809 raise xml.dom.NotSupportedErr("cannot import document nodes") 1810 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1811 raise xml.dom.NotSupportedErr("cannot import document type nodes") 1812 return _clone_node(node, deep, self) 1813 1814 def writexml(self, writer, indent="", addindent="", newl="", encoding=None, 1815 standalone=None): 1816 declarations = [] 1817 1818 if encoding: 1819 declarations.append(f'encoding="{encoding}"') 1820 if standalone is not None: 1821 declarations.append(f'standalone="{"yes" if standalone else "no"}"') 1822 1823 writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}') 1824 1825 for node in self.childNodes: 1826 node.writexml(writer, indent, addindent, newl) 1827 1828 # DOM Level 3 (WD 9 April 2002) 1829 1830 def renameNode(self, n, namespaceURI, name): 1831 if n.ownerDocument is not self: 1832 raise xml.dom.WrongDocumentErr( 1833 "cannot rename nodes from other documents;\n" 1834 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1835 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1836 raise xml.dom.NotSupportedErr( 1837 "renameNode() only applies to element and attribute nodes") 1838 if namespaceURI != EMPTY_NAMESPACE: 1839 if ':' in name: 1840 prefix, localName = name.split(':', 1) 1841 if ( prefix == "xmlns" 1842 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1843 raise xml.dom.NamespaceErr( 1844 "illegal use of 'xmlns' prefix") 1845 else: 1846 if ( name == "xmlns" 1847 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1848 and n.nodeType == Node.ATTRIBUTE_NODE): 1849 raise xml.dom.NamespaceErr( 1850 "illegal use of the 'xmlns' attribute") 1851 prefix = None 1852 localName = name 1853 else: 1854 prefix = None 1855 localName = None 1856 if n.nodeType == Node.ATTRIBUTE_NODE: 1857 element = n.ownerElement 1858 if element is not None: 1859 is_id = n._is_id 1860 element.removeAttributeNode(n) 1861 else: 1862 element = None 1863 n.prefix = prefix 1864 n._localName = localName 1865 n.namespaceURI = namespaceURI 1866 n.nodeName = name 1867 if n.nodeType == Node.ELEMENT_NODE: 1868 n.tagName = name 1869 else: 1870 # attribute node 1871 n.name = name 1872 if element is not None: 1873 element.setAttributeNode(n) 1874 if is_id: 1875 element.setIdAttributeNode(n) 1876 # It's not clear from a semantic perspective whether we should 1877 # call the user data handlers for the NODE_RENAMED event since 1878 # we're re-using the existing node. The draft spec has been 1879 # interpreted as meaning "no, don't call the handler unless a 1880 # new node is created." 1881 return n 1882 1883defproperty(Document, "documentElement", 1884 doc="Top-level element of this document.") 1885 1886 1887def _clone_node(node, deep, newOwnerDocument): 1888 """ 1889 Clone a node and give it the new owner document. 1890 Called by Node.cloneNode and Document.importNode 1891 """ 1892 if node.ownerDocument.isSameNode(newOwnerDocument): 1893 operation = xml.dom.UserDataHandler.NODE_CLONED 1894 else: 1895 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1896 if node.nodeType == Node.ELEMENT_NODE: 1897 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1898 node.nodeName) 1899 for attr in node.attributes.values(): 1900 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1901 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1902 a.specified = attr.specified 1903 1904 if deep: 1905 for child in node.childNodes: 1906 c = _clone_node(child, deep, newOwnerDocument) 1907 clone.appendChild(c) 1908 1909 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1910 clone = newOwnerDocument.createDocumentFragment() 1911 if deep: 1912 for child in node.childNodes: 1913 c = _clone_node(child, deep, newOwnerDocument) 1914 clone.appendChild(c) 1915 1916 elif node.nodeType == Node.TEXT_NODE: 1917 clone = newOwnerDocument.createTextNode(node.data) 1918 elif node.nodeType == Node.CDATA_SECTION_NODE: 1919 clone = newOwnerDocument.createCDATASection(node.data) 1920 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1921 clone = newOwnerDocument.createProcessingInstruction(node.target, 1922 node.data) 1923 elif node.nodeType == Node.COMMENT_NODE: 1924 clone = newOwnerDocument.createComment(node.data) 1925 elif node.nodeType == Node.ATTRIBUTE_NODE: 1926 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1927 node.nodeName) 1928 clone.specified = True 1929 clone.value = node.value 1930 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1931 assert node.ownerDocument is not newOwnerDocument 1932 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1933 clone = newOwnerDocument.implementation.createDocumentType( 1934 node.name, node.publicId, node.systemId) 1935 clone.ownerDocument = newOwnerDocument 1936 if deep: 1937 clone.entities._seq = [] 1938 clone.notations._seq = [] 1939 for n in node.notations._seq: 1940 notation = Notation(n.nodeName, n.publicId, n.systemId) 1941 notation.ownerDocument = newOwnerDocument 1942 clone.notations._seq.append(notation) 1943 if hasattr(n, '_call_user_data_handler'): 1944 n._call_user_data_handler(operation, n, notation) 1945 for e in node.entities._seq: 1946 entity = Entity(e.nodeName, e.publicId, e.systemId, 1947 e.notationName) 1948 entity.actualEncoding = e.actualEncoding 1949 entity.encoding = e.encoding 1950 entity.version = e.version 1951 entity.ownerDocument = newOwnerDocument 1952 clone.entities._seq.append(entity) 1953 if hasattr(e, '_call_user_data_handler'): 1954 e._call_user_data_handler(operation, e, entity) 1955 else: 1956 # Note the cloning of Document and DocumentType nodes is 1957 # implementation specific. minidom handles those cases 1958 # directly in the cloneNode() methods. 1959 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1960 1961 # Check for _call_user_data_handler() since this could conceivably 1962 # used with other DOM implementations (one of the FourThought 1963 # DOMs, perhaps?). 1964 if hasattr(node, '_call_user_data_handler'): 1965 node._call_user_data_handler(operation, node, clone) 1966 return clone 1967 1968 1969def _nssplit(qualifiedName): 1970 fields = qualifiedName.split(':', 1) 1971 if len(fields) == 2: 1972 return fields 1973 else: 1974 return (None, fields[0]) 1975 1976 1977def _do_pulldom_parse(func, args, kwargs): 1978 events = func(*args, **kwargs) 1979 toktype, rootNode = events.getEvent() 1980 events.expandNode(rootNode) 1981 events.clear() 1982 return rootNode 1983 1984def parse(file, parser=None, bufsize=None): 1985 """Parse a file into a DOM by filename or file object.""" 1986 if parser is None and not bufsize: 1987 from xml.dom import expatbuilder 1988 return expatbuilder.parse(file) 1989 else: 1990 from xml.dom import pulldom 1991 return _do_pulldom_parse(pulldom.parse, (file,), 1992 {'parser': parser, 'bufsize': bufsize}) 1993 1994def parseString(string, parser=None): 1995 """Parse a file into a DOM from a string.""" 1996 if parser is None: 1997 from xml.dom import expatbuilder 1998 return expatbuilder.parseString(string) 1999 else: 2000 from xml.dom import pulldom 2001 return _do_pulldom_parse(pulldom.parseString, (string,), 2002 {'parser': parser}) 2003 2004def getDOMImplementation(features=None): 2005 if features: 2006 if isinstance(features, str): 2007 features = domreg._parse_feature_string(features) 2008 for f, v in features: 2009 if not Document.implementation.hasFeature(f, v): 2010 return None 2011 return Document.implementation 2012