1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import io
19import xml.dom
20
21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22from xml.dom.minicompat import *
23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
# Node types that can themselves contain child nodes.  The tree-mutation
# methods on Node consult this tuple to decide whether adding or removing
# a child requires the ID cache to be invalidated.  The list isn't
# actually complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
# the node being added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Common behaviour shared by the minidom node classes.

    Supplies serialisation helpers (toxml/toprettyxml), child-list
    manipulation with parent/sibling bookkeeping, normalisation, DOM
    Level 3 user data, and unlink()/context-manager cleanup.  The methods
    here use ``nodeType``, ``childNodes``, ``_child_node_types`` and
    ``writexml()``, none of which are defined on this class; subclasses
    are expected to provide them.
    """
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __bool__(self):
        """Nodes are always truthy, whatever their content."""
        return True

    def toxml(self, encoding=None, standalone=None):
        """Serialise the node without any added indentation or newlines.

        Equivalent to ``toprettyxml("", "", encoding, standalone)``.
        """
        return self.toprettyxml("", "", encoding, standalone)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None,
                    standalone=None):
        """Serialise the node via ``writexml()`` and return the result.

        Returns a ``str`` when *encoding* is None, otherwise ``bytes``
        encoded with *encoding* (unencodable characters become XML
        character references).  *encoding* and *standalone* are only
        forwarded to ``writexml()`` when this node is a document.
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding, standalone)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() hands back the underlying BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if this node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* immediately before *refChild*.

        A document fragment contributes each of its children in order and
        is itself returned.  With ``refChild is None`` the node is
        appended.  Inserting a node before itself is a no-op.

        Raises HierarchyRequestErr if *newChild*'s type is not allowed
        here, and NotFoundErr if *refChild* is not a child of this node.
        *refChild* is validated before *newChild* is detached from its
        current parent, so a failed call leaves both trees untouched.
        Returns *newChild*.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is None:
            # appendChild() detaches newChild from its old parent itself.
            self.appendChild(newChild)
            return newChild
        # Validate the reference node first: detaching newChild and then
        # failing would silently drop it from the tree it came from.
        if refChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild is refChild:
            # Already in the requested position.
            return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Computed after the removal: newChild may have been an earlier
        # sibling of refChild, which shifts the index.
        index = self.childNodes.index(refChild)
        if newChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        self.childNodes.insert(index, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if index:
            node = self.childNodes[index-1]
            node.nextSibling = newChild
            newChild.previousSibling = node
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        A document fragment contributes each of its children in order.
        The node is first removed from its current parent, if any.
        Raises HierarchyRequestErr if the node type is not allowed here.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace *oldChild* with *newChild*.

        Returns *oldChild*, except that replacing a node with itself
        returns None and a document-fragment replacement returns whatever
        ``insertBefore()`` returns for the fragment.

        Raises HierarchyRequestErr if *newChild*'s type is not allowed
        here, and NotFoundErr if *oldChild* is not a child of this node.
        *oldChild* is validated before *newChild* is detached from its
        current parent, so a failed call leaves both trees untouched.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Check membership before mutating anything.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Computed after the removal, which may have shifted positions.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # newChild takes over oldChild's place in the sibling chain.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises NotFoundErr if *oldChild* is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Splice the neighbours together around the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalised recursively.  Discarded text
        nodes are unlinked.
        """
        L = []  # the children that survive, in order
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy made by ``_clone_node``.

        The owner document passed along is ``self.ownerDocument``, or this
        node itself when that is unset.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about *feature*."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return this node if *feature* is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the user data stored under *key*, or None."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store *data* with its *handler* under *key*.

        Passing ``data=None`` removes the entry.  Returns the previously
        stored data, or None.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        # Notify every registered handler; iterate over a snapshot of the
        # items so a handler may modify the user data.
        if hasattr(self, "_user_data"):
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Recursively clear parent, owner, child and sibling references."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()
279
# Publish Node attributes through defproperty, which comes from the
# xml.dom.minicompat star-import.  Presumably each call installs a property
# backed by the matching Node._get_<name> method -- confirm in minicompat.
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
283
284
def _append_child(self, node):
    """Attach *node* as the last child of *self* without any validation.

    Fast path intended for DOM builders: it performs no type checks, does
    not detach *node* from a previous parent and does not touch the ID
    cache.  Only the link with the current last child and the parent
    pointer are updated.
    """
    siblings = self.childNodes
    if siblings:
        tail = siblings[-1]
        tail.nextSibling = node
        node.previousSibling = tail
    node.parentNode = self
    siblings.append(node)
294
def _in_document(node):
    """Return True iff *node* or one of its ancestors is a document node.

    Follows ``parentNode`` links upwards; running off the top of the tree
    (or being handed None) yields False.
    """
    ancestor = node
    while ancestor is not None and ancestor.nodeType != Node.DOCUMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor is not None
302
def _write_data(writer, data):
    """Write *data* to *writer* with XML special characters escaped.

    ``&``, ``<``, ``"`` and ``>`` are replaced by their entity references.
    Nothing is written when *data* is empty or None.
    """
    if not data:
        return
    # "&" must be handled first so the entities produced by the later
    # replacements are not escaped a second time.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"),
                        ("\"", "&quot;"), (">", "&gt;")):
        data = data.replace(raw, entity)
    writer.write(data)
309
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect descendant elements of *parent* named *name* into *rc*.

    Descendants are visited depth-first in document order; ``"*"``
    matches every element.  Every child is descended into, whatever its
    node type.  Returns *rc* itself.
    """
    for child in parent.childNodes:
        if child.nodeType == Node.ELEMENT_NODE:
            if name == "*" or child.tagName == name:
                rc.append(child)
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
317
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect descendant elements matching a namespace and local name.

    ``"*"`` acts as a wildcard for either criterion.  Only element
    children are examined and descended into.  Matches are appended to
    *rc* depth-first in document order, and *rc* itself is returned.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_matches = localName == "*" or child.localName == localName
        if name_matches and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
326
class DocumentFragment(Node):
    """Node of type DOCUMENT_FRAGMENT_NODE holding a list of children.

    It has no attributes, no value and no parent.  When a fragment is
    handed to Node.insertBefore() or Node.appendChild(), its children are
    inserted in its place.
    """

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
343
344
class Attr(Node):
    """An attribute node.

    The value is kept both in ``_value`` and in a single Text child node.
    ``nodeName``/``name``, ``nodeValue``/``value`` and ``prefix`` are
    properties; assigning to any of them invalidates the ID cache of the
    owning element, if there is one.
    """
    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create a detached attribute called *qName*.

        *localName*, when given, is remembered and reported by
        ``_get_localName()``; otherwise the local name is derived from the
        node name.  The value is left unset and must be assigned
        separately.
        """
        self.ownerElement = None
        # 'ownerDocument' is a slot, which hides the None default defined
        # on Node; give it a value so reads don't raise AttributeError.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name stored: use the part of the node name
            # after the first colon (the whole name if there is none).
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        self._value = value
        # Keep the Text child in step with the stored value.
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild the qualified name to match.

        Raises NamespaceErr when the ``xmlns`` prefix is requested for an
        attribute bound to a namespace other than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Remove the attribute from its element and drop its children.

        Afterwards ``ownerElement`` is None, so the node can be attached
        to an element again.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The attribute no longer belongs to elem; a stale reference
            # would make it look "in use" to a later setAttributeNode().
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        # Explicitly flagged attributes win; otherwise ask the document's
        # element info, if both document and element are known.
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        # Falls back to _no_type whenever no element info is available.
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)
465
# Publish Attr attributes through defproperty, which comes from the
# xml.dom.minicompat star-import.  Presumably each call installs a property
# backed by the matching Attr._get_<name> method -- confirm in minicompat.
defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
469
470
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.

    Two dictionaries are wrapped: ``_attrs`` is keyed by attribute name
    and ``_attrsNS`` by ``(namespaceURI, localName)``.  Indexing accepts
    either kind of key.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position *index*, or None.

        Any index outside ``0 <= index < length`` yields None; negative
        indexes do not wrap around.
        """
        if 0 <= index < len(self._attrs):
            return list(self._attrs.values())[index]
        return None

    def items(self):
        """Return a list of ``(nodeName, value)`` pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def __contains__(self, key):
        # String keys are attribute names; anything else is looked up as
        # a (namespaceURI, localName) key.
        if isinstance(key, str):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Maps over the very same name dictionary compare equal; anything
        # else is ordered by object identity.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return (id(self) > id(other)) - (id(self) < id(other))

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute *attname* from a string, or store an Attr node.

        Raises TypeError if *value* is neither a string nor an Attr.
        """
        if isinstance(value, str):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the attribute called *name*, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute for the namespace/local name, or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the attribute called *name*.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute for the namespace/local name.

        Raises NotFoundErr if there is no such attribute.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store *node* under its name and namespace key.

        Returns the attribute it replaced, which is unlinked, or None if
        nothing was replaced.  Storing a node that is already present
        leaves it intact and returns None.  Raises HierarchyRequestErr if
        *node* is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old is node:
            # Re-storing the same node: unlinking it here would wipe its
            # own value child before it is put back.
            old = None
        elif old is not None:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        # Attr.unlink() removes the node from both dictionaries.
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state
633
# Publish "length" through defproperty, which comes from the
# xml.dom.minicompat star-import.  Presumably this installs a property
# backed by NamedNodeMap._get_length -- confirm in minicompat.
defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

# Second name bound to the same class (presumably kept for backward
# compatibility -- verify against callers).
AttributeList = NamedNodeMap
638
639
class TypeInfo(object):
    """Pairs a type name with the namespace it comes from."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.name = name
        self.namespace = namespace

    def __repr__(self):
        # The namespace is only mentioned when it is truthy.
        cls_name = self.__class__.__name__
        if not self.namespace:
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace
659
# Shared TypeInfo with neither namespace nor name; Attr._get_schemaType
# returns it when no type information is available, and Element uses it
# as its schemaType.
_no_type = TypeInfo(None, None)
661
class Element(Node):
    """An element node with lazily created attribute dictionaries.

    Attributes are indexed twice: ``_attrs`` maps attribute names to Attr
    nodes and ``_attrsNS`` maps ``(namespaceURI, localName)`` to the same
    nodes.  Both stay None until an attribute is first needed.
    """
    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
               'nextSibling', 'previousSibling')
    nodeType = Node.ELEMENT_NODE
    nodeValue = None
    schemaType = _no_type

    _magic_id_nodes = 0

    _child_node_types = (Node.ELEMENT_NODE,
                         Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE,
                         Node.TEXT_NODE,
                         Node.CDATA_SECTION_NODE,
                         Node.ENTITY_REFERENCE_NODE)

    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        # 'ownerDocument' is a slot, which hides the None default defined
        # on Node; give it a value so a directly constructed element can
        # be used (e.g. setAttribute reads self.ownerDocument).
        self.ownerDocument = None
        self.parentNode = None
        self.tagName = self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()
        self.nextSibling = self.previousSibling = None

        # Attribute dictionaries are lazily created
        # attributes are double-indexed:
        #    tagName -> Attribute
        #    URI,localName -> Attribute
        # in the future: consider lazy generation
        # of attribute objects this is too tricky
        # for now because of headaches with
        # namespaces.
        self._attrs = None
        self._attrsNS = None

    def _ensure_attributes(self):
        # Create both attribute dictionaries on first use.
        if self._attrs is None:
            self._attrs = {}
            self._attrsNS = {}

    def _get_localName(self):
        try:
            return self._localName
        except AttributeError:
            # No explicit local name stored: use the part of the tag name
            # after the first colon (the whole name if there is none).
            return self.tagName.split(":", 1)[-1]

    def _get_tagName(self):
        return self.tagName

    def unlink(self):
        """Unlink every attribute, then the element and its children."""
        if self._attrs is not None:
            # Attr.unlink() deletes from self._attrs, so iterate a copy.
            for attr in list(self._attrs.values()):
                attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)

    def getAttribute(self, attname):
        """Returns the value of the specified attribute.

        Returns the value of the element's attribute named attname as
        a string. An empty string is returned if the element does not
        have such an attribute. Note that an empty string may also be
        returned as an explicitly given attribute value, use the
        hasAttribute method to distinguish these two cases.
        """
        if self._attrs is None:
            return ""
        try:
            return self._attrs[attname].value
        except KeyError:
            return ""

    def getAttributeNS(self, namespaceURI, localName):
        """Namespace-aware getAttribute(); "" if the attribute is absent."""
        if self._attrsNS is None:
            return ""
        try:
            return self._attrsNS[(namespaceURI, localName)].value
        except KeyError:
            return ""

    def setAttribute(self, attname, value):
        """Set attribute *attname* to *value*, creating it if necessary."""
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            attr.value = value # also sets nodeValue
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        elif value != attr.value:
            attr.value = value
            if attr.isId:
                _clear_id_cache(self)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Namespace-aware setAttribute().

        An existing attribute with the same namespace and local name is
        updated in place; its prefix and node name are changed when
        *qualifiedName* carries a different prefix.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            attr.value = value
            attr.ownerDocument = self.ownerDocument
            self.setAttributeNode(attr)
        else:
            if value != attr.value:
                attr.value = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                attr.prefix = prefix
                attr.nodeName = qualifiedName

    def getAttributeNode(self, attrname):
        """Return the Attr node called *attrname*, or None."""
        if self._attrs is None:
            return None
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the Attr node for the namespace/local name, or None."""
        if self._attrsNS is None:
            return None
        return self._attrsNS.get((namespaceURI, localName))

    def setAttributeNode(self, attr):
        """Attach *attr*, replacing any attribute it collides with.

        A collision is an existing attribute with the same name or the
        same ``(namespaceURI, localName)``.  Returns the replaced node, or
        None when nothing was replaced; re-attaching an attribute this
        element already holds leaves it intact.  Raises InuseAttributeErr
        if *attr* belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        self._ensure_attributes()
        old1 = self._attrs.get(attr.name, None)
        # Never "replace" attr with itself: removing it would unlink it
        # and discard the Text child that carries its value.
        if old1 is not None and old1 is not attr:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1 and old2 is not attr:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        if old1 is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old1
        if old2 is not attr:
            return old2

    setAttributeNodeNS = setAttributeNode

    def removeAttribute(self, name):
        """Remove the attribute called *name*; NotFoundErr if absent."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Namespace-aware removeAttribute(); NotFoundErr if absent."""
        if self._attrsNS is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Detach the Attr *node* from this element and return it.

        Raises NotFoundErr if *node* is None or is not one of this
        element's attributes.  That includes an element that has no
        attributes at all, and a same-named attribute that belongs
        elsewhere.
        """
        if node is None or self._attrs is None:
            raise xml.dom.NotFoundErr()
        # Compare by identity: unlinking a same-named attribute owned by
        # another element would remove it from that element instead.
        if self._attrs.get(node.name) is not node:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        return node

    removeAttributeNodeNS = removeAttributeNode

    def hasAttribute(self, name):
        """Checks whether the element has an attribute with the specified name.

        Returns True if the element has an attribute with the specified name.
        Otherwise, returns False.
        """
        if self._attrs is None:
            return False
        return name in self._attrs

    def hasAttributeNS(self, namespaceURI, localName):
        """Namespace-aware hasAttribute()."""
        if self._attrsNS is None:
            return False
        return (namespaceURI, localName) in self._attrsNS

    def getElementsByTagName(self, name):
        """Returns all descendant elements with the given tag name.

        Returns the list of all descendant elements (not direct children
        only) with the specified tag name.
        """
        return _get_elements_by_tagName_helper(self, name, NodeList())

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Namespace-aware getElementsByTagName()."""
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, NodeList())

    def __repr__(self):
        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write an XML element to a file-like object

        Write the element to the writer object that must provide
        a write method (e.g. a file or StringIO object).
        """
        # indent = current indentation
        # addindent = indentation to add to higher levels
        # newl = newline string
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()

        for a_name in attrs.keys():
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType in (
                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
                # A lone text/CDATA child is written inline, so no
                # whitespace is added around the character data.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))

    def _get_attributes(self):
        # Creates the attribute dictionaries as a side effect so the
        # returned map always wraps real dicts.
        self._ensure_attributes()
        return NamedNodeMap(self._attrs, self._attrsNS, self)

    def hasAttributes(self):
        """Return True if the element has at least one attribute."""
        if self._attrs:
            return True
        else:
            return False

    # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    def setIdAttribute(self, name):
        """Flag the attribute called *name* as an ID attribute."""
        idAttr = self.getAttributeNode(name)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNS(self, namespaceURI, localName):
        """Namespace-aware setIdAttribute()."""
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)

    def setIdAttributeNode(self, idAttr):
        """Flag *idAttr* as an ID attribute of this element.

        Raises NotFoundErr if *idAttr* is None or not owned by this
        element, and NoModificationAllowedErr if the element sits inside
        an entity reference.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if not idAttr._is_id:
            idAttr._is_id = True
            self._magic_id_nodes += 1
            self.ownerDocument._magic_id_count += 1
            _clear_id_cache(self)
931
# defproperty() is not defined in this file; it arrives through the
# star-import of xml.dom.minicompat.
# NOTE(review): it is expected to publish Element._get_<name>() as a
# read-only property called <name> -- confirm against minicompat.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
936
937
def _set_attribute_node(element, attr):
    """Register *attr* in both attribute tables of *element*.

    The ID cache is cleared first, the element's attribute tables are
    created on demand, and the attribute is stored under its name and
    under its (namespaceURI, localName) pair.
    """
    _clear_id_cache(element)
    element._ensure_attributes()
    ns_key = (attr.namespaceURI, attr.localName)
    element._attrs[attr.name] = attr
    element._attrsNS[ns_key] = attr

    # The back-reference below forms a cycle; Element.unlink() breaks it
    # because the references to the attribute dictionaries are tossed.
    attr.ownerElement = element
948
class Childless:
    """Mixin for node types that never hold child nodes.

    It supplies constant, empty child-related attributes and overrides the
    child-manipulating methods so that they either do nothing or raise
    straight away.
    """
    __slots__ = ()

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        """There is never a first child."""
        return None

    def _get_lastChild(self):
        """There is never a last child."""
        return None

    def appendChild(self, node):
        """Always raise HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def hasChildNodes(self):
        """Always False."""
        return False

    def insertBefore(self, newChild, refChild):
        """Always raise HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raise NotFoundErr: there is nothing to remove."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def normalize(self):
        """Do nothing; a node without children is already normalized."""
        pass

    def replaceChild(self, newChild, oldChild):
        """Always raise HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
988
989
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node holding a target and its data string."""

    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    __slots__ = ('target', 'data')

    def __init__(self, target, data):
        self.target, self.data = target, data

    # nodeValue reads and writes the same storage as data
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(fget=_get_nodeValue, fset=_set_nodeValue)

    # nodeName reads and writes the same storage as target
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(fget=_get_nodeName, fset=_set_nodeName)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<?target data?>`` framed by *indent* and *newl*."""
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
1014
1015
class CharacterData(Childless, Node):
    """Base class for childless nodes that carry a string in ``data``.

    ``data`` and ``nodeValue`` are the same property, backed by ``_data``.
    The *Data() methods follow the DOM CharacterData interface: an offset
    is valid when ``0 <= offset <= len(data)`` (so operating exactly at
    the end of the data is allowed) and counts must not be negative;
    anything else raises xml.dom.IndexSizeErr.
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        """Return the number of characters in ``data``."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        # Show at most the first ten characters, with an ellipsis marker
        # when the data was truncated.
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset(self, offset):
        """Raise IndexSizeErr unless 0 <= offset <= len(self.data)."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        # offset == len(data) addresses the end of the data and is legal;
        # only offsets past the end are errors.
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    def substringData(self, offset, count):
        """Return up to *count* characters starting at *offset*."""
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append *arg* to the end of the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert *arg* at *offset*; offset == len(data) appends."""
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Delete up to *count* characters starting at *offset*."""
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters at *offset* with *arg*.

        A *count* of zero removes nothing and simply inserts *arg*.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Skip the rebuild only when nothing would change at all.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
1086
# NOTE(review): defproperty() (star-imported from xml.dom.minicompat) is
# expected to expose CharacterData._get_length() as the read-only
# property ``length`` -- confirm against minicompat.
defproperty(CharacterData, "length", doc="Length of the string data.")
1088
1089
class Text(CharacterData):
    """Text node: character data appearing as element or attribute content."""
    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at *offset* and return the new trailing node.

        This node keeps ``data[:offset]``; the returned node (of the same
        class) holds ``data[offset:]`` and is inserted right after this
        node when this node is attached to a parent.  Raises
        xml.dom.IndexSizeErr when *offset* is negative or past the end.
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        newText = self.__class__()
        newText.data = self.data[offset:]
        newText.ownerDocument = self.ownerDocument
        following = self.nextSibling
        if self.parentNode and self in self.parentNode.childNodes:
            if following is None:
                self.parentNode.appendChild(newText)
            else:
                self.parentNode.insertBefore(newText, following)
        self.data = self.data[:offset]
        return newText

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl through _write_data()."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Concatenate the data of this node and its adjacent text nodes.

        Walks backwards and forwards over siblings for as long as they are
        TEXT or CDATA_SECTION nodes.
        """
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        L = [self.data]
        n = self.previousSibling
        while n is not None:
            if n.nodeType in text_types:
                L.insert(0, n.data)
                n = n.previousSibling
            else:
                break
        n = self.nextSibling
        while n is not None:
            if n.nodeType in text_types:
                L.append(n.data)
                n = n.nextSibling
            else:
                break
        return ''.join(L)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text nodes with *content*.

        All adjacent TEXT/CDATA_SECTION siblings are removed from the
        parent.  With non-empty *content* this node survives, receives the
        content and is returned; with empty *content* this node is removed
        as well (when it has a parent) and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode
        n = self.previousSibling
        while n is not None:
            if n.nodeType in text_types:
                neighbour = n.previousSibling
                parent.removeChild(n)
                n = neighbour
            else:
                break
        # Capture the following sibling before this node is (possibly)
        # removed, since removal resets the sibling links.
        n = self.nextSibling
        if not content and parent is not None:
            # A detached node has no parent to be removed from.
            parent.removeChild(self)
        while n is not None:
            if n.nodeType in text_types:
                neighbour = n.nextSibling
                parent.removeChild(n)
                n = neighbour
            else:
                break
        if content:
            self.data = content
            return self
        else:
            return None

    def _get_isWhitespaceInElementContent(self):
        """Return True only for whitespace-only text in element content.

        The answer comes from the owner document's element info for the
        nearest containing element; it is False when the data contains
        non-whitespace, when there is no containing element, or when no
        element info is available.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        if info is None:
            return False
        else:
            return info.isElementContent()
1174
# NOTE(review): defproperty() (star-imported from xml.dom.minicompat) is
# expected to expose Text._get_isWhitespaceInElementContent() and
# Text._get_wholeText() as read-only properties -- confirm against
# minicompat.
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1180
1181
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1189
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1197
1198
class Comment(CharacterData):
    """Comment node; its text lives in the inherited ``data`` property."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<!--data-->`` framed by *indent* and *newl*.

        Raises ValueError when the data contains "--".
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, text, newl))
1211
1212
class CDATASection(Text):
    """Text node that is serialized as a ``<![CDATA[...]]>`` section."""
    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the data wrapped in CDATA markers.

        *indent*, *addindent* and *newl* are accepted but not used.
        Raises ValueError when the data contains "]]>".
        """
        text = self.data
        if "]]>" in text:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % text)
1223
1224
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only named-node map stored as a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """
    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, or None."""
        return next((node for node in self._seq if node.nodeName == name),
                    None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching the namespace pair, or None."""
        return next(
            (node for node in self._seq
             if node.namespaceURI == namespaceURI
             and node.localName == localName),
            None)

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when nothing matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the node at *index*, or None when out of range."""
        if index < 0:
            # Negative indexes are not allowed to wrap around.
            return None
        try:
            node = self._seq[index]
        except IndexError:
            return None
        return node

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # Pickled state is a one-element list holding the sequence.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1286
# NOTE(review): defproperty() (star-imported from xml.dom.minicompat) is
# expected to expose ReadOnlySequentialNamedNodeMap._get_length() as the
# read-only property ``length`` -- confirm against minicompat.
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1289
1290
class Identified:
    """Mix-in providing storage and getters for publicId and systemId."""

    __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers; called from the host class's __init__."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1305
class DocumentType(Identified, Childless, Node):
    """Document type node with entity and notation maps."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the qualified name is kept.
            self.name = _nssplit(qualifiedName)[1]
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Clone this doctype, or return None if it belongs to a document.

        Only a doctype whose ownerDocument is None can be cloned.  The
        clone receives the name; with *deep* true, its notation and entity
        lists are rebuilt from copies of this node's notations and
        entities.  User data handlers are notified with NODE_CLONED for
        every copied node and for the doctype itself.
        """
        if self.ownerDocument is not None:
            return None

        # it's ok
        duplicate = DocumentType(None)
        duplicate.name = self.name
        duplicate.nodeName = self.name
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            duplicate.entities._seq = []
            duplicate.notations._seq = []
            for src in self.notations._seq:
                notation = Notation(src.nodeName, src.publicId, src.systemId)
                duplicate.notations._seq.append(notation)
                src._call_user_data_handler(operation, src, notation)
            for src in self.entities._seq:
                entity = Entity(src.nodeName, src.publicId, src.systemId,
                                src.notationName)
                entity.actualEncoding = src.actualEncoding
                entity.encoding = src.encoding
                entity.version = src.version
                duplicate.entities._seq.append(entity)
                src._call_user_data_handler(operation, src, entity)
        self._call_user_data_handler(operation, self, duplicate)
        return duplicate

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the ``<!DOCTYPE ...>`` declaration to *writer*.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set; the internal subset follows in
        square brackets when it is not None.
        """
        writer.write("<!DOCTYPE ")
        writer.write(self.name)
        if self.publicId:
            external_id = "%s  PUBLIC '%s'%s  '%s'" % (
                newl, self.publicId, newl, self.systemId)
            writer.write(external_id)
        elif self.systemId:
            external_id = "%s  SYSTEM '%s'" % (newl, self.systemId)
            writer.write(external_id)
        if self.internalSubset is not None:
            writer.write(" [")
            writer.write(self.internalSubset)
            writer.write("]")
        writer.write(">"+newl)
1365
class Entity(Identified, Node):
    """Entity node; its children cannot be changed through the DOM API."""

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # Every child-modifying operation is refused with HierarchyRequestErr.

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1405
class Notation(Identified, Childless, Node):
    """Notation node: a name plus public and system identifiers."""

    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self.nodeName = name
        # Storing the identifiers is delegated to the Identified mix-in.
        self._identified_mixin_init(publicId, systemId)
1413
1414
class DOMImplementation(DOMImplementationLS):
    """Factory for documents and doctypes, plus feature queries."""

    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True when (feature, version) is listed in _features.

        The feature name is lower-cased before the lookup and an empty
        version string is treated like None.
        """
        wanted_version = None if version == "" else version
        return (feature.lower(), wanted_version) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and root element.

        When all three arguments are None the document is returned without
        a document element.  Otherwise a root element is created with
        createElementNS(), which requires a non-empty *qualifiedName*.

        Raises xml.dom.WrongDocumentErr when *doctype* already has a
        parent, xml.dom.InvalidCharacterErr when a root element is needed
        but no name was given, and xml.dom.NamespaceErr for an illegal
        prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without a
                # document element.  Otherwise if doctype or namespaceURI
                # are not None then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if (prefix == "xml"
                    and namespaceURI != "http://www.w3.org/XML/1998/namespace"):
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                # The doctype is appended before the root element.
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType and set its public and system ids."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self when *feature* is supported (any version), else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1493
class ElementInfo(object):
    """Default content-model information for an element.

    Every query answers with a neutral value (False or _no_type).  This
    implementation is not expected to be used in practice; DOM builders
    should provide implementations that answer from the information
    available to them.

    """

    __slots__ = ('tagName',)

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the type of attribute *aname*; always _no_type here."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the type of the namespaced attribute; always _no_type."""
        return _no_type

    def isElementContent(self):
        """Always False in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    def __getstate__(self):
        # The tag name is the whole pickled state.
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1535
def _clear_id_cache(node):
    """Reset the ID lookup state of the document that *node* belongs to.

    When *node* is itself a document, its own cache is reset.  Otherwise
    the owner document's cache is reset, but only if _in_document()
    reports that *node* is part of a document; in any other case nothing
    happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1543
1544class Document(Node, DocumentLS):
1545    __slots__ = ('_elem_info', 'doctype',
1546                 '_id_search_stack', 'childNodes', '_id_cache')
1547    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
1548                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
1549
1550    implementation = DOMImplementation()
1551    nodeType = Node.DOCUMENT_NODE
1552    nodeName = "#document"
1553    nodeValue = None
1554    attributes = None
1555    parentNode = None
1556    previousSibling = nextSibling = None
1557
1558
1559    # Document attributes from Level 3 (WD 9 April 2002)
1560
1561    actualEncoding = None
1562    encoding = None
1563    standalone = None
1564    version = None
1565    strictErrorChecking = False
1566    errorHandler = None
1567    documentURI = None
1568
1569    _magic_id_count = 0
1570
1571    def __init__(self):
1572        self.doctype = None
1573        self.childNodes = NodeList()
1574        # mapping of (namespaceURI, localName) -> ElementInfo
1575        #        and tagName -> ElementInfo
1576        self._elem_info = {}
1577        self._id_cache = {}
1578        self._id_search_stack = None
1579
1580    def _get_elem_info(self, element):
1581        if element.namespaceURI:
1582            key = element.namespaceURI, element.localName
1583        else:
1584            key = element.tagName
1585        return self._elem_info.get(key)
1586
1587    def _get_actualEncoding(self):
1588        return self.actualEncoding
1589
1590    def _get_doctype(self):
1591        return self.doctype
1592
1593    def _get_documentURI(self):
1594        return self.documentURI
1595
1596    def _get_encoding(self):
1597        return self.encoding
1598
1599    def _get_errorHandler(self):
1600        return self.errorHandler
1601
1602    def _get_standalone(self):
1603        return self.standalone
1604
1605    def _get_strictErrorChecking(self):
1606        return self.strictErrorChecking
1607
1608    def _get_version(self):
1609        return self.version
1610
1611    def appendChild(self, node):
1612        if node.nodeType not in self._child_node_types:
1613            raise xml.dom.HierarchyRequestErr(
1614                "%s cannot be child of %s" % (repr(node), repr(self)))
1615        if node.parentNode is not None:
1616            # This needs to be done before the next test since this
1617            # may *be* the document element, in which case it should
1618            # end up re-ordered to the end.
1619            node.parentNode.removeChild(node)
1620
1621        if node.nodeType == Node.ELEMENT_NODE \
1622           and self._get_documentElement():
1623            raise xml.dom.HierarchyRequestErr(
1624                "two document elements disallowed")
1625        return Node.appendChild(self, node)
1626
1627    def removeChild(self, oldChild):
1628        try:
1629            self.childNodes.remove(oldChild)
1630        except ValueError:
1631            raise xml.dom.NotFoundErr()
1632        oldChild.nextSibling = oldChild.previousSibling = None
1633        oldChild.parentNode = None
1634        if self.documentElement is oldChild:
1635            self.documentElement = None
1636
1637        return oldChild
1638
1639    def _get_documentElement(self):
1640        for node in self.childNodes:
1641            if node.nodeType == Node.ELEMENT_NODE:
1642                return node
1643
1644    def unlink(self):
1645        if self.doctype is not None:
1646            self.doctype.unlink()
1647            self.doctype = None
1648        Node.unlink(self)
1649
    def cloneNode(self, deep):
        """Return a deep copy of this document, or None for a shallow clone.

        A new, empty document is created through the implementation, the
        encoding, standalone and version attributes are copied, and every
        child is cloned with _clone_node() into the new document.  The
        NODE_CLONED user data handler is invoked before returning.
        """
        # Shallow clones of a document are not supported.
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # The clone is appended to the child list directly rather than
            # through appendChild().
            clone.childNodes.append(childclone)
            # NOTE(review): this compares against DOCUMENT_NODE, yet the
            # assertion is about documentElement; possibly ELEMENT_NODE was
            # intended -- confirm.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                # At most one doctype is expected; record it on the clone.
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1670
1671    def createDocumentFragment(self):
1672        d = DocumentFragment()
1673        d.ownerDocument = self
1674        return d
1675
1676    def createElement(self, tagName):
1677        e = Element(tagName)
1678        e.ownerDocument = self
1679        return e
1680
1681    def createTextNode(self, data):
1682        if not isinstance(data, str):
1683            raise TypeError("node contents must be a string")
1684        t = Text()
1685        t.data = data
1686        t.ownerDocument = self
1687        return t
1688
1689    def createCDATASection(self, data):
1690        if not isinstance(data, str):
1691            raise TypeError("node contents must be a string")
1692        c = CDATASection()
1693        c.data = data
1694        c.ownerDocument = self
1695        return c
1696
1697    def createComment(self, data):
1698        c = Comment(data)
1699        c.ownerDocument = self
1700        return c
1701
1702    def createProcessingInstruction(self, target, data):
1703        p = ProcessingInstruction(target, data)
1704        p.ownerDocument = self
1705        return p
1706
1707    def createAttribute(self, qName):
1708        a = Attr(qName)
1709        a.ownerDocument = self
1710        a.value = ""
1711        return a
1712
1713    def createElementNS(self, namespaceURI, qualifiedName):
1714        prefix, localName = _nssplit(qualifiedName)
1715        e = Element(qualifiedName, namespaceURI, prefix)
1716        e.ownerDocument = self
1717        return e
1718
1719    def createAttributeNS(self, namespaceURI, qualifiedName):
1720        prefix, localName = _nssplit(qualifiedName)
1721        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1722        a.ownerDocument = self
1723        a.value = ""
1724        return a
1725
1726    # A couple of implementation-specific helpers to create node types
1727    # not supported by the W3C DOM specs:
1728
1729    def _create_entity(self, name, publicId, systemId, notationName):
1730        e = Entity(name, publicId, systemId, notationName)
1731        e.ownerDocument = self
1732        return e
1733
1734    def _create_notation(self, name, publicId, systemId):
1735        n = Notation(name, publicId, systemId)
1736        n.ownerDocument = self
1737        return n
1738
1739    def getElementById(self, id):
1740        if id in self._id_cache:
1741            return self._id_cache[id]
1742        if not (self._elem_info or self._magic_id_count):
1743            return None
1744
1745        stack = self._id_search_stack
1746        if stack is None:
1747            # we never searched before, or the cache has been cleared
1748            stack = [self.documentElement]
1749            self._id_search_stack = stack
1750        elif not stack:
1751            # Previous search was completed and cache is still valid;
1752            # no matching node.
1753            return None
1754
1755        result = None
1756        while stack:
1757            node = stack.pop()
1758            # add child elements to stack for continued searching
1759            stack.extend([child for child in node.childNodes
1760                          if child.nodeType in _nodeTypes_with_children])
1761            # check this node
1762            info = self._get_elem_info(node)
1763            if info:
1764                # We have to process all ID attributes before
1765                # returning in order to get all the attributes set to
1766                # be IDs using Element.setIdAttribute*().
1767                for attr in node.attributes.values():
1768                    if attr.namespaceURI:
1769                        if info.isIdNS(attr.namespaceURI, attr.localName):
1770                            self._id_cache[attr.value] = node
1771                            if attr.value == id:
1772                                result = node
1773                            elif not node._magic_id_nodes:
1774                                break
1775                    elif info.isId(attr.name):
1776                        self._id_cache[attr.value] = node
1777                        if attr.value == id:
1778                            result = node
1779                        elif not node._magic_id_nodes:
1780                            break
1781                    elif attr._is_id:
1782                        self._id_cache[attr.value] = node
1783                        if attr.value == id:
1784                            result = node
1785                        elif node._magic_id_nodes == 1:
1786                            break
1787            elif node._magic_id_nodes:
1788                for attr in node.attributes.values():
1789                    if attr._is_id:
1790                        self._id_cache[attr.value] = node
1791                        if attr.value == id:
1792                            result = node
1793            if result is not None:
1794                break
1795        return result
1796
1797    def getElementsByTagName(self, name):
1798        return _get_elements_by_tagName_helper(self, name, NodeList())
1799
1800    def getElementsByTagNameNS(self, namespaceURI, localName):
1801        return _get_elements_by_tagName_ns_helper(
1802            self, namespaceURI, localName, NodeList())
1803
1804    def isSupported(self, feature, version):
1805        return self.implementation.hasFeature(feature, version)
1806
1807    def importNode(self, node, deep):
1808        if node.nodeType == Node.DOCUMENT_NODE:
1809            raise xml.dom.NotSupportedErr("cannot import document nodes")
1810        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1811            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1812        return _clone_node(node, deep, self)
1813
1814    def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
1815                 standalone=None):
1816        declarations = []
1817
1818        if encoding:
1819            declarations.append(f'encoding="{encoding}"')
1820        if standalone is not None:
1821            declarations.append(f'standalone="{"yes" if standalone else "no"}"')
1822
1823        writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')
1824
1825        for node in self.childNodes:
1826            node.writexml(writer, indent, addindent, newl)
1827
1828    # DOM Level 3 (WD 9 April 2002)
1829
1830    def renameNode(self, n, namespaceURI, name):
1831        if n.ownerDocument is not self:
1832            raise xml.dom.WrongDocumentErr(
1833                "cannot rename nodes from other documents;\n"
1834                "expected %s,\nfound %s" % (self, n.ownerDocument))
1835        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1836            raise xml.dom.NotSupportedErr(
1837                "renameNode() only applies to element and attribute nodes")
1838        if namespaceURI != EMPTY_NAMESPACE:
1839            if ':' in name:
1840                prefix, localName = name.split(':', 1)
1841                if (  prefix == "xmlns"
1842                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1843                    raise xml.dom.NamespaceErr(
1844                        "illegal use of 'xmlns' prefix")
1845            else:
1846                if (  name == "xmlns"
1847                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1848                      and n.nodeType == Node.ATTRIBUTE_NODE):
1849                    raise xml.dom.NamespaceErr(
1850                        "illegal use of the 'xmlns' attribute")
1851                prefix = None
1852                localName = name
1853        else:
1854            prefix = None
1855            localName = None
1856        if n.nodeType == Node.ATTRIBUTE_NODE:
1857            element = n.ownerElement
1858            if element is not None:
1859                is_id = n._is_id
1860                element.removeAttributeNode(n)
1861        else:
1862            element = None
1863        n.prefix = prefix
1864        n._localName = localName
1865        n.namespaceURI = namespaceURI
1866        n.nodeName = name
1867        if n.nodeType == Node.ELEMENT_NODE:
1868            n.tagName = name
1869        else:
1870            # attribute node
1871            n.name = name
1872            if element is not None:
1873                element.setAttributeNode(n)
1874                if is_id:
1875                    element.setIdAttributeNode(n)
1876        # It's not clear from a semantic perspective whether we should
1877        # call the user data handlers for the NODE_RENAMED event since
1878        # we're re-using the existing node.  The draft spec has been
1879        # interpreted as meaning "no, don't call the handler unless a
1880        # new node is created."
1881        return n
1882
# Register ``documentElement`` on Document through defproperty(),
# attaching the docstring below.
# NOTE(review): defproperty is presumably supplied by the star import
# from xml.dom.minicompat -- confirm its exact semantics there.
defproperty(
    Document,
    "documentElement",
    doc="Top-level element of this document.",
)
1885
1886
1887def _clone_node(node, deep, newOwnerDocument):
1888    """
1889    Clone a node and give it the new owner document.
1890    Called by Node.cloneNode and Document.importNode
1891    """
1892    if node.ownerDocument.isSameNode(newOwnerDocument):
1893        operation = xml.dom.UserDataHandler.NODE_CLONED
1894    else:
1895        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1896    if node.nodeType == Node.ELEMENT_NODE:
1897        clone = newOwnerDocument.createElementNS(node.namespaceURI,
1898                                                 node.nodeName)
1899        for attr in node.attributes.values():
1900            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1901            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1902            a.specified = attr.specified
1903
1904        if deep:
1905            for child in node.childNodes:
1906                c = _clone_node(child, deep, newOwnerDocument)
1907                clone.appendChild(c)
1908
1909    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1910        clone = newOwnerDocument.createDocumentFragment()
1911        if deep:
1912            for child in node.childNodes:
1913                c = _clone_node(child, deep, newOwnerDocument)
1914                clone.appendChild(c)
1915
1916    elif node.nodeType == Node.TEXT_NODE:
1917        clone = newOwnerDocument.createTextNode(node.data)
1918    elif node.nodeType == Node.CDATA_SECTION_NODE:
1919        clone = newOwnerDocument.createCDATASection(node.data)
1920    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1921        clone = newOwnerDocument.createProcessingInstruction(node.target,
1922                                                             node.data)
1923    elif node.nodeType == Node.COMMENT_NODE:
1924        clone = newOwnerDocument.createComment(node.data)
1925    elif node.nodeType == Node.ATTRIBUTE_NODE:
1926        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1927                                                   node.nodeName)
1928        clone.specified = True
1929        clone.value = node.value
1930    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1931        assert node.ownerDocument is not newOwnerDocument
1932        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1933        clone = newOwnerDocument.implementation.createDocumentType(
1934            node.name, node.publicId, node.systemId)
1935        clone.ownerDocument = newOwnerDocument
1936        if deep:
1937            clone.entities._seq = []
1938            clone.notations._seq = []
1939            for n in node.notations._seq:
1940                notation = Notation(n.nodeName, n.publicId, n.systemId)
1941                notation.ownerDocument = newOwnerDocument
1942                clone.notations._seq.append(notation)
1943                if hasattr(n, '_call_user_data_handler'):
1944                    n._call_user_data_handler(operation, n, notation)
1945            for e in node.entities._seq:
1946                entity = Entity(e.nodeName, e.publicId, e.systemId,
1947                                e.notationName)
1948                entity.actualEncoding = e.actualEncoding
1949                entity.encoding = e.encoding
1950                entity.version = e.version
1951                entity.ownerDocument = newOwnerDocument
1952                clone.entities._seq.append(entity)
1953                if hasattr(e, '_call_user_data_handler'):
1954                    e._call_user_data_handler(operation, e, entity)
1955    else:
1956        # Note the cloning of Document and DocumentType nodes is
1957        # implementation specific.  minidom handles those cases
1958        # directly in the cloneNode() methods.
1959        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1960
1961    # Check for _call_user_data_handler() since this could conceivably
1962    # used with other DOM implementations (one of the FourThought
1963    # DOMs, perhaps?).
1964    if hasattr(node, '_call_user_data_handler'):
1965        node._call_user_data_handler(operation, node, clone)
1966    return clone
1967
1968
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list ``[prefix, localName]`` when a colon is
    present and the tuple ``(None, qualifiedName)`` otherwise.
    """
    head, colon, tail = qualifiedName.partition(':')
    if colon:
        return [head, tail]
    return (None, head)
1975
1976
def _do_pulldom_parse(func, args, kwargs):
    """Build a complete tree from a pulldom-style event stream.

    *func* is called with *args* and *kwargs* to obtain the stream.
    The node of its first event is expanded in place, the stream is
    cleared, and that node is returned.
    """
    stream = func(*args, **kwargs)
    _event_type, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1983
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    The expat-based builder is used unless a *parser* or a non-zero
    *bufsize* is given; in that case parsing goes through pulldom with
    those two values.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1993
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    The expat-based builder is used unless a *parser* is given; in
    that case parsing goes through pulldom with that parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
2003
def getDOMImplementation(features=None):
    """Return ``Document.implementation`` if it offers *features*.

    *features* may be a feature string, which is parsed with
    domreg._parse_feature_string(), or an iterable of
    ``(feature, version)`` pairs.  None is returned as soon as one
    requested pair is not supported; with no features at all the
    implementation is returned unconditionally.
    """
    implementation = Document.implementation
    if not features:
        return implementation
    if isinstance(features, str):
        features = domreg._parse_feature_string(features)
    supported = all(implementation.hasFeature(feature, version)
                    for feature, version in features)
    return implementation if supported else None
2012