1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import io
19import xml.dom
20
21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22from xml.dom.minicompat import *
23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
25# This is used by the ID-cache invalidation checks; the list isn't
26# actually complete, since the nodes being checked will never be the
27# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
28# the node being added or removed, not the node being modified.)
29#
# Node types whose insertion or removal makes the Node mutation methods
# (insertBefore, appendChild, replaceChild, removeChild) call
# _clear_id_cache().
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Base class shared by the minidom node types.

    Implements child-list manipulation, serialization entry points,
    user-data storage and the unlink() / context-manager protocol.
    The methods here rely on the concrete class to provide ``nodeType``,
    ``childNodes``, ``_child_node_types`` and ``writexml()``.
    """
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __bool__(self):
        # Nodes are always truthy, so "if node:" works as a None test.
        return True

    def toxml(self, encoding=None):
        """Serialize this node without added indentation or newlines.

        Returns str when *encoding* is None, otherwise bytes.
        """
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
        """Serialize this node using *indent* per level and *newl* as
        line terminator.

        Returns str when *encoding* is None; otherwise the text is
        encoded with *encoding* (unencodable characters become XML
        character references) and bytes are returned.
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() flushes the text layer and hands back the BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if this node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Implicitly returns None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* immediately before *refChild* and return it.

        A document fragment has each of its children inserted in order.
        When *refChild* is None the node is appended.  Inserting a node
        before itself leaves the tree unchanged.

        Raises HierarchyRequestErr if *newChild* is not an allowed child
        type, and NotFoundErr if *refChild* is not a child of this node.
        On NotFoundErr *newChild* is left where it was.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is None:
            if newChild.parentNode is not None:
                newChild.parentNode.removeChild(newChild)
            self.appendChild(newChild)
            return newChild
        # Validate refChild *before* detaching newChild so that a failed
        # call does not silently remove newChild from its current parent.
        if refChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild is refChild:
            # Already in the requested position; detaching it first would
            # make the reference child disappear.
            return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Look the index up only now: newChild may have been an earlier
        # sibling of refChild, which shifts refChild's position.
        index = self.childNodes.index(refChild)
        if newChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        self.childNodes.insert(index, newChild)
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if index:
            node = self.childNodes[index-1]
            node.nextSibling = newChild
            newChild.previousSibling = node
        else:
            newChild.previousSibling = None
        newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        A document fragment has each of its children appended in order.
        The node is first removed from its current parent, if any.
        Raises HierarchyRequestErr if *node* is not an allowed child type.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace *oldChild* with *newChild* and return *oldChild*.

        A document fragment replaces *oldChild* with all of its children.
        Returns None (and does nothing) when both arguments are the same
        node.

        Raises HierarchyRequestErr if *newChild* is not an allowed child
        type, and NotFoundErr if *oldChild* is not a child of this node.
        On NotFoundErr *newChild* is left where it was.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Validate oldChild *before* detaching newChild so that a failed
        # call does not silently remove newChild from its current parent.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # newChild may have been an earlier sibling, so index only now.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises NotFoundErr if *oldChild* is not a child of this node.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Stitch the neighbours together around the removed node.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children,
        recursing into child elements."""
        # L collects the children that survive, in order.
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Replace the contents in place so existing references to the
        # child list stay valid.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node produced by _clone_node(); the
        owning document (or this node itself when it has none) is passed
        as the new owner."""
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owning document's implementation whether *feature* /
        *version* is supported."""
        # A node without an owner document falls back to itself, mirroring
        # cloneNode().  NOTE(review): this assumes document nodes expose
        # ``implementation`` themselves -- confirm against the Document
        # class, which is not defined in this part of the file.
        return (self.ownerDocument or self).implementation.hasFeature(
            feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return this node if *feature* is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under *key*, or None if there is none."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store *data* (with its *handler*) under *key*.

        Passing None as *data* removes the entry; the handler is ignored
        in that case.  Returns the previously stored data, or None.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke handle() on every non-None registered handler."""
        if hasattr(self, "_user_data"):
            # Iterate over a snapshot so handlers may change the mapping.
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's parent, owner and sibling references and
        recursively unlink its children."""
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()
278
# Publish Node's _get_firstChild / _get_lastChild / _get_localName accessors
# as attributes of the same name.  (defproperty is not defined in this file;
# presumably it comes from the xml.dom.minicompat star import -- confirm.)
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
282
283
def _append_child(self, node):
    """Link *node* in as the last child of *self*.

    Fast path with fewer checks (no type validation, no detaching from a
    previous parent, no ID-cache handling); usable by DOM builders if
    careful.
    """
    siblings = self.childNodes
    if siblings:
        # Chain the new node after the current tail.
        tail = siblings[-1]
        tail.nextSibling = node
        node.previousSibling = tail
    siblings.append(node)
    node.parentNode = self
293
def _in_document(node):
    """Return True iff *node* is part of a document tree.

    Follows parentNode links upward until a document node is found or the
    chain ends.
    """
    ancestor = node
    while ancestor is not None and ancestor.nodeType != Node.DOCUMENT_NODE:
        ancestor = ancestor.parentNode
    # The loop stops on None (no document above) or on the document node.
    return ancestor is not None
301
def _write_data(writer, data):
    "Writes datachars to writer."
    if not data:
        # Nothing to emit for empty or None data.
        return
    # "&" must be escaped first so the entities produced by the later
    # replacements are not escaped a second time.
    for raw, escaped in (("&", "&amp;"),
                         ("<", "&lt;"),
                         ("\"", "&quot;"),
                         (">", "&gt;")):
        data = data.replace(raw, escaped)
    writer.write(data)
308
def _get_elements_by_tagName_helper(parent, name, rc):
    """Append to *rc*, in document order, every element below *parent*
    whose tagName equals *name* (or every element when *name* is "*"),
    and return *rc*."""
    wildcard = name == "*"
    for child in parent.childNodes:
        is_element = child.nodeType == Node.ELEMENT_NODE
        if is_element and (wildcard or child.tagName == name):
            rc.append(child)
        # Descend into every child, whatever its node type.
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
316
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Append to *rc*, in document order, every element below *parent*
    matching *nsURI* and *localName* ("*" matches anything for either),
    and return *rc*."""
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            # Only elements are inspected or descended into.
            continue
        name_matches = localName == "*" or child.localName == localName
        if name_matches and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
325
class DocumentFragment(Node):
    """Container node that holds a list of children.

    Node.insertBefore() and Node.appendChild() splice a fragment's
    children into the target instead of the fragment itself.
    """

    nodeName = "#document-fragment"
    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeValue = attributes = parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own, empty child list.
        self.childNodes = NodeList()
342
343
class Attr(Node):
    """An attribute node.

    The attribute's value is mirrored in a single Text child.  ``name`` /
    ``nodeName``, ``value`` / ``nodeValue`` and ``prefix`` are properties
    whose setters also clear the owning element's ID cache.
    """
    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        """Create an unattached attribute named *qName*.

        *localName*, when given, is stored and reported by the localName
        accessor; otherwise the local name is derived from the qualified
        name.  The value must be assigned separately.
        """
        self.ownerElement = None
        # The 'ownerDocument' slot hides the class-level default inherited
        # from Node, so it has to be initialised explicitly; otherwise
        # reading it on a fresh Attr raises AttributeError.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        if localName is not None:
            # Honour the caller-supplied local name instead of silently
            # discarding it.
            self._localName = localName
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Fall back to the part after the first ":" of the qualified name
        # when no explicit local name has been stored.
        try:
            return self._localName
        except AttributeError:
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        self._value = value
        # Keep the Text child in step with the stored value.
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        """Change the prefix and rebuild the qualified name from it.

        Raises NamespaceErr when "xmlns" is used with a namespace other
        than XMLNS_NAMESPACE.
        """
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Remove this attribute from its owning element's dictionaries
        and drop its children.  Safe to call more than once."""
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer holds this attribute; forget it so a
            # repeated unlink() does not fail with KeyError and the node
            # can be attached to another element.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        """Return True if this attribute was flagged as an ID or the
        document's element info reports it as one."""
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        """Return the attribute type reported by the document's element
        info, or _no_type when none is available."""
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)
464
# Publish Attr's _get_isId / _get_localName / _get_schemaType accessors as
# attributes of the same name.  (defproperty is not defined in this file;
# presumably it comes from the xml.dom.minicompat star import -- confirm.)
defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
468
469
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs is keyed by qualified name, attrsNS by
        # (namespaceURI, localName); both are shared with the element.
        self._ownerElement = ownerElement
        self._attrs = attrs
        self._attrsNS = attrsNS

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute at position *index*, or None when the
        index is out of range."""
        names = list(self._attrs.keys())
        try:
            name = names[index]
        except IndexError:
            return None
        return self[name]

    def items(self):
        """Return a list of (nodeName, value) pairs."""
        return [(attr.nodeName, attr.value) for attr in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ((namespaceURI, localName), value) pairs."""
        return [((attr.namespaceURI, attr.localName), attr.value)
                for attr in self._attrs.values()]

    def __contains__(self, key):
        # Strings are qualified names; anything else is looked up as a
        # (namespaceURI, localName) key.
        table = self._attrs if isinstance(key, str) else self._attrsNS
        return key in table

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Two maps over the very same name dictionary compare equal;
        # otherwise the ordering falls back to object identity.
        if getattr(other, "_attrs", None) is self._attrs:
            return 0
        mine, theirs = id(self), id(other)
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        if not isinstance(attname_or_tuple, tuple):
            return self._attrs[attname_or_tuple]
        return self._attrsNS[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Assign either a string value or an Attr node under *attname*.

        Raises TypeError for any other kind of value.
        """
        if not isinstance(value, str):
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            self.setNamedItem(value)
            return
        if attname in self._attrs:
            attr = self._attrs[attname]
        else:
            # No such attribute yet: create and register one first.
            attr = Attr(attname)
            attr.ownerDocument = self._ownerElement.ownerDocument
            self.setNamedItem(attr)
        attr.value = value

    def getNamedItem(self, name):
        """Return the attribute named *name*, or None."""
        return self._attrs.get(name)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the attribute with the given namespace and local name,
        or None."""
        return self._attrsNS.get((namespaceURI, localName))

    def removeNamedItem(self, name):
        """Remove and return the attribute named *name*.

        Raises NotFoundErr when there is no such attribute.
        """
        found = self.getNamedItem(name)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrs[found.nodeName]
        del self._attrsNS[(found.namespaceURI, found.localName)]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the attribute with the given namespace and
        local name.

        Raises NotFoundErr when there is no such attribute.
        """
        found = self.getNamedItemNS(namespaceURI, localName)
        if found is None:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self._ownerElement)
        del self._attrsNS[(found.namespaceURI, found.localName)]
        del self._attrs[found.nodeName]
        if hasattr(found, 'ownerElement'):
            found.ownerElement = None
        return found

    def setNamedItem(self, node):
        """Store *node* under its name, unlinking any attribute of the
        same name, and return that previous attribute (or None).

        Raises HierarchyRequestErr when *node* is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        previous = self._attrs.get(node.name)
        if previous:
            previous.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return previous

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        doomed = self[attname_or_tuple]
        _clear_id_cache(doomed.ownerElement)
        doomed.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state
632
# Publish NamedNodeMap._get_length as the "length" attribute.  (defproperty
# is not defined in this file; presumably it comes from the
# xml.dom.minicompat star import -- confirm.)
defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

# Alternative name bound to the same class.
AttributeList = NamedNodeMap
637
638
class TypeInfo(object):
    """Pairs a type name with the namespace it belongs to."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.name = name
        self.namespace = namespace

    def __repr__(self):
        cls_name = self.__class__.__name__
        if not self.namespace:
            # No namespace: show the name only.
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name, self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace
658
# Shared TypeInfo with neither namespace nor name; used as Element.schemaType
# and returned by Attr._get_schemaType() when no type information is found.
_no_type = TypeInfo(None, None)
660
661class Element(Node):
662    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
663               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
664               'nextSibling', 'previousSibling')
665    nodeType = Node.ELEMENT_NODE
666    nodeValue = None
667    schemaType = _no_type
668
669    _magic_id_nodes = 0
670
671    _child_node_types = (Node.ELEMENT_NODE,
672                         Node.PROCESSING_INSTRUCTION_NODE,
673                         Node.COMMENT_NODE,
674                         Node.TEXT_NODE,
675                         Node.CDATA_SECTION_NODE,
676                         Node.ENTITY_REFERENCE_NODE)
677
678    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
679                 localName=None):
680        self.parentNode = None
681        self.tagName = self.nodeName = tagName
682        self.prefix = prefix
683        self.namespaceURI = namespaceURI
684        self.childNodes = NodeList()
685        self.nextSibling = self.previousSibling = None
686
687        # Attribute dictionaries are lazily created
688        # attributes are double-indexed:
689        #    tagName -> Attribute
690        #    URI,localName -> Attribute
691        # in the future: consider lazy generation
692        # of attribute objects this is too tricky
693        # for now because of headaches with
694        # namespaces.
695        self._attrs = None
696        self._attrsNS = None
697
698    def _ensure_attributes(self):
699        if self._attrs is None:
700            self._attrs = {}
701            self._attrsNS = {}
702
703    def _get_localName(self):
704        try:
705            return self._localName
706        except AttributeError:
707            return self.tagName.split(":", 1)[-1]
708
709    def _get_tagName(self):
710        return self.tagName
711
712    def unlink(self):
713        if self._attrs is not None:
714            for attr in list(self._attrs.values()):
715                attr.unlink()
716        self._attrs = None
717        self._attrsNS = None
718        Node.unlink(self)
719
720    def getAttribute(self, attname):
721        if self._attrs is None:
722            return ""
723        try:
724            return self._attrs[attname].value
725        except KeyError:
726            return ""
727
728    def getAttributeNS(self, namespaceURI, localName):
729        if self._attrsNS is None:
730            return ""
731        try:
732            return self._attrsNS[(namespaceURI, localName)].value
733        except KeyError:
734            return ""
735
736    def setAttribute(self, attname, value):
737        attr = self.getAttributeNode(attname)
738        if attr is None:
739            attr = Attr(attname)
740            attr.value = value # also sets nodeValue
741            attr.ownerDocument = self.ownerDocument
742            self.setAttributeNode(attr)
743        elif value != attr.value:
744            attr.value = value
745            if attr.isId:
746                _clear_id_cache(self)
747
748    def setAttributeNS(self, namespaceURI, qualifiedName, value):
749        prefix, localname = _nssplit(qualifiedName)
750        attr = self.getAttributeNodeNS(namespaceURI, localname)
751        if attr is None:
752            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
753            attr.value = value
754            attr.ownerDocument = self.ownerDocument
755            self.setAttributeNode(attr)
756        else:
757            if value != attr.value:
758                attr.value = value
759                if attr.isId:
760                    _clear_id_cache(self)
761            if attr.prefix != prefix:
762                attr.prefix = prefix
763                attr.nodeName = qualifiedName
764
765    def getAttributeNode(self, attrname):
766        if self._attrs is None:
767            return None
768        return self._attrs.get(attrname)
769
770    def getAttributeNodeNS(self, namespaceURI, localName):
771        if self._attrsNS is None:
772            return None
773        return self._attrsNS.get((namespaceURI, localName))
774
775    def setAttributeNode(self, attr):
776        if attr.ownerElement not in (None, self):
777            raise xml.dom.InuseAttributeErr("attribute node already owned")
778        self._ensure_attributes()
779        old1 = self._attrs.get(attr.name, None)
780        if old1 is not None:
781            self.removeAttributeNode(old1)
782        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
783        if old2 is not None and old2 is not old1:
784            self.removeAttributeNode(old2)
785        _set_attribute_node(self, attr)
786
787        if old1 is not attr:
788            # It might have already been part of this node, in which case
789            # it doesn't represent a change, and should not be returned.
790            return old1
791        if old2 is not attr:
792            return old2
793
794    setAttributeNodeNS = setAttributeNode
795
796    def removeAttribute(self, name):
797        if self._attrsNS is None:
798            raise xml.dom.NotFoundErr()
799        try:
800            attr = self._attrs[name]
801        except KeyError:
802            raise xml.dom.NotFoundErr()
803        self.removeAttributeNode(attr)
804
805    def removeAttributeNS(self, namespaceURI, localName):
806        if self._attrsNS is None:
807            raise xml.dom.NotFoundErr()
808        try:
809            attr = self._attrsNS[(namespaceURI, localName)]
810        except KeyError:
811            raise xml.dom.NotFoundErr()
812        self.removeAttributeNode(attr)
813
814    def removeAttributeNode(self, node):
815        if node is None:
816            raise xml.dom.NotFoundErr()
817        try:
818            self._attrs[node.name]
819        except KeyError:
820            raise xml.dom.NotFoundErr()
821        _clear_id_cache(self)
822        node.unlink()
823        # Restore this since the node is still useful and otherwise
824        # unlinked
825        node.ownerDocument = self.ownerDocument
826        return node
827
828    removeAttributeNodeNS = removeAttributeNode
829
830    def hasAttribute(self, name):
831        if self._attrs is None:
832            return False
833        return name in self._attrs
834
835    def hasAttributeNS(self, namespaceURI, localName):
836        if self._attrsNS is None:
837            return False
838        return (namespaceURI, localName) in self._attrsNS
839
840    def getElementsByTagName(self, name):
841        return _get_elements_by_tagName_helper(self, name, NodeList())
842
843    def getElementsByTagNameNS(self, namespaceURI, localName):
844        return _get_elements_by_tagName_ns_helper(
845            self, namespaceURI, localName, NodeList())
846
847    def __repr__(self):
848        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
849
850    def writexml(self, writer, indent="", addindent="", newl=""):
851        # indent = current indentation
852        # addindent = indentation to add to higher levels
853        # newl = newline string
854        writer.write(indent+"<" + self.tagName)
855
856        attrs = self._get_attributes()
857
858        for a_name in attrs.keys():
859            writer.write(" %s=\"" % a_name)
860            _write_data(writer, attrs[a_name].value)
861            writer.write("\"")
862        if self.childNodes:
863            writer.write(">")
864            if (len(self.childNodes) == 1 and
865                self.childNodes[0].nodeType in (
866                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
867                self.childNodes[0].writexml(writer, '', '', '')
868            else:
869                writer.write(newl)
870                for node in self.childNodes:
871                    node.writexml(writer, indent+addindent, addindent, newl)
872                writer.write(indent)
873            writer.write("</%s>%s" % (self.tagName, newl))
874        else:
875            writer.write("/>%s"%(newl))
876
877    def _get_attributes(self):
878        self._ensure_attributes()
879        return NamedNodeMap(self._attrs, self._attrsNS, self)
880
881    def hasAttributes(self):
882        if self._attrs:
883            return True
884        else:
885            return False
886
887    # DOM Level 3 attributes, based on the 22 Oct 2002 draft
888
889    def setIdAttribute(self, name):
890        idAttr = self.getAttributeNode(name)
891        self.setIdAttributeNode(idAttr)
892
893    def setIdAttributeNS(self, namespaceURI, localName):
894        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
895        self.setIdAttributeNode(idAttr)
896
897    def setIdAttributeNode(self, idAttr):
898        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
899            raise xml.dom.NotFoundErr()
900        if _get_containing_entref(self) is not None:
901            raise xml.dom.NoModificationAllowedErr()
902        if not idAttr._is_id:
903            idAttr._is_id = True
904            self._magic_id_nodes += 1
905            self.ownerDocument._magic_id_count += 1
906            _clear_id_cache(self)
907
# Register the "attributes" and "localName" names on Element through
# defproperty() (star-imported from xml.dom.minicompat at the top of the
# file), supplying the documentation string for each.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
912
913
def _set_attribute_node(element, attr):
    """Store *attr* in both attribute mappings of *element*.

    The ID cache is invalidated first, then the attribute is filed under
    its name and under its (namespaceURI, localName) pair, and finally
    the attribute is pointed back at its new owner.
    """
    _clear_id_cache(element)
    element._ensure_attributes()
    by_name = element._attrs
    by_ns = element._attrsNS
    by_name[attr.name] = attr
    by_ns[(attr.namespaceURI, attr.localName)] = attr

    # The back-reference makes a cycle; per the original note,
    # Element.unlink() breaks it by discarding the attribute dictionaries.
    attr.ownerElement = element
924
class Childless:
    """Mixin for node types that never hold child nodes.

    It supplies empty child-related attributes and replaces the
    child-manipulating Node methods with versions that either do
    nothing or raise a DOM exception.
    """
    __slots__ = ()

    attributes = None
    firstChild = None
    lastChild = None
    childNodes = EmptyNodeList()

    def _get_firstChild(self):
        """Always None: there are no children."""
        return None

    def _get_lastChild(self):
        """Always None: there are no children."""
        return None

    def hasChildNodes(self):
        """Always False."""
        return False

    def normalize(self):
        """Do nothing; there are no children to normalize."""
        pass

    def appendChild(self, node):
        """Refuse the request with xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Refuse the request with xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Refuse the request with xml.dom.NotFoundErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Refuse the request with xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
964
965
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node holding a target and its data string."""

    __slots__ = ('target', 'data')
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        self.target, self.data = target, data

    # nodeValue mirrors data in both directions
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(_get_nodeValue, _set_nodeValue)

    # nodeName mirrors target in both directions
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(_get_nodeName, _set_nodeName)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction as ``<?target data?>`` to *writer*.

        *indent* is written before it and *newl* after it; *addindent*
        is accepted but not used.
        """
        pieces = (indent, self.target, self.data, newl)
        writer.write("%s<?%s %s?>%s" % pieces)
990
991
class CharacterData(Childless, Node):
    """Base class for nodes whose content is one string held in ``data``.

    ``data`` and ``nodeValue`` are the same property.  The *Data()
    methods implement the DOM CharacterData editing operations; each
    raises xml.dom.IndexSizeErr for a negative offset, an offset past
    the end of the data, or a negative count.  An offset equal to the
    length of the data is valid (it addresses the end of the string).
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        """Return the number of characters in the data."""
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        # Only the first ten characters are shown; longer data gets "...".
        dotdotdot = "..." if len(data) > 10 else ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_range(self, offset, count=0):
        """Raise IndexSizeErr unless 0 <= offset <= len(data) and count >= 0."""
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        # offset == len(data) is allowed: DOM only rejects offsets that
        # are greater than the length of the data.
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")

    def substringData(self, offset, count):
        """Return up to *count* characters of the data starting at *offset*."""
        self._check_range(offset, count)
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append the string *arg* to the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert the string *arg* into the data at *offset*."""
        self._check_range(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to *count* characters of the data starting at *offset*."""
        self._check_range(offset, count)
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to *count* characters starting at *offset* with *arg*.

        A *count* of zero removes nothing and therefore simply inserts
        *arg* at *offset*.
        """
        self._check_range(offset, count)
        self.data = "%s%s%s" % (
            self.data[:offset], arg, self.data[offset+count:])
1062
# Register the "length" name on CharacterData through defproperty()
# (star-imported from xml.dom.minicompat), with its documentation string.
defproperty(CharacterData, "length", doc="Length of the string data.")
1064
1065
class Text(CharacterData):
    """Text node: character data that can be split and merged with siblings."""

    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Cut this node in two at *offset* and return the new tail node.

        The tail is created with the same class, receives the data from
        *offset* onwards and, when this node is listed among its
        parent's children, is placed right after it.  Raises
        xml.dom.IndexSizeErr when *offset* is outside 0..len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        tail = self.__class__()
        tail.data = self.data[offset:]
        tail.ownerDocument = self.ownerDocument
        follower = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if follower is None:
                parent.appendChild(tail)
            else:
                parent.insertBefore(tail, follower)
        self.data = self.data[:offset]
        return tail

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl through _write_data()."""
        _write_data(writer, "%s%s%s" % (indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Return this node's data joined with all adjacent text/CDATA data."""
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

        # Walk backwards first; the pieces come out nearest-first and
        # are reversed afterwards to restore document order.
        leading = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_types:
            leading.append(sibling.data)
            sibling = sibling.previousSibling
        leading.reverse()

        trailing = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in text_types:
            trailing.append(sibling.data)
            sibling = sibling.nextSibling

        return ''.join(leading + [self.data] + trailing)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by *content*.

        The adjacent siblings are removed from the parent.  With
        non-empty *content* this node keeps it and is returned; with
        empty *content* this node is removed too and None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_types = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode

        node = self.previousSibling
        while node is not None and node.nodeType in text_types:
            earlier = node.previousSibling
            parent.removeChild(node)
            node = earlier

        # Remember the following sibling before this node is possibly
        # detached from the parent.
        node = self.nextSibling
        if not content:
            parent.removeChild(self)
        while node is not None and node.nodeType in text_types:
            later = node.nextSibling
            parent.removeChild(node)
            node = later

        if not content:
            return None
        self.data = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Return whether this is whitespace-only text in element content.

        False is returned when the data has non-whitespace characters,
        when there is no containing element, or when the owner document
        has no element info for that element; otherwise the element
        info's isElementContent() decides.
        """
        if self.data.strip():
            return False
        elem = _get_containing_element(self)
        if elem is None:
            return False
        info = self.ownerDocument._get_elem_info(elem)
        return False if info is None else info.isElementContent()
1150
# Register the "isWhitespaceInElementContent" and "wholeText" names on
# Text through defproperty() (star-imported from xml.dom.minicompat),
# each with its documentation string.
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1156
1157
def _get_containing_element(node):
    """Return the nearest ancestor of *node* that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1165
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of *node*, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1173
1174
class Comment(CharacterData):
    """Comment node; its text is given at construction time."""

    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment as ``<!--data-->`` to *writer*.

        Raises ValueError when the data contains "--".  *addindent* is
        accepted but not used.
        """
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("%s<!--%s-->%s" % (indent, text, newl))
1187
1188
class CDATASection(Text):
    """Text node that is serialized as a CDATA section."""

    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write ``<![CDATA[data]]>`` to *writer*.

        Raises ValueError when the data contains "]]>".  The indent,
        addindent and newl arguments are accepted but not used.
        """
        text = self.data
        if "]]>" in text:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[%s]]>" % text)
1199
1200
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only named-node map backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """
    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals *name*, else None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching namespace and local name, else None."""
        matches = (n for n in self._seq
                   if n.namespaceURI == namespaceURI
                   and n.localName == localName)
        return next(matches, None)

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by (namespaceURI, localName) when given a tuple.

        Raises KeyError when no node matches.
        """
        if isinstance(name_or_tuple, tuple):
            found = self.getNamedItemNS(*name_or_tuple)
        else:
            found = self.getNamedItem(name_or_tuple)
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the node at *index*; None for negative or out-of-range."""
        if index < 0:
            return None
        try:
            return self._seq[index]
        except IndexError:
            return None

    def removeNamedItem(self, name):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # The pickled state is a one-item list holding the sequence.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1262
# Register the "length" name on ReadOnlySequentialNamedNodeMap through
# defproperty() (star-imported from xml.dom.minicompat).
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1265
1266
class Identified:
    """Mix-in class that supports the publicId and systemId attributes."""

    __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        """Store both identifiers; meant to be called from a constructor."""
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1281
class DocumentType(Identified, Childless, Node):
    """Document type node: name, identifiers, entities and notations."""

    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        """Create the node; *qualifiedName* may be None or empty.

        Only the local part of a qualified name is kept as the name.
        """
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            prefix, localname = _nssplit(qualifiedName)
            self.name = localname
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Return a copy of this node, or None if it belongs to a document.

        A shallow clone carries the name only.  A deep clone also gets
        fresh Notation and Entity objects mirroring the originals.  The
        user-data handlers are notified with NODE_CLONED.
        """
        if self.ownerDocument is not None:
            return None

        # it's ok
        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for source in self.notations._seq:
                copy = Notation(source.nodeName, source.publicId,
                                source.systemId)
                clone.notations._seq.append(copy)
                source._call_user_data_handler(operation, source, copy)
            for source in self.entities._seq:
                copy = Entity(source.nodeName, source.publicId,
                              source.systemId, source.notationName)
                copy.actualEncoding = source.actualEncoding
                copy.encoding = source.encoding
                copy.version = source.version
                clone.entities._seq.append(copy)
                source._call_user_data_handler(operation, source, copy)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the ``<!DOCTYPE ...>`` declaration to *writer*.

        The PUBLIC form is used when publicId is set, otherwise the
        SYSTEM form when systemId is set; an internal subset, if any,
        follows in square brackets.  *indent* and *addindent* are
        accepted but not used.
        """
        write = writer.write
        write("<!DOCTYPE ")
        write(self.name)
        if self.publicId:
            write("%s  PUBLIC '%s'%s  '%s'"
                  % (newl, self.publicId, newl, self.systemId))
        elif self.systemId:
            write("%s  SYSTEM '%s'" % (newl, self.systemId))
        subset = self.internalSubset
        if subset is not None:
            write(" [")
            write(subset)
            write("]")
        write(">"+newl)
1341
class Entity(Identified, Node):
    """Entity node; it has a child list but refuses all child changes."""

    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # Every child-modifying operation is rejected with
    # xml.dom.HierarchyRequestErr.

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1381
class Notation(Identified, Childless, Node):
    """Notation node: a name plus public and system identifiers."""

    nodeValue = None
    nodeType = Node.NOTATION_NODE

    def __init__(self, name, publicId, systemId):
        self.nodeName = name
        self._identified_mixin_init(publicId, systemId)
1389
1390
class DOMImplementation(DOMImplementationLS):
    """Factory for Document and DocumentType objects plus feature queries."""

    # (feature, version) pairs reported as supported; a version of None
    # stands for "any version".
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return whether (*feature*, *version*) is listed in _features.

        The feature name is compared in lower case and an empty version
        string is treated like None.
        """
        wanted = None if version == "" else version
        return (feature.lower(), wanted) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Build a new document, optionally with root element and doctype.

        When all three arguments are None the document is returned
        without a document element.  Raises xml.dom.WrongDocumentErr if
        *doctype* already has a parent, xml.dom.InvalidCharacterErr if a
        root element is wanted but no name is given, and
        xml.dom.NamespaceErr for an invalid prefix/namespace pairing.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        wants_root = (namespaceURI is not None
                      or qualifiedName is not None
                      or doctype is not None)

        if wants_root:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces
                # raises InvalidCharacterErr, and since SyntaxErr is not
                # listed for createDocument, that seems to be the better
                # choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without
                # a document element.  Otherwise if doctype or
                # namespaceURI are not None, then we go back to the
                # above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            if prefix == "xml" \
               and namespaceURI != "http://www.w3.org/XML/1998/namespace":
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            # The doctype, when given, is placed before the root element.
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Return a new DocumentType carrying the given identifiers."""
        new_type = DocumentType(qualifiedName)
        new_type.publicId = publicId
        new_type.systemId = systemId
        return new_type

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self if *feature* is supported in any version, else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1469
class ElementInfo(object):
    """Default content-model information for a single element type.

    Every query gives a neutral answer (no attribute type, nothing is
    an ID, no element content, not EMPTY).  As the original note says,
    this implementation is not expected to be used in practice; DOM
    builders should supply implementations backed by the information
    they actually have.
    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    def getAttributeType(self, aname):
        """Return the module's _no_type object for every attribute."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the module's _no_type object for every attribute."""
        return _no_type

    def isElementContent(self):
        """Always False."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model; this implementation always answers False."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID; this
        implementation always answers False."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID;
        this implementation always answers False."""
        return False

    def __getstate__(self):
        # The tag name is the whole pickled state.
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1511
def _clear_id_cache(node):
    """Drop the cached ID lookups of the document *node* belongs to.

    *node* may be the document itself or a node for which
    _in_document() is true; for any other node nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1519
class Document(Node, DocumentLS):
    # Names given dedicated slots on Document instances.
    __slots__ = ('_elem_info', 'doctype',
                 '_id_search_stack', 'childNodes', '_id_cache')
    # Node types that appendChild() accepts as direct children.
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    # Class-level default; DOMImplementation.createDocument() stores its
    # own implementation object on the documents it builds.
    implementation = DOMImplementation()
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    parentNode = None
    previousSibling = nextSibling = None


    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    # Incremented by Element.setIdAttributeNode() for every attribute it
    # flags as an ID; getElementById() returns early while both this
    # counter and _elem_info are empty.
    _magic_id_count = 0
1546
1547    def __init__(self):
1548        self.doctype = None
1549        self.childNodes = NodeList()
1550        # mapping of (namespaceURI, localName) -> ElementInfo
1551        #        and tagName -> ElementInfo
1552        self._elem_info = {}
1553        self._id_cache = {}
1554        self._id_search_stack = None
1555
1556    def _get_elem_info(self, element):
1557        if element.namespaceURI:
1558            key = element.namespaceURI, element.localName
1559        else:
1560            key = element.tagName
1561        return self._elem_info.get(key)
1562
    def _get_actualEncoding(self):
        """Return the actualEncoding attribute."""
        return self.actualEncoding
1565
    def _get_doctype(self):
        """Return the doctype attribute."""
        return self.doctype
1568
    def _get_documentURI(self):
        """Return the documentURI attribute."""
        return self.documentURI
1571
    def _get_encoding(self):
        """Return the encoding attribute."""
        return self.encoding
1574
    def _get_errorHandler(self):
        """Return the errorHandler attribute."""
        return self.errorHandler
1577
    def _get_standalone(self):
        """Return the standalone attribute."""
        return self.standalone
1580
    def _get_strictErrorChecking(self):
        """Return the strictErrorChecking attribute."""
        return self.strictErrorChecking
1583
    def _get_version(self):
        """Return the version attribute."""
        return self.version
1586
1587    def appendChild(self, node):
1588        if node.nodeType not in self._child_node_types:
1589            raise xml.dom.HierarchyRequestErr(
1590                "%s cannot be child of %s" % (repr(node), repr(self)))
1591        if node.parentNode is not None:
1592            # This needs to be done before the next test since this
1593            # may *be* the document element, in which case it should
1594            # end up re-ordered to the end.
1595            node.parentNode.removeChild(node)
1596
1597        if node.nodeType == Node.ELEMENT_NODE \
1598           and self._get_documentElement():
1599            raise xml.dom.HierarchyRequestErr(
1600                "two document elements disallowed")
1601        return Node.appendChild(self, node)
1602
1603    def removeChild(self, oldChild):
1604        try:
1605            self.childNodes.remove(oldChild)
1606        except ValueError:
1607            raise xml.dom.NotFoundErr()
1608        oldChild.nextSibling = oldChild.previousSibling = None
1609        oldChild.parentNode = None
1610        if self.documentElement is oldChild:
1611            self.documentElement = None
1612
1613        return oldChild
1614
1615    def _get_documentElement(self):
1616        for node in self.childNodes:
1617            if node.nodeType == Node.ELEMENT_NODE:
1618                return node
1619
1620    def unlink(self):
1621        if self.doctype is not None:
1622            self.doctype.unlink()
1623            self.doctype = None
1624        Node.unlink(self)
1625
1626    def cloneNode(self, deep):
1627        if not deep:
1628            return None
1629        clone = self.implementation.createDocument(None, None, None)
1630        clone.encoding = self.encoding
1631        clone.standalone = self.standalone
1632        clone.version = self.version
1633        for n in self.childNodes:
1634            childclone = _clone_node(n, deep, clone)
1635            assert childclone.ownerDocument.isSameNode(clone)
1636            clone.childNodes.append(childclone)
1637            if childclone.nodeType == Node.DOCUMENT_NODE:
1638                assert clone.documentElement is None
1639            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
1640                assert clone.doctype is None
1641                clone.doctype = childclone
1642            childclone.parentNode = clone
1643        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
1644                                     self, clone)
1645        return clone
1646
1647    def createDocumentFragment(self):
1648        d = DocumentFragment()
1649        d.ownerDocument = self
1650        return d
1651
1652    def createElement(self, tagName):
1653        e = Element(tagName)
1654        e.ownerDocument = self
1655        return e
1656
1657    def createTextNode(self, data):
1658        if not isinstance(data, str):
1659            raise TypeError("node contents must be a string")
1660        t = Text()
1661        t.data = data
1662        t.ownerDocument = self
1663        return t
1664
1665    def createCDATASection(self, data):
1666        if not isinstance(data, str):
1667            raise TypeError("node contents must be a string")
1668        c = CDATASection()
1669        c.data = data
1670        c.ownerDocument = self
1671        return c
1672
1673    def createComment(self, data):
1674        c = Comment(data)
1675        c.ownerDocument = self
1676        return c
1677
1678    def createProcessingInstruction(self, target, data):
1679        p = ProcessingInstruction(target, data)
1680        p.ownerDocument = self
1681        return p
1682
1683    def createAttribute(self, qName):
1684        a = Attr(qName)
1685        a.ownerDocument = self
1686        a.value = ""
1687        return a
1688
1689    def createElementNS(self, namespaceURI, qualifiedName):
1690        prefix, localName = _nssplit(qualifiedName)
1691        e = Element(qualifiedName, namespaceURI, prefix)
1692        e.ownerDocument = self
1693        return e
1694
1695    def createAttributeNS(self, namespaceURI, qualifiedName):
1696        prefix, localName = _nssplit(qualifiedName)
1697        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1698        a.ownerDocument = self
1699        a.value = ""
1700        return a
1701
1702    # A couple of implementation-specific helpers to create node types
1703    # not supported by the W3C DOM specs:
1704
1705    def _create_entity(self, name, publicId, systemId, notationName):
1706        e = Entity(name, publicId, systemId, notationName)
1707        e.ownerDocument = self
1708        return e
1709
1710    def _create_notation(self, name, publicId, systemId):
1711        n = Notation(name, publicId, systemId)
1712        n.ownerDocument = self
1713        return n
1714
1715    def getElementById(self, id):
1716        if id in self._id_cache:
1717            return self._id_cache[id]
1718        if not (self._elem_info or self._magic_id_count):
1719            return None
1720
1721        stack = self._id_search_stack
1722        if stack is None:
1723            # we never searched before, or the cache has been cleared
1724            stack = [self.documentElement]
1725            self._id_search_stack = stack
1726        elif not stack:
1727            # Previous search was completed and cache is still valid;
1728            # no matching node.
1729            return None
1730
1731        result = None
1732        while stack:
1733            node = stack.pop()
1734            # add child elements to stack for continued searching
1735            stack.extend([child for child in node.childNodes
1736                          if child.nodeType in _nodeTypes_with_children])
1737            # check this node
1738            info = self._get_elem_info(node)
1739            if info:
1740                # We have to process all ID attributes before
1741                # returning in order to get all the attributes set to
1742                # be IDs using Element.setIdAttribute*().
1743                for attr in node.attributes.values():
1744                    if attr.namespaceURI:
1745                        if info.isIdNS(attr.namespaceURI, attr.localName):
1746                            self._id_cache[attr.value] = node
1747                            if attr.value == id:
1748                                result = node
1749                            elif not node._magic_id_nodes:
1750                                break
1751                    elif info.isId(attr.name):
1752                        self._id_cache[attr.value] = node
1753                        if attr.value == id:
1754                            result = node
1755                        elif not node._magic_id_nodes:
1756                            break
1757                    elif attr._is_id:
1758                        self._id_cache[attr.value] = node
1759                        if attr.value == id:
1760                            result = node
1761                        elif node._magic_id_nodes == 1:
1762                            break
1763            elif node._magic_id_nodes:
1764                for attr in node.attributes.values():
1765                    if attr._is_id:
1766                        self._id_cache[attr.value] = node
1767                        if attr.value == id:
1768                            result = node
1769            if result is not None:
1770                break
1771        return result
1772
1773    def getElementsByTagName(self, name):
1774        return _get_elements_by_tagName_helper(self, name, NodeList())
1775
1776    def getElementsByTagNameNS(self, namespaceURI, localName):
1777        return _get_elements_by_tagName_ns_helper(
1778            self, namespaceURI, localName, NodeList())
1779
1780    def isSupported(self, feature, version):
1781        return self.implementation.hasFeature(feature, version)
1782
1783    def importNode(self, node, deep):
1784        if node.nodeType == Node.DOCUMENT_NODE:
1785            raise xml.dom.NotSupportedErr("cannot import document nodes")
1786        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1787            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1788        return _clone_node(node, deep, self)
1789
1790    def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
1791        if encoding is None:
1792            writer.write('<?xml version="1.0" ?>'+newl)
1793        else:
1794            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
1795                encoding, newl))
1796        for node in self.childNodes:
1797            node.writexml(writer, indent, addindent, newl)
1798
1799    # DOM Level 3 (WD 9 April 2002)
1800
1801    def renameNode(self, n, namespaceURI, name):
1802        if n.ownerDocument is not self:
1803            raise xml.dom.WrongDocumentErr(
1804                "cannot rename nodes from other documents;\n"
1805                "expected %s,\nfound %s" % (self, n.ownerDocument))
1806        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1807            raise xml.dom.NotSupportedErr(
1808                "renameNode() only applies to element and attribute nodes")
1809        if namespaceURI != EMPTY_NAMESPACE:
1810            if ':' in name:
1811                prefix, localName = name.split(':', 1)
1812                if (  prefix == "xmlns"
1813                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1814                    raise xml.dom.NamespaceErr(
1815                        "illegal use of 'xmlns' prefix")
1816            else:
1817                if (  name == "xmlns"
1818                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1819                      and n.nodeType == Node.ATTRIBUTE_NODE):
1820                    raise xml.dom.NamespaceErr(
1821                        "illegal use of the 'xmlns' attribute")
1822                prefix = None
1823                localName = name
1824        else:
1825            prefix = None
1826            localName = None
1827        if n.nodeType == Node.ATTRIBUTE_NODE:
1828            element = n.ownerElement
1829            if element is not None:
1830                is_id = n._is_id
1831                element.removeAttributeNode(n)
1832        else:
1833            element = None
1834        n.prefix = prefix
1835        n._localName = localName
1836        n.namespaceURI = namespaceURI
1837        n.nodeName = name
1838        if n.nodeType == Node.ELEMENT_NODE:
1839            n.tagName = name
1840        else:
1841            # attribute node
1842            n.name = name
1843            if element is not None:
1844                element.setAttributeNode(n)
1845                if is_id:
1846                    element.setIdAttributeNode(n)
1847        # It's not clear from a semantic perspective whether we should
1848        # call the user data handlers for the NODE_RENAMED event since
1849        # we're re-using the existing node.  The draft spec has been
1850        # interpreted as meaning "no, don't call the handler unless a
1851        # new node is created."
1852        return n
1853
# Register "documentElement" on Document through the defproperty()
# helper, attaching the documentation string given below.
# NOTE(review): defproperty is not defined in this part of the file; it
# presumably comes from the xml.dom.minicompat star import and wires the
# name to a getter on the class -- confirm there.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1856
1857
def _clone_node(node, deep, newOwnerDocument):
    """
    Clone a node and give it the new owner document.
    Called by Node.cloneNode and Document.importNode

    node is the node to copy and newOwnerDocument is the document whose
    factory methods create the copy.  When deep is true the children of
    elements and document fragments, and the notations and entities of
    document types, are copied as well.

    Returns the new node.  Raises xml.dom.NotSupportedErr for node
    types that are not handled here.  If node provides
    _call_user_data_handler(), it is invoked with NODE_CLONED when
    node.ownerDocument.isSameNode(newOwnerDocument) is true and with
    NODE_IMPORTED otherwise.
    """
    if node.ownerDocument.isSameNode(newOwnerDocument):
        operation = xml.dom.UserDataHandler.NODE_CLONED
    else:
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
    if node.nodeType == Node.ELEMENT_NODE:
        clone = newOwnerDocument.createElementNS(node.namespaceURI,
                                                 node.nodeName)
        for attr in node.attributes.values():
            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
            # Fetch the copy by the qualified name it was just set
            # under.  Looking it up by (namespaceURI, localName) is not
            # reliable: renameNode() stores None as the local name of
            # nodes renamed into the empty namespace, and a lookup with
            # that None finds nothing, so the assignment below would
            # fail on None.
            a = clone.getAttributeNode(attr.nodeName)
            a.specified = attr.specified

        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
        clone = newOwnerDocument.createDocumentFragment()
        if deep:
            for child in node.childNodes:
                c = _clone_node(child, deep, newOwnerDocument)
                clone.appendChild(c)

    elif node.nodeType == Node.TEXT_NODE:
        clone = newOwnerDocument.createTextNode(node.data)
    elif node.nodeType == Node.CDATA_SECTION_NODE:
        clone = newOwnerDocument.createCDATASection(node.data)
    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        clone = newOwnerDocument.createProcessingInstruction(node.target,
                                                             node.data)
    elif node.nodeType == Node.COMMENT_NODE:
        clone = newOwnerDocument.createComment(node.data)
    elif node.nodeType == Node.ATTRIBUTE_NODE:
        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
                                                   node.nodeName)
        clone.specified = True
        clone.value = node.value
    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
        # A document type is only ever copied into another document,
        # so the operation is always an import.
        assert node.ownerDocument is not newOwnerDocument
        operation = xml.dom.UserDataHandler.NODE_IMPORTED
        clone = newOwnerDocument.implementation.createDocumentType(
            node.name, node.publicId, node.systemId)
        clone.ownerDocument = newOwnerDocument
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for n in node.notations._seq:
                notation = Notation(n.nodeName, n.publicId, n.systemId)
                notation.ownerDocument = newOwnerDocument
                clone.notations._seq.append(notation)
                if hasattr(n, '_call_user_data_handler'):
                    n._call_user_data_handler(operation, n, notation)
            for e in node.entities._seq:
                entity = Entity(e.nodeName, e.publicId, e.systemId,
                                e.notationName)
                entity.actualEncoding = e.actualEncoding
                entity.encoding = e.encoding
                entity.version = e.version
                entity.ownerDocument = newOwnerDocument
                clone.entities._seq.append(entity)
                if hasattr(e, '_call_user_data_handler'):
                    e._call_user_data_handler(operation, e, entity)
    else:
        # Note the cloning of Document and DocumentType nodes is
        # implementation specific.  minidom handles those cases
        # directly in the cloneNode() methods.
        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))

    # Check for _call_user_data_handler() since this could conceivably
    # be used with other DOM implementations (one of the FourThought
    # DOMs, perhaps?).
    if hasattr(node, '_call_user_data_handler'):
        node._call_user_data_handler(operation, node, clone)
    return clone
1938
1939
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon.

    Returns the two-item list [prefix, localName] when the name
    contains a colon, and the tuple (None, qualifiedName) otherwise.
    """
    head, colon, tail = qualifiedName.partition(':')
    if colon:
        return [head, tail]
    return (None, head)
1946
1947
def _do_pulldom_parse(func, args, kwargs):
    """Run a pulldom parse function and return the fully expanded root.

    func(*args, **kwargs) must return an event stream.  The node
    delivered with the stream's first event is expanded in place, the
    stream is cleared, and that node is returned.
    """
    stream = func(*args, **kwargs)
    _first_token, root = stream.getEvent()
    stream.expandNode(root)
    stream.clear()
    return root
1954
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    When a parser is supplied, or bufsize is true, the work is done by
    pulldom with both values passed along; otherwise the file is handed
    to expatbuilder.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1964
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    When a parser is supplied the work is done by pulldom using that
    parser; otherwise the string is handed to expatbuilder.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1974
def getDOMImplementation(features=None):
    """Return Document.implementation if it has all requested features.

    *features* may be a feature string, which is parsed with
    domreg._parse_feature_string(), or an iterable of
    (feature, version) pairs.  When it is empty or omitted the
    implementation is returned unconditionally; when any requested
    feature is missing the result is None.
    """
    implementation = Document.implementation
    if not features:
        return implementation
    if isinstance(features, str):
        features = domreg._parse_feature_string(features)
    satisfied = all(implementation.hasFeature(feature, version)
                    for feature, version in features)
    return implementation if satisfied else None
1983