1"""Simple implementation of the Level 1 DOM.
2
3Namespaces and other minor Level 2 features are also supported.
4
5parse("foo.xml")
6
7parseString("<foo><bar/></foo>")
8
9Todo:
10=====
11 * convenience methods for getting elements and text.
12 * more testing
13 * bring some of the writer and linearizer code into conformance with this
14        interface
15 * SAX 2 namespaces
16"""
17
18import io
19import xml.dom
20
21from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
22from xml.dom.minicompat import *
23from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
24
# Node types that can themselves contain elements.  The ID-cache
# invalidation checks test the node being added or removed (not the
# node being modified) against this tuple, and only clear the cache
# when that node could be carrying a subtree.  The list isn't actually
# complete, since the nodes being checked will never be the
# DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
32
33
class Node(xml.dom.Node):
    """Common base class for the minidom node types.

    Provides the child-list bookkeeping (``childNodes`` together with the
    ``parentNode``/``previousSibling``/``nextSibling`` links), the
    ``toxml()``/``toprettyxml()`` serialisation entry points, the DOM
    Level 3 user-data API and ``unlink()``.  The methods here rely on the
    concrete class supplying ``nodeType``, ``childNodes``,
    ``_child_node_types`` and ``writexml()``.
    """
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __bool__(self):
        """A node is always true, whatever it contains."""
        return True

    def toxml(self, encoding=None):
        """Serialise the node without added indentation or newlines.

        Returns ``str`` when *encoding* is None, otherwise ``bytes``.
        """
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
        """Serialise the node through ``writexml()``.

        *indent* is the per-level indentation string and *newl* the
        line terminator handed to ``writexml()``.  With *encoding* set
        the text is encoded (unencodable characters become character
        references) and ``bytes`` are returned; otherwise ``str``.
        """
        if encoding is None:
            writer = io.StringIO()
        else:
            writer = io.TextIOWrapper(io.BytesIO(),
                                      encoding=encoding,
                                      errors="xmlcharrefreplace",
                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
            return writer.getvalue()
        else:
            # detach() flushes the wrapper and hands back the BytesIO.
            return writer.detach().getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        # Falls through to an implicit None when there are no children.
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* in front of *refChild* and return *newChild*.

        A document fragment is expanded: each of its children is
        inserted in order.  ``refChild=None`` appends.  Raises
        ``HierarchyRequestErr`` when this node cannot hold *newChild*
        and ``NotFoundErr`` when *refChild* is not a child of this
        node.  *refChild* is validated before *newChild* is detached
        from its current parent, so a failed call changes neither tree.
        Inserting a child before itself leaves the tree unchanged.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if refChild is not None:
            # Validate before mutating anything.
            if refChild not in self.childNodes:
                raise xml.dom.NotFoundErr()
            if newChild is refChild:
                # Already exactly where it is being asked to go.
                return newChild
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            # Looked up after the detach above: if newChild was an
            # earlier sibling, removing it shifted refChild's position.
            index = self.childNodes.index(refChild)
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append *node* as the last child and return it.

        A document fragment is expanded into its children.  Raises
        ``HierarchyRequestErr`` when this node cannot hold *node*.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Put *newChild* in the place of *oldChild*.

        Returns *oldChild*, except that replacing a node by itself
        returns None and replacing by a document fragment returns the
        fragment (the result of ``insertBefore``).  Raises
        ``HierarchyRequestErr`` when this node cannot hold *newChild*
        and ``NotFoundErr`` when *oldChild* is not a child of this
        node; the latter is checked before *newChild* is detached from
        its current parent.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        # Validate before mutating anything.
        if oldChild not in self.childNodes:
            raise xml.dom.NotFoundErr()
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        # Looked up after the detach above: if newChild was an earlier
        # sibling, removing it shifted oldChild's position.
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises ``NotFoundErr`` when *oldChild* is not a child.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Close the gap in the sibling chain.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Drop empty text children and merge adjacent text children.

        Element children are normalised recursively.
        """
        L = []  # the children that survive, in order
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy made by ``_clone_node``.

        The copy is created for ``ownerDocument``, or for this node
        itself when it has no owner document.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about *feature*."""
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True if *other* is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return this node if *feature* is supported, else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under *key*, or None."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store ``(data, handler)`` under *key*; return the old data.

        Passing ``data=None`` removes an existing entry.  Returns None
        when nothing was stored under *key* before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        # Notify every registered handler; entries without one are skipped.
        if hasattr(self, "_user_data"):
            for key, (data, handler) in list(self._user_data.items()):
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Recursively drop the references this subtree holds.

        Clears the parent, owner-document and sibling links and
        unlinks every child, replacing the child list by an empty one.
        """
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

    # A Node is its own context manager, to ensure that an unlink() call occurs.
    # This is similar to how a file object works.
    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        self.unlink()
278
# Register firstChild, lastChild and localName on Node through
# defproperty (imported from xml.dom.minicompat).
# NOTE(review): presumably each property is built from the matching
# _get_<name> method defined on the class -- confirm in minicompat.
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
282
283
def _append_child(self, node):
    """Link *node* in as the last child of *self*.

    Fast path with fewer checks, usable by DOM builders if careful:
    no hierarchy validation, no detaching from a previous parent, and
    ``node.nextSibling`` is left untouched.
    """
    siblings = self.childNodes
    if siblings:
        # Chain the newcomer behind the current last child.
        tail = siblings[-1]
        node.previousSibling = tail
        tail.nextSibling = node
    siblings.append(node)
    node.parentNode = self
293
def _in_document(node):
    """Return True iff *node* is part of a document tree.

    Walks the ``parentNode`` chain upwards, starting with *node*
    itself, looking for a DOCUMENT_NODE.
    """
    current = node
    while current is not None and current.nodeType != Node.DOCUMENT_NODE:
        current = current.parentNode
    # Falling off the top of the chain means no document was found.
    return current is not None
301
def _write_data(writer, data):
    """Write *data* to *writer* with ``&``, ``<``, ``"`` and ``>`` escaped.

    Nothing is written when *data* is empty or None.
    """
    if not data:
        return
    # "&" has to be handled first so the entities produced by the later
    # replacements are not escaped a second time.
    for raw, escaped in (("&", "&amp;"),
                         ("<", "&lt;"),
                         ("\"", "&quot;"),
                         (">", "&gt;")):
        data = data.replace(raw, escaped)
    writer.write(data)
308
def _get_elements_by_tagName_helper(parent, name, rc):
    """Collect the elements below *parent* whose tagName is *name*.

    Matches are appended to *rc* in document order and *rc* is
    returned.  A *name* of ``"*"`` matches every element.  Every child
    is descended into, whatever its node type.
    """
    wildcard = name == "*"
    for child in parent.childNodes:
        is_element = child.nodeType == Node.ELEMENT_NODE
        if is_element and (wildcard or child.tagName == name):
            rc.append(child)
        _get_elements_by_tagName_helper(child, name, rc)
    return rc
316
def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    """Collect the elements below *parent* matching a namespace and name.

    Matches are appended to *rc* in document order and *rc* is
    returned.  ``"*"`` acts as a wildcard for either *nsURI* or
    *localName*.  Only element children are descended into.
    """
    for child in parent.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        name_ok = localName == "*" or child.localName == localName
        if name_ok and (nsURI == "*" or child.namespaceURI == nsURI):
            rc.append(child)
        _get_elements_by_tagName_ns_helper(child, nsURI, localName, rc)
    return rc
325
class DocumentFragment(Node):
    """Parentless container node holding a list of children.

    Node.insertBefore, Node.appendChild and Node.replaceChild treat a
    node of this type specially: its children are moved into the
    target one by one instead of the fragment itself being inserted.
    """
    nodeType = Node.DOCUMENT_FRAGMENT_NODE
    nodeName = "#document-fragment"
    nodeValue = None
    attributes = None
    parentNode = None

    # Node types accepted as children of a fragment.
    _child_node_types = (
        Node.ELEMENT_NODE,
        Node.TEXT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.COMMENT_NODE,
        Node.NOTATION_NODE,
    )

    def __init__(self):
        # Every fragment starts out with its own empty child list.
        self.childNodes = NodeList()
342
343
class Attr(Node):
    """An attribute node.

    The value is kept twice: in ``_value`` and as the ``data`` of a
    single Text child.  ``ownerElement`` is the element whose attribute
    dictionaries currently hold this node, or None.
    """
    __slots__=('_name', '_value', 'namespaceURI',
               '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    specified = False
    _is_id = False

    _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)

    def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
                 prefix=None):
        self.ownerElement = None
        # 'ownerDocument' is a slot on this class, so it has to be
        # initialised here for reads to work before a document or
        # element assigns the real owner.
        self.ownerDocument = None
        self._name = qName
        self.namespaceURI = namespaceURI
        self._prefix = prefix
        self.childNodes = NodeList()

        # Add the single child node that represents the value of the attr
        self.childNodes.append(Text())

        # nodeValue and value are set elsewhere

    def _get_localName(self):
        # Fall back to the part of the qualified name after the prefix
        # when no explicit local name has been stored.
        try:
            return self._localName
        except AttributeError:
            return self.nodeName.split(":", 1)[-1]

    def _get_specified(self):
        return self.specified

    def _get_name(self):
        return self._name

    def _set_name(self, value):
        self._name = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeName = name = property(_get_name, _set_name)

    def _get_value(self):
        return self._value

    def _set_value(self, value):
        # Keep the Text child in step with the stored value.
        self._value = value
        self.childNodes[0].data = value
        if self.ownerElement is not None:
            _clear_id_cache(self.ownerElement)

    nodeValue = value = property(_get_value, _set_value)

    def _get_prefix(self):
        return self._prefix

    def _set_prefix(self, prefix):
        # Changing the prefix also rewrites the qualified name.
        nsuri = self.namespaceURI
        if prefix == "xmlns":
            if nsuri and nsuri != XMLNS_NAMESPACE:
                raise xml.dom.NamespaceErr(
                    "illegal use of 'xmlns' prefix for the wrong namespace")
        self._prefix = prefix
        if prefix is None:
            newName = self.localName
        else:
            newName = "%s:%s" % (prefix, self.localName)
        if self.ownerElement:
            _clear_id_cache(self.ownerElement)
        self.name = newName

    prefix = property(_get_prefix, _set_prefix)

    def unlink(self):
        """Detach the attribute from its element and drop its children.

        Removes the node from both attribute dictionaries of the owner
        element, undoes any ID bookkeeping and clears ``ownerElement``,
        so unlinking an already unlinked attribute is harmless.
        """
        # This implementation does not call the base implementation
        # since most of that is not needed, and the expense of the
        # method call is not warranted.  We duplicate the removal of
        # children, but that's all we needed from the base class.
        elem = self.ownerElement
        if elem is not None:
            del elem._attrs[self.nodeName]
            del elem._attrsNS[(self.namespaceURI, self.localName)]
            if self._is_id:
                self._is_id = False
                elem._magic_id_nodes -= 1
                self.ownerDocument._magic_id_count -= 1
            # The element no longer holds this node.
            self.ownerElement = None
        for child in self.childNodes:
            child.unlink()
        del self.childNodes[:]

    def _get_isId(self):
        # An explicit ID marking wins; otherwise ask the document's
        # element info, when there is any.
        if self._is_id:
            return True
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return False

        info = doc._get_elem_info(elem)
        if info is None:
            return False
        if self.namespaceURI:
            return info.isIdNS(self.namespaceURI, self.localName)
        else:
            return info.isId(self.nodeName)

    def _get_schemaType(self):
        # _no_type is the answer whenever no element info is available.
        doc = self.ownerDocument
        elem = self.ownerElement
        if doc is None or elem is None:
            return _no_type

        info = doc._get_elem_info(elem)
        if info is None:
            return _no_type
        if self.namespaceURI:
            return info.getAttributeTypeNS(self.namespaceURI, self.localName)
        else:
            return info.getAttributeType(self.nodeName)
464
# Register isId, localName and schemaType on Attr through defproperty
# (imported from xml.dom.minicompat).
# NOTE(review): presumably each property is built from the matching
# _get_<name> method defined on the class -- confirm in minicompat.
defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
468
469
class NamedNodeMap(object):
    """The attribute list is a transient interface to the underlying
    dictionaries.  Mutations here will change the underlying element's
    dictionary.

    Ordering is imposed artificially and does not reflect the order of
    attributes as found in an input document.
    """

    __slots__ = ('_attrs', '_attrsNS', '_ownerElement')

    def __init__(self, attrs, attrsNS, ownerElement):
        # attrs maps qualified name -> node; attrsNS maps
        # (namespaceURI, localName) -> node.  Both are shared with the
        # owner element, not copied.
        self._attrs = attrs
        self._attrsNS = attrsNS
        self._ownerElement = ownerElement

    def _get_length(self):
        return len(self._attrs)

    def item(self, index):
        """Return the attribute node at position *index*, or None.

        Any index outside ``0 <= index < length`` yields None; negative
        indexes do not count from the end.
        """
        if 0 <= index < len(self._attrs):
            return list(self._attrs.values())[index]
        return None

    def items(self):
        """Return a list of ``(qualified name, value)`` pairs."""
        return [(node.nodeName, node.value) for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def __contains__(self, key):
        # Strings are qualified names; anything else is looked up as a
        # (namespaceURI, localName) key.
        if isinstance(key, str):
            return key in self._attrs
        else:
            return key in self._attrsNS

    def keys(self):
        return self._attrs.keys()

    def keysNS(self):
        return self._attrsNS.keys()

    def values(self):
        return self._attrs.values()

    def get(self, name, value=None):
        """Return the node stored under qualified *name*, else *value*."""
        return self._attrs.get(name, value)

    __len__ = _get_length

    def _cmp(self, other):
        # Two maps are equal when they wrap the very same dictionary;
        # otherwise the ordering is arbitrary but stable (object ids).
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return (id(self) > id(other)) - (id(self) < id(other))

    def __eq__(self, other):
        return self._cmp(other) == 0

    def __ge__(self, other):
        return self._cmp(other) >= 0

    def __gt__(self, other):
        return self._cmp(other) > 0

    def __le__(self, other):
        return self._cmp(other) <= 0

    def __lt__(self, other):
        return self._cmp(other) < 0

    def __getitem__(self, attname_or_tuple):
        # Tuples are (namespaceURI, localName) keys; everything else is
        # treated as a qualified name.  Raises KeyError when missing.
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute *attname* from a string or install an Attr node.

        Raises ``TypeError`` for any other kind of *value*.
        """
        if isinstance(value, str):
            try:
                node = self._attrs[attname]
            except KeyError:
                node = Attr(attname)
                node.ownerDocument = self._ownerElement.ownerDocument
                self.setNamedItem(node)
            node.value = value
        else:
            if not isinstance(value, Attr):
                raise TypeError("value must be a string or Attr object")
            node = value
            self.setNamedItem(node)

    def getNamedItem(self, name):
        """Return the node with qualified *name*, or None."""
        try:
            return self._attrs[name]
        except KeyError:
            return None

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the node for ``(namespaceURI, localName)``, or None."""
        try:
            return self._attrsNS[(namespaceURI, localName)]
        except KeyError:
            return None

    def removeNamedItem(self, name):
        """Remove and return the node with qualified *name*.

        Raises ``NotFoundErr`` when there is no such node.
        """
        n = self.getNamedItem(name)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrs[n.nodeName]
            del self._attrsNS[(n.namespaceURI, n.localName)]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def removeNamedItemNS(self, namespaceURI, localName):
        """Remove and return the node for ``(namespaceURI, localName)``.

        Raises ``NotFoundErr`` when there is no such node.
        """
        n = self.getNamedItemNS(namespaceURI, localName)
        if n is not None:
            _clear_id_cache(self._ownerElement)
            del self._attrsNS[(n.namespaceURI, n.localName)]
            del self._attrs[n.nodeName]
            if hasattr(n, 'ownerElement'):
                n.ownerElement = None
            return n
        else:
            raise xml.dom.NotFoundErr()

    def setNamedItem(self, node):
        """Store *node* under its name and namespace key.

        Returns the node previously stored under the same qualified
        name (after unlinking it), or None.  Raises
        ``HierarchyRequestErr`` when *node* is not an Attr.
        """
        if not isinstance(node, Attr):
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        old = self._attrs.get(node.name)
        if old:
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node
        node.ownerElement = self._ownerElement
        _clear_id_cache(node.ownerElement)
        return old

    def setNamedItemNS(self, node):
        return self.setNamedItem(node)

    def __delitem__(self, attname_or_tuple):
        # The node's own unlink() removes it from both dictionaries.
        node = self[attname_or_tuple]
        _clear_id_cache(node.ownerElement)
        node.unlink()

    def __getstate__(self):
        return self._attrs, self._attrsNS, self._ownerElement

    def __setstate__(self, state):
        self._attrs, self._attrsNS, self._ownerElement = state
632
# Register "length" on NamedNodeMap through defproperty (imported from
# xml.dom.minicompat).
# NOTE(review): presumably the property is built from _get_length --
# confirm in minicompat.
defproperty(NamedNodeMap, "length",
            doc="Number of nodes in the NamedNodeMap.")

# Second name bound to the same class.
AttributeList = NamedNodeMap
637
638
class TypeInfo(object):
    """A (namespace, name) pair describing a schema type."""

    __slots__ = 'namespace', 'name'

    def __init__(self, namespace, name):
        self.namespace = namespace
        self.name = name

    def __repr__(self):
        # The namespace is only mentioned when there is one.
        cls_name = self.__class__.__name__
        if not self.namespace:
            return "<%s %r>" % (cls_name, self.name)
        return "<%s %r (from %r)>" % (cls_name, self.name,
                                      self.namespace)

    def _get_name(self):
        return self.name

    def _get_namespace(self):
        return self.namespace
658
# Shared TypeInfo with neither namespace nor name.  It is the default
# Element.schemaType and what Attr._get_schemaType returns when no
# element info is available.
_no_type = TypeInfo(None, None)
660
661class Element(Node):
662    __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
663               'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
664               'nextSibling', 'previousSibling')
665    nodeType = Node.ELEMENT_NODE
666    nodeValue = None
667    schemaType = _no_type
668
669    _magic_id_nodes = 0
670
671    _child_node_types = (Node.ELEMENT_NODE,
672                         Node.PROCESSING_INSTRUCTION_NODE,
673                         Node.COMMENT_NODE,
674                         Node.TEXT_NODE,
675                         Node.CDATA_SECTION_NODE,
676                         Node.ENTITY_REFERENCE_NODE)
677
678    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
679                 localName=None):
680        self.parentNode = None
681        self.tagName = self.nodeName = tagName
682        self.prefix = prefix
683        self.namespaceURI = namespaceURI
684        self.childNodes = NodeList()
685        self.nextSibling = self.previousSibling = None
686
687        # Attribute dictionaries are lazily created
688        # attributes are double-indexed:
689        #    tagName -> Attribute
690        #    URI,localName -> Attribute
691        # in the future: consider lazy generation
692        # of attribute objects this is too tricky
693        # for now because of headaches with
694        # namespaces.
695        self._attrs = None
696        self._attrsNS = None
697
698    def _ensure_attributes(self):
699        if self._attrs is None:
700            self._attrs = {}
701            self._attrsNS = {}
702
703    def _get_localName(self):
704        try:
705            return self._localName
706        except AttributeError:
707            return self.tagName.split(":", 1)[-1]
708
709    def _get_tagName(self):
710        return self.tagName
711
712    def unlink(self):
713        if self._attrs is not None:
714            for attr in list(self._attrs.values()):
715                attr.unlink()
716        self._attrs = None
717        self._attrsNS = None
718        Node.unlink(self)
719
720    def getAttribute(self, attname):
721        if self._attrs is None:
722            return ""
723        try:
724            return self._attrs[attname].value
725        except KeyError:
726            return ""
727
728    def getAttributeNS(self, namespaceURI, localName):
729        if self._attrsNS is None:
730            return ""
731        try:
732            return self._attrsNS[(namespaceURI, localName)].value
733        except KeyError:
734            return ""
735
736    def setAttribute(self, attname, value):
737        attr = self.getAttributeNode(attname)
738        if attr is None:
739            attr = Attr(attname)
740            attr.value = value # also sets nodeValue
741            attr.ownerDocument = self.ownerDocument
742            self.setAttributeNode(attr)
743        elif value != attr.value:
744            attr.value = value
745            if attr.isId:
746                _clear_id_cache(self)
747
748    def setAttributeNS(self, namespaceURI, qualifiedName, value):
749        prefix, localname = _nssplit(qualifiedName)
750        attr = self.getAttributeNodeNS(namespaceURI, localname)
751        if attr is None:
752            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
753            attr.value = value
754            attr.ownerDocument = self.ownerDocument
755            self.setAttributeNode(attr)
756        else:
757            if value != attr.value:
758                attr.value = value
759                if attr.isId:
760                    _clear_id_cache(self)
761            if attr.prefix != prefix:
762                attr.prefix = prefix
763                attr.nodeName = qualifiedName
764
765    def getAttributeNode(self, attrname):
766        if self._attrs is None:
767            return None
768        return self._attrs.get(attrname)
769
770    def getAttributeNodeNS(self, namespaceURI, localName):
771        if self._attrsNS is None:
772            return None
773        return self._attrsNS.get((namespaceURI, localName))
774
775    def setAttributeNode(self, attr):
776        if attr.ownerElement not in (None, self):
777            raise xml.dom.InuseAttributeErr("attribute node already owned")
778        self._ensure_attributes()
779        old1 = self._attrs.get(attr.name, None)
780        if old1 is not None:
781            self.removeAttributeNode(old1)
782        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
783        if old2 is not None and old2 is not old1:
784            self.removeAttributeNode(old2)
785        _set_attribute_node(self, attr)
786
787        if old1 is not attr:
788            # It might have already been part of this node, in which case
789            # it doesn't represent a change, and should not be returned.
790            return old1
791        if old2 is not attr:
792            return old2
793
794    setAttributeNodeNS = setAttributeNode
795
796    def removeAttribute(self, name):
797        if self._attrsNS is None:
798            raise xml.dom.NotFoundErr()
799        try:
800            attr = self._attrs[name]
801        except KeyError:
802            raise xml.dom.NotFoundErr()
803        self.removeAttributeNode(attr)
804
805    def removeAttributeNS(self, namespaceURI, localName):
806        if self._attrsNS is None:
807            raise xml.dom.NotFoundErr()
808        try:
809            attr = self._attrsNS[(namespaceURI, localName)]
810        except KeyError:
811            raise xml.dom.NotFoundErr()
812        self.removeAttributeNode(attr)
813
814    def removeAttributeNode(self, node):
815        if node is None:
816            raise xml.dom.NotFoundErr()
817        try:
818            self._attrs[node.name]
819        except KeyError:
820            raise xml.dom.NotFoundErr()
821        _clear_id_cache(self)
822        node.unlink()
823        # Restore this since the node is still useful and otherwise
824        # unlinked
825        node.ownerDocument = self.ownerDocument
826
827    removeAttributeNodeNS = removeAttributeNode
828
829    def hasAttribute(self, name):
830        if self._attrs is None:
831            return False
832        return name in self._attrs
833
834    def hasAttributeNS(self, namespaceURI, localName):
835        if self._attrsNS is None:
836            return False
837        return (namespaceURI, localName) in self._attrsNS
838
839    def getElementsByTagName(self, name):
840        return _get_elements_by_tagName_helper(self, name, NodeList())
841
842    def getElementsByTagNameNS(self, namespaceURI, localName):
843        return _get_elements_by_tagName_ns_helper(
844            self, namespaceURI, localName, NodeList())
845
846    def __repr__(self):
847        return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
848
849    def writexml(self, writer, indent="", addindent="", newl=""):
850        # indent = current indentation
851        # addindent = indentation to add to higher levels
852        # newl = newline string
853        writer.write(indent+"<" + self.tagName)
854
855        attrs = self._get_attributes()
856        a_names = sorted(attrs.keys())
857
858        for a_name in a_names:
859            writer.write(" %s=\"" % a_name)
860            _write_data(writer, attrs[a_name].value)
861            writer.write("\"")
862        if self.childNodes:
863            writer.write(">")
864            if (len(self.childNodes) == 1 and
865                self.childNodes[0].nodeType == Node.TEXT_NODE):
866                self.childNodes[0].writexml(writer, '', '', '')
867            else:
868                writer.write(newl)
869                for node in self.childNodes:
870                    node.writexml(writer, indent+addindent, addindent, newl)
871                writer.write(indent)
872            writer.write("</%s>%s" % (self.tagName, newl))
873        else:
874            writer.write("/>%s"%(newl))
875
876    def _get_attributes(self):
877        self._ensure_attributes()
878        return NamedNodeMap(self._attrs, self._attrsNS, self)
879
880    def hasAttributes(self):
881        if self._attrs:
882            return True
883        else:
884            return False
885
886    # DOM Level 3 attributes, based on the 22 Oct 2002 draft
887
888    def setIdAttribute(self, name):
889        idAttr = self.getAttributeNode(name)
890        self.setIdAttributeNode(idAttr)
891
    def setIdAttributeNS(self, namespaceURI, localName):
        """Mark the attribute (namespaceURI, localName) as an ID attribute.

        Looks the attribute node up with getAttributeNodeNS() and passes
        the result to setIdAttributeNode().
        """
        idAttr = self.getAttributeNodeNS(namespaceURI, localName)
        self.setIdAttributeNode(idAttr)
895
896    def setIdAttributeNode(self, idAttr):
897        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
898            raise xml.dom.NotFoundErr()
899        if _get_containing_entref(self) is not None:
900            raise xml.dom.NoModificationAllowedErr()
901        if not idAttr._is_id:
902            idAttr._is_id = True
903            self._magic_id_nodes += 1
904            self.ownerDocument._magic_id_count += 1
905            _clear_id_cache(self)
906
# Publish "attributes" and "localName" as documented properties of Element.
# NOTE(review): defproperty is not defined in this file; it presumably comes
# from the xml.dom.minicompat star-import -- confirm its semantics there.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
911
912
def _set_attribute_node(element, attr):
    """Register `attr` on `element` under both of its lookup keys.

    The attribute is stored by qualified name in element._attrs and by
    (namespaceURI, localName) in element._attrsNS, after the ID cache has
    been cleared for the element.
    """
    _clear_id_cache(element)
    element._ensure_attributes()
    by_name, by_ns = element._attrs, element._attrsNS
    by_name[attr.name] = attr
    by_ns[(attr.namespaceURI, attr.localName)] = attr

    # The back-reference below forms a cycle; Element.unlink() breaks it
    # by discarding its references to the attribute dictionaries.
    attr.ownerElement = element
923
class Childless:
    """Mixin for node types that can never have children.

    It supplies empty child accessors and makes every child-manipulating
    method raise, so such nodes skip the child handling in Node.
    """
    __slots__ = ()

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        # There is never a first child.
        return None

    def _get_lastChild(self):
        # There is never a last child.
        return None

    def hasChildNodes(self):
        """Always False for childless nodes."""
        return False

    def normalize(self):
        # For childless nodes, normalize() has nothing to do.
        pass

    def appendChild(self, node):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Always raises xml.dom.NotFoundErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Always raises xml.dom.HierarchyRequestErr."""
        message = self.nodeName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
963
964
class ProcessingInstruction(Childless, Node):
    """Processing-instruction node made of a target and a data string.

    nodeName is a read/write alias of `target`; nodeValue is a read/write
    alias of `data`.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    __slots__ = ('target', 'data')

    def __init__(self, target, data):
        self.target, self.data = target, data

    # nodeValue mirrors data
    def _get_nodeValue(self):
        return self.data

    def _set_nodeValue(self, value):
        self.data = value

    nodeValue = property(_get_nodeValue, _set_nodeValue)

    # nodeName mirrors target
    def _get_nodeName(self):
        return self.target

    def _set_nodeName(self, value):
        self.target = value

    nodeName = property(_get_nodeName, _set_nodeName)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the instruction as `<?target data?>` in one write call."""
        rendered = "{}<?{} {}?>{}".format(indent, self.target, self.data, newl)
        writer.write(rendered)
989
990
class CharacterData(Childless, Node):
    """Common base for nodes whose content is a single string.

    The string is stored in ``_data`` and exposed through the ``data`` and
    ``nodeValue`` properties.  The editing methods follow the DOM
    CharacterData interface: an offset may be anywhere from 0 up to and
    including ``len(data)``.  Any other offset, or a negative count,
    raises xml.dom.IndexSizeErr.
    """
    __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')

    def __init__(self):
        self.ownerDocument = self.parentNode = None
        self.previousSibling = self.nextSibling = None
        self._data = ''
        Node.__init__(self)

    def _get_length(self):
        return len(self.data)
    __len__ = _get_length

    def _get_data(self):
        return self._data
    def _set_data(self, data):
        self._data = data

    data = nodeValue = property(_get_data, _set_data)

    def __repr__(self):
        data = self.data
        # Show at most the first ten characters, with an ellipsis marker
        # when the data was truncated.
        if len(data) > 10:
            dotdotdot = "..."
        else:
            dotdotdot = ""
        return '<DOM %s node "%r%s">' % (
            self.__class__.__name__, data[0:10], dotdotdot)

    def _check_offset(self, offset):
        # Shared offset validation.  The end position (offset == length)
        # is legal: the DOM only rejects offsets that are negative or
        # greater than the length of the data.
        if offset < 0:
            raise xml.dom.IndexSizeErr("offset cannot be negative")
        if offset > len(self.data):
            raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")

    def substringData(self, offset, count):
        """Return up to `count` characters starting at `offset`.

        Raises xml.dom.IndexSizeErr for an offset outside 0..len(data) or
        a negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        return self.data[offset:offset+count]

    def appendData(self, arg):
        """Append `arg` to the end of the data."""
        self.data = self.data + arg

    def insertData(self, offset, arg):
        """Insert `arg` at `offset`; offset == len(data) appends.

        Raises xml.dom.IndexSizeErr for an offset outside 0..len(data).
        """
        self._check_offset(offset)
        if arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset:])

    def deleteData(self, offset, count):
        """Remove up to `count` characters starting at `offset`.

        Raises xml.dom.IndexSizeErr for an offset outside 0..len(data) or
        a negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        if count:
            self.data = self.data[:offset] + self.data[offset+count:]

    def replaceData(self, offset, count, arg):
        """Replace up to `count` characters at `offset` with `arg`.

        A count of 0 is a pure insertion of `arg`.  Raises
        xml.dom.IndexSizeErr for an offset outside 0..len(data) or a
        negative count.
        """
        self._check_offset(offset)
        if count < 0:
            raise xml.dom.IndexSizeErr("count cannot be negative")
        # Only skip the assignment when nothing would change: no
        # characters removed and nothing to put in their place.
        if count or arg:
            self.data = "%s%s%s" % (
                self.data[:offset], arg, self.data[offset+count:])
1061
# Publish "length" as a documented property of CharacterData.
# NOTE(review): presumably backed by CharacterData._get_length -- confirm
# against defproperty's definition, which is not in this file.
defproperty(CharacterData, "length", doc="Length of the string data.")
1063
1064
class Text(CharacterData):
    """Text node holding character data."""
    __slots__ = ()

    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None

    def splitText(self, offset):
        """Split this node at `offset` and return the new trailing node.

        The new node is of the same class, shares this node's
        ownerDocument and, when this node is among its parent's children,
        is inserted directly after it.  Raises xml.dom.IndexSizeErr when
        `offset` is negative or greater than len(data).
        """
        if offset < 0 or offset > len(self.data):
            raise xml.dom.IndexSizeErr("illegal offset value")
        tail = self.__class__()
        tail.data = self.data[offset:]
        tail.ownerDocument = self.ownerDocument
        follower = self.nextSibling
        parent = self.parentNode
        if parent and self in parent.childNodes:
            if follower is None:
                parent.appendChild(tail)
            else:
                parent.insertBefore(tail, follower)
        self.data = self.data[:offset]
        return tail

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write indent + data + newl to `writer` through _write_data."""
        _write_data(writer, "{}{}{}".format(indent, self.data, newl))

    # DOM Level 3 (WD 9 April 2002)

    def _get_wholeText(self):
        """Join the data of this node and all adjacent text/CDATA siblings."""
        text_like = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

        # Walk backwards; the pieces come out in reverse document order.
        preceding = []
        sibling = self.previousSibling
        while sibling is not None and sibling.nodeType in text_like:
            preceding.append(sibling.data)
            sibling = sibling.previousSibling
        preceding.reverse()

        following = []
        sibling = self.nextSibling
        while sibling is not None and sibling.nodeType in text_like:
            following.append(sibling.data)
            sibling = sibling.nextSibling

        return ''.join(preceding + [self.data] + following)

    def replaceWholeText(self, content):
        """Replace this node and its adjacent text/CDATA siblings by `content`.

        The adjacent siblings are removed from the parent.  With non-empty
        `content` this node keeps its place, takes the new data and is
        returned; with empty `content` this node is removed as well and
        None is returned.
        """
        # XXX This needs to be seriously changed if minidom ever
        # supports EntityReference nodes.
        text_like = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        parent = self.parentNode

        node = self.previousSibling
        while node is not None and node.nodeType in text_like:
            # Fetch the neighbour before the node is removed.
            earlier = node.previousSibling
            parent.removeChild(node)
            node = earlier

        # Capture the following sibling before this node may be removed.
        node = self.nextSibling
        if not content:
            parent.removeChild(self)
        while node is not None and node.nodeType in text_like:
            later = node.nextSibling
            parent.removeChild(node)
            node = later

        if not content:
            return None
        self.data = content
        return self

    def _get_isWhitespaceInElementContent(self):
        """Report whether this is whitespace-only text in element content.

        False unless the data is all whitespace (or empty), a containing
        element exists, the owner document has element info for it, and
        that info's isElementContent() says so.
        """
        if self.data.strip():
            return False
        container = _get_containing_element(self)
        if container is None:
            return False
        info = self.ownerDocument._get_elem_info(container)
        return False if info is None else info.isElementContent()
1149
# Publish "isWhitespaceInElementContent" and "wholeText" as documented
# properties of Text.
# NOTE(review): presumably backed by the matching Text._get_* methods --
# confirm against defproperty's definition, which is not in this file.
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
1155
1156
def _get_containing_element(node):
    """Return the nearest ancestor of `node` that is an element, or None."""
    ancestor = node.parentNode
    while ancestor is not None and ancestor.nodeType != Node.ELEMENT_NODE:
        ancestor = ancestor.parentNode
    return ancestor
1164
def _get_containing_entref(node):
    """Return the nearest entity-reference ancestor of `node`, or None."""
    ancestor = node.parentNode
    while (ancestor is not None
           and ancestor.nodeType != Node.ENTITY_REFERENCE_NODE):
        ancestor = ancestor.parentNode
    return ancestor
1172
1173
class Comment(CharacterData):
    """Comment node; its data is the text between `<!--` and `-->`."""
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"

    def __init__(self, data):
        CharacterData.__init__(self)
        self._data = data

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the comment markup; ValueError if the data contains '--'."""
        text = self.data
        if "--" in text:
            raise ValueError("'--' is not allowed in a comment node")
        writer.write("{}<!--{}-->{}".format(indent, text, newl))
1186
1187
class CDATASection(Text):
    """CDATA section node; a Text variant written inside `<![CDATA[...]]>`."""
    __slots__ = ()

    nodeType = Node.CDATA_SECTION_NODE
    nodeName = "#cdata-section"

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the section; indent, addindent and newl are not used.

        Raises ValueError if the data contains the terminator ']]>'.
        """
        content = self.data
        if "]]>" in content:
            raise ValueError("']]>' not allowed in a CDATA section")
        writer.write("<![CDATA[{}]]>".format(content))
1198
1199
class ReadOnlySequentialNamedNodeMap(object):
    """Read-only NamedNodeMap backed by a plain sequence of nodes.

    Lookups scan the sequence linearly; every mutating method raises
    xml.dom.NoModificationAllowedErr.
    """
    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first node whose nodeName equals `name`, else None."""
        matches = (node for node in self._seq if node.nodeName == name)
        return next(matches, None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first node matching both namespace and local name."""
        matches = (node for node in self._seq
                   if node.namespaceURI == namespaceURI
                   and node.localName == localName)
        return next(matches, None)

    def __getitem__(self, name_or_tuple):
        """Look up by name, or by a (namespaceURI, localName) tuple.

        Raises KeyError when nothing matches.
        """
        found = (self.getNamedItemNS(*name_or_tuple)
                 if isinstance(name_or_tuple, tuple)
                 else self.getNamedItem(name_or_tuple))
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the node at `index`, or None if negative or out of range."""
        if index < 0:
            return None
        try:
            node = self._seq[index]
        except IndexError:
            return None
        return node

    def removeNamedItem(self, name):
        """Always raises: the map is read-only."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItemNS(self, namespaceURI, localName):
        """Always raises: the map is read-only."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItem(self, node):
        """Always raises: the map is read-only."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def setNamedItemNS(self, node):
        """Always raises: the map is read-only."""
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def __getstate__(self):
        # Pickle state: a one-element list holding the sequence.
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
1261
# Publish "length" as a documented property of the read-only node map.
# NOTE(review): presumably backed by _get_length -- confirm against
# defproperty's definition, which is not in this file.
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
1264
1265
class Identified:
    """Mix-in providing the publicId and systemId attributes."""

    __slots__ = 'publicId', 'systemId'

    def _identified_mixin_init(self, publicId, systemId):
        # Store both identifiers on the instance.
        self.publicId, self.systemId = publicId, systemId

    def _get_publicId(self):
        """Return the stored public identifier."""
        return self.publicId

    def _get_systemId(self):
        """Return the stored system identifier."""
        return self.systemId
1280
class DocumentType(Identified, Childless, Node):
    """Document type node: name, identifiers, entities and notations."""
    nodeType = Node.DOCUMENT_TYPE_NODE
    nodeValue = None
    name = None
    publicId = None
    systemId = None
    internalSubset = None

    def __init__(self, qualifiedName):
        self.entities = ReadOnlySequentialNamedNodeMap()
        self.notations = ReadOnlySequentialNamedNodeMap()
        if qualifiedName:
            # Only the local part of the qualified name is kept.
            self.name = _nssplit(qualifiedName)[1]
        self.nodeName = self.name

    def _get_internalSubset(self):
        return self.internalSubset

    def cloneNode(self, deep):
        """Clone this doctype, or return None if it has an owner document.

        The clone copies the name.  With `deep` true it also gets fresh
        copies of the notations and entities.  User-data handlers are
        notified with NODE_CLONED for every node cloned.
        """
        if self.ownerDocument is not None:
            return None

        # it's ok
        clone = DocumentType(None)
        clone.name = self.name
        clone.nodeName = self.name
        operation = xml.dom.UserDataHandler.NODE_CLONED
        if deep:
            clone.entities._seq = []
            clone.notations._seq = []
            for src_notation in self.notations._seq:
                new_notation = Notation(src_notation.nodeName,
                                        src_notation.publicId,
                                        src_notation.systemId)
                clone.notations._seq.append(new_notation)
                src_notation._call_user_data_handler(
                    operation, src_notation, new_notation)
            for src_entity in self.entities._seq:
                new_entity = Entity(src_entity.nodeName,
                                    src_entity.publicId,
                                    src_entity.systemId,
                                    src_entity.notationName)
                new_entity.actualEncoding = src_entity.actualEncoding
                new_entity.encoding = src_entity.encoding
                new_entity.version = src_entity.version
                clone.entities._seq.append(new_entity)
                src_entity._call_user_data_handler(
                    operation, src_entity, new_entity)
        self._call_user_data_handler(operation, self, clone)
        return clone

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write the `<!DOCTYPE ...>` declaration to `writer`.

        A PUBLIC clause is written when publicId is set, otherwise a
        SYSTEM clause when systemId is set.  The internal subset, when
        not None, follows in square brackets.
        """
        write = writer.write
        write("<!DOCTYPE ")
        write(self.name)
        if self.publicId:
            external_id = "%s  PUBLIC '%s'%s  '%s'" % (
                newl, self.publicId, newl, self.systemId)
            write(external_id)
        elif self.systemId:
            external_id = "%s  SYSTEM '%s'" % (newl, self.systemId)
            write(external_id)
        if self.internalSubset is not None:
            write(" [")
            write(self.internalSubset)
            write("]")
        write(">"+newl)
1340
class Entity(Identified, Node):
    """Entity node; its child list cannot be modified through the DOM API."""
    attributes = None
    nodeType = Node.ENTITY_NODE
    nodeValue = None

    actualEncoding = None
    encoding = None
    version = None

    def __init__(self, name, publicId, systemId, notation):
        self.nodeName = name
        self.notationName = notation
        self.childNodes = NodeList()
        self._identified_mixin_init(publicId, systemId)

    def _get_actualEncoding(self):
        return self.actualEncoding

    def _get_encoding(self):
        return self.encoding

    def _get_version(self):
        return self.version

    # Every child-modifying operation is rejected with
    # xml.dom.HierarchyRequestErr.

    def appendChild(self, newChild):
        message = "cannot append children to an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def insertBefore(self, newChild, refChild):
        message = "cannot insert children below an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        message = "cannot remove children from an entity node"
        raise xml.dom.HierarchyRequestErr(message)

    def replaceChild(self, newChild, oldChild):
        message = "cannot replace children of an entity node"
        raise xml.dom.HierarchyRequestErr(message)
1380
class Notation(Identified, Childless, Node):
    """Notation node: a name plus public and system identifiers."""
    nodeType = Node.NOTATION_NODE
    nodeValue = None

    def __init__(self, name, publicId, systemId):
        self._identified_mixin_init(publicId, systemId)
        self.nodeName = name
1388
1389
class DOMImplementation(DOMImplementationLS):
    """Factory for Document and DocumentType objects, with feature queries."""
    _features = [("core", "1.0"),
                 ("core", "2.0"),
                 ("core", None),
                 ("xml", "1.0"),
                 ("xml", "2.0"),
                 ("xml", None),
                 ("ls-load", "3.0"),
                 ("ls-load", None),
                 ]

    def hasFeature(self, feature, version):
        """Return True if (feature, version) is listed in _features.

        The feature name is matched case-insensitively and an empty
        version string is treated like None.
        """
        wanted_version = None if version == "" else version
        return (feature.lower(), wanted_version) in self._features

    def createDocument(self, namespaceURI, qualifiedName, doctype):
        """Create a document, optionally with a doctype and a root element.

        When all three arguments are None the document is returned
        without a document element.  Raises xml.dom.WrongDocumentErr if
        `doctype` already has a parent, xml.dom.InvalidCharacterErr if a
        root element is required but `qualifiedName` is empty, and
        xml.dom.NamespaceErr for an illegal prefix/namespace combination.
        """
        if doctype and doctype.parentNode is not None:
            raise xml.dom.WrongDocumentErr(
                "doctype object owned by another DOM tree")
        doc = self._create_document()

        nothing_requested = (namespaceURI is None
                             and qualifiedName is None
                             and doctype is None)

        if not nothing_requested:
            if not qualifiedName:
                # The spec is unclear what to raise here; SyntaxErr
                # would be the other obvious candidate. Since Xerces raises
                # InvalidCharacterErr, and since SyntaxErr is not listed
                # for createDocument, that seems to be the better choice.
                # XXX: need to check for illegal characters here and in
                # createElement.

                # DOM Level III clears this up when talking about the
                # return value of this function.  If namespaceURI, qName
                # and DocType are Null the document is returned without a
                # document element.  Otherwise if doctype or namespaceURI
                # are not None, then we go back to the above problem.
                raise xml.dom.InvalidCharacterErr("Element with no name")

            prefix, localname = _nssplit(qualifiedName)
            xml_prefix_misused = (
                prefix == "xml"
                and namespaceURI != "http://www.w3.org/XML/1998/namespace")
            if xml_prefix_misused:
                raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
            if prefix and not namespaceURI:
                raise xml.dom.NamespaceErr(
                    "illegal use of prefix without namespaces")
            root = doc.createElementNS(namespaceURI, qualifiedName)
            if doctype:
                doc.appendChild(doctype)
            doc.appendChild(root)

        if doctype:
            doctype.parentNode = doctype.ownerDocument = doc

        doc.doctype = doctype
        doc.implementation = self
        return doc

    def createDocumentType(self, qualifiedName, publicId, systemId):
        """Create a DocumentType with the given name and identifiers."""
        new_doctype = DocumentType(qualifiedName)
        new_doctype.publicId = publicId
        new_doctype.systemId = systemId
        return new_doctype

    # DOM Level 3 (WD 9 April 2002)

    def getInterface(self, feature):
        """Return self if `feature` is supported in any version, else None."""
        return self if self.hasFeature(feature, None) else None

    # internal
    def _create_document(self):
        return Document()
1468
class ElementInfo(object):
    """Object that represents content-model information for an element.

    This implementation is not expected to be used in practice; DOM
    builders should provide implementations which do the right thing
    using information available to it.

    """

    __slots__ = 'tagName',

    def __init__(self, name):
        self.tagName = name

    # Attribute typing: this default implementation knows no types.

    def getAttributeType(self, aname):
        """Return the _no_type placeholder for every attribute name."""
        return _no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Return the _no_type placeholder for every attribute."""
        return _no_type

    # Content-model queries: every answer is False here.

    def isElementContent(self):
        """Always False in this default implementation."""
        return False

    def isEmpty(self):
        """Returns true iff this element is declared to have an EMPTY
        content model."""
        return False

    def isId(self, aname):
        """Returns true iff the named attribute is a DTD-style ID."""
        return False

    def isIdNS(self, namespaceURI, localName):
        """Returns true iff the identified attribute is a DTD-style ID."""
        return False

    # Pickle support: the state is just the tag name.

    def __getstate__(self):
        return self.tagName

    def __setstate__(self, state):
        self.tagName = state
1510
def _clear_id_cache(node):
    """Reset the ID lookup cache of the document that `node` belongs to.

    A document node is reset directly.  Any other node resets its
    ownerDocument, but only if _in_document() reports it as being in a
    document.  Otherwise nothing happens.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        document = node
    elif _in_document(node):
        document = node.ownerDocument
    else:
        return
    document._id_cache.clear()
    document._id_search_stack = None
1518
class Document(Node, DocumentLS):
    """Document node: the root of a DOM tree and the factory for its nodes."""
    __slots__ = ('_elem_info', 'doctype',
                 '_id_search_stack', 'childNodes', '_id_cache')
    # Node types that appendChild() accepts as direct children.
    _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
                         Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)

    # Class-wide default implementation; DOMImplementation.createDocument()
    # overrides it per instance.
    implementation = DOMImplementation()
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    parentNode = None
    previousSibling = nextSibling = None


    # Document attributes from Level 3 (WD 9 April 2002)

    actualEncoding = None
    encoding = None
    standalone = None
    version = None
    strictErrorChecking = False
    errorHandler = None
    documentURI = None

    # Incremented by Element.setIdAttributeNode(); getElementById() skips
    # the search when both this counter and _elem_info are falsy.
    _magic_id_count = 0
1545
1546    def __init__(self):
1547        self.doctype = None
1548        self.childNodes = NodeList()
1549        # mapping of (namespaceURI, localName) -> ElementInfo
1550        #        and tagName -> ElementInfo
1551        self._elem_info = {}
1552        self._id_cache = {}
1553        self._id_search_stack = None
1554
1555    def _get_elem_info(self, element):
1556        if element.namespaceURI:
1557            key = element.namespaceURI, element.localName
1558        else:
1559            key = element.tagName
1560        return self._elem_info.get(key)
1561
    def _get_actualEncoding(self):
        """Return the current value of the actualEncoding attribute."""
        return self.actualEncoding
1564
    def _get_doctype(self):
        """Return the current value of the doctype attribute."""
        return self.doctype
1567
    def _get_documentURI(self):
        """Return the current value of the documentURI attribute."""
        return self.documentURI
1570
    def _get_encoding(self):
        """Return the current value of the encoding attribute."""
        return self.encoding
1573
    def _get_errorHandler(self):
        """Return the current value of the errorHandler attribute."""
        return self.errorHandler
1576
    def _get_standalone(self):
        """Return the current value of the standalone attribute."""
        return self.standalone
1579
    def _get_strictErrorChecking(self):
        """Return the current value of the strictErrorChecking attribute."""
        return self.strictErrorChecking
1582
    def _get_version(self):
        """Return the current value of the version attribute."""
        return self.version
1585
1586    def appendChild(self, node):
1587        if node.nodeType not in self._child_node_types:
1588            raise xml.dom.HierarchyRequestErr(
1589                "%s cannot be child of %s" % (repr(node), repr(self)))
1590        if node.parentNode is not None:
1591            # This needs to be done before the next test since this
1592            # may *be* the document element, in which case it should
1593            # end up re-ordered to the end.
1594            node.parentNode.removeChild(node)
1595
1596        if node.nodeType == Node.ELEMENT_NODE \
1597           and self._get_documentElement():
1598            raise xml.dom.HierarchyRequestErr(
1599                "two document elements disallowed")
1600        return Node.appendChild(self, node)
1601
1602    def removeChild(self, oldChild):
1603        try:
1604            self.childNodes.remove(oldChild)
1605        except ValueError:
1606            raise xml.dom.NotFoundErr()
1607        oldChild.nextSibling = oldChild.previousSibling = None
1608        oldChild.parentNode = None
1609        if self.documentElement is oldChild:
1610            self.documentElement = None
1611
1612        return oldChild
1613
1614    def _get_documentElement(self):
1615        for node in self.childNodes:
1616            if node.nodeType == Node.ELEMENT_NODE:
1617                return node
1618
1619    def unlink(self):
1620        if self.doctype is not None:
1621            self.doctype.unlink()
1622            self.doctype = None
1623        Node.unlink(self)
1624
    def cloneNode(self, deep):
        """Return a deep copy of this document; None when `deep` is false.

        The copy is created through self.implementation, takes over the
        encoding, standalone and version attributes, and receives a clone
        of every child.  The user-data handler is notified with
        NODE_CLONED once the copy is complete.
        """
        if not deep:
            return None
        clone = self.implementation.createDocument(None, None, None)
        clone.encoding = self.encoding
        clone.standalone = self.standalone
        clone.version = self.version
        for n in self.childNodes:
            childclone = _clone_node(n, deep, clone)
            assert childclone.ownerDocument.isSameNode(clone)
            # The clone is added to the list directly rather than through
            # clone.appendChild().
            clone.childNodes.append(childclone)
            # NOTE(review): this branch tests for DOCUMENT_NODE, which a
            # child clone is not expected to be; ELEMENT_NODE may have been
            # intended, but the assert would then run after the append
            # above -- confirm before changing.
            if childclone.nodeType == Node.DOCUMENT_NODE:
                assert clone.documentElement is None
            elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
                assert clone.doctype is None
                clone.doctype = childclone
            childclone.parentNode = clone
        self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
                                     self, clone)
        return clone
1645
1646    def createDocumentFragment(self):
1647        d = DocumentFragment()
1648        d.ownerDocument = self
1649        return d
1650
1651    def createElement(self, tagName):
1652        e = Element(tagName)
1653        e.ownerDocument = self
1654        return e
1655
1656    def createTextNode(self, data):
1657        if not isinstance(data, str):
1658            raise TypeError("node contents must be a string")
1659        t = Text()
1660        t.data = data
1661        t.ownerDocument = self
1662        return t
1663
1664    def createCDATASection(self, data):
1665        if not isinstance(data, str):
1666            raise TypeError("node contents must be a string")
1667        c = CDATASection()
1668        c.data = data
1669        c.ownerDocument = self
1670        return c
1671
1672    def createComment(self, data):
1673        c = Comment(data)
1674        c.ownerDocument = self
1675        return c
1676
1677    def createProcessingInstruction(self, target, data):
1678        p = ProcessingInstruction(target, data)
1679        p.ownerDocument = self
1680        return p
1681
1682    def createAttribute(self, qName):
1683        a = Attr(qName)
1684        a.ownerDocument = self
1685        a.value = ""
1686        return a
1687
1688    def createElementNS(self, namespaceURI, qualifiedName):
1689        prefix, localName = _nssplit(qualifiedName)
1690        e = Element(qualifiedName, namespaceURI, prefix)
1691        e.ownerDocument = self
1692        return e
1693
1694    def createAttributeNS(self, namespaceURI, qualifiedName):
1695        prefix, localName = _nssplit(qualifiedName)
1696        a = Attr(qualifiedName, namespaceURI, localName, prefix)
1697        a.ownerDocument = self
1698        a.value = ""
1699        return a
1700
1701    # A couple of implementation-specific helpers to create node types
1702    # not supported by the W3C DOM specs:
1703
1704    def _create_entity(self, name, publicId, systemId, notationName):
1705        e = Entity(name, publicId, systemId, notationName)
1706        e.ownerDocument = self
1707        return e
1708
1709    def _create_notation(self, name, publicId, systemId):
1710        n = Notation(name, publicId, systemId)
1711        n.ownerDocument = self
1712        return n
1713
1714    def getElementById(self, id):
1715        if id in self._id_cache:
1716            return self._id_cache[id]
1717        if not (self._elem_info or self._magic_id_count):
1718            return None
1719
1720        stack = self._id_search_stack
1721        if stack is None:
1722            # we never searched before, or the cache has been cleared
1723            stack = [self.documentElement]
1724            self._id_search_stack = stack
1725        elif not stack:
1726            # Previous search was completed and cache is still valid;
1727            # no matching node.
1728            return None
1729
1730        result = None
1731        while stack:
1732            node = stack.pop()
1733            # add child elements to stack for continued searching
1734            stack.extend([child for child in node.childNodes
1735                          if child.nodeType in _nodeTypes_with_children])
1736            # check this node
1737            info = self._get_elem_info(node)
1738            if info:
1739                # We have to process all ID attributes before
1740                # returning in order to get all the attributes set to
1741                # be IDs using Element.setIdAttribute*().
1742                for attr in node.attributes.values():
1743                    if attr.namespaceURI:
1744                        if info.isIdNS(attr.namespaceURI, attr.localName):
1745                            self._id_cache[attr.value] = node
1746                            if attr.value == id:
1747                                result = node
1748                            elif not node._magic_id_nodes:
1749                                break
1750                    elif info.isId(attr.name):
1751                        self._id_cache[attr.value] = node
1752                        if attr.value == id:
1753                            result = node
1754                        elif not node._magic_id_nodes:
1755                            break
1756                    elif attr._is_id:
1757                        self._id_cache[attr.value] = node
1758                        if attr.value == id:
1759                            result = node
1760                        elif node._magic_id_nodes == 1:
1761                            break
1762            elif node._magic_id_nodes:
1763                for attr in node.attributes.values():
1764                    if attr._is_id:
1765                        self._id_cache[attr.value] = node
1766                        if attr.value == id:
1767                            result = node
1768            if result is not None:
1769                break
1770        return result
1771
1772    def getElementsByTagName(self, name):
1773        return _get_elements_by_tagName_helper(self, name, NodeList())
1774
1775    def getElementsByTagNameNS(self, namespaceURI, localName):
1776        return _get_elements_by_tagName_ns_helper(
1777            self, namespaceURI, localName, NodeList())
1778
1779    def isSupported(self, feature, version):
1780        return self.implementation.hasFeature(feature, version)
1781
1782    def importNode(self, node, deep):
1783        if node.nodeType == Node.DOCUMENT_NODE:
1784            raise xml.dom.NotSupportedErr("cannot import document nodes")
1785        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1786            raise xml.dom.NotSupportedErr("cannot import document type nodes")
1787        return _clone_node(node, deep, self)
1788
1789    def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
1790        if encoding is None:
1791            writer.write('<?xml version="1.0" ?>'+newl)
1792        else:
1793            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
1794                encoding, newl))
1795        for node in self.childNodes:
1796            node.writexml(writer, indent, addindent, newl)
1797
1798    # DOM Level 3 (WD 9 April 2002)
1799
1800    def renameNode(self, n, namespaceURI, name):
1801        if n.ownerDocument is not self:
1802            raise xml.dom.WrongDocumentErr(
1803                "cannot rename nodes from other documents;\n"
1804                "expected %s,\nfound %s" % (self, n.ownerDocument))
1805        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
1806            raise xml.dom.NotSupportedErr(
1807                "renameNode() only applies to element and attribute nodes")
1808        if namespaceURI != EMPTY_NAMESPACE:
1809            if ':' in name:
1810                prefix, localName = name.split(':', 1)
1811                if (  prefix == "xmlns"
1812                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
1813                    raise xml.dom.NamespaceErr(
1814                        "illegal use of 'xmlns' prefix")
1815            else:
1816                if (  name == "xmlns"
1817                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
1818                      and n.nodeType == Node.ATTRIBUTE_NODE):
1819                    raise xml.dom.NamespaceErr(
1820                        "illegal use of the 'xmlns' attribute")
1821                prefix = None
1822                localName = name
1823        else:
1824            prefix = None
1825            localName = None
1826        if n.nodeType == Node.ATTRIBUTE_NODE:
1827            element = n.ownerElement
1828            if element is not None:
1829                is_id = n._is_id
1830                element.removeAttributeNode(n)
1831        else:
1832            element = None
1833        n.prefix = prefix
1834        n._localName = localName
1835        n.namespaceURI = namespaceURI
1836        n.nodeName = name
1837        if n.nodeType == Node.ELEMENT_NODE:
1838            n.tagName = name
1839        else:
1840            # attribute node
1841            n.name = name
1842            if element is not None:
1843                element.setAttributeNode(n)
1844                if is_id:
1845                    element.setIdAttributeNode(n)
1846        # It's not clear from a semantic perspective whether we should
1847        # call the user data handlers for the NODE_RENAMED event since
1848        # we're re-using the existing node.  The draft spec has been
1849        # interpreted as meaning "no, don't call the handler unless a
1850        # new node is created."
1851        return n
1852
# Register the "documentElement" attribute of Document through
# defproperty(), attaching the documentation string below.
# NOTE(review): defproperty is presumably the helper star-imported from
# xml.dom.minicompat; its exact behaviour is not visible from here.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
1855
1856
1857def _clone_node(node, deep, newOwnerDocument):
1858    """
1859    Clone a node and give it the new owner document.
1860    Called by Node.cloneNode and Document.importNode
1861    """
1862    if node.ownerDocument.isSameNode(newOwnerDocument):
1863        operation = xml.dom.UserDataHandler.NODE_CLONED
1864    else:
1865        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1866    if node.nodeType == Node.ELEMENT_NODE:
1867        clone = newOwnerDocument.createElementNS(node.namespaceURI,
1868                                                 node.nodeName)
1869        for attr in node.attributes.values():
1870            clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
1871            a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
1872            a.specified = attr.specified
1873
1874        if deep:
1875            for child in node.childNodes:
1876                c = _clone_node(child, deep, newOwnerDocument)
1877                clone.appendChild(c)
1878
1879    elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
1880        clone = newOwnerDocument.createDocumentFragment()
1881        if deep:
1882            for child in node.childNodes:
1883                c = _clone_node(child, deep, newOwnerDocument)
1884                clone.appendChild(c)
1885
1886    elif node.nodeType == Node.TEXT_NODE:
1887        clone = newOwnerDocument.createTextNode(node.data)
1888    elif node.nodeType == Node.CDATA_SECTION_NODE:
1889        clone = newOwnerDocument.createCDATASection(node.data)
1890    elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
1891        clone = newOwnerDocument.createProcessingInstruction(node.target,
1892                                                             node.data)
1893    elif node.nodeType == Node.COMMENT_NODE:
1894        clone = newOwnerDocument.createComment(node.data)
1895    elif node.nodeType == Node.ATTRIBUTE_NODE:
1896        clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
1897                                                   node.nodeName)
1898        clone.specified = True
1899        clone.value = node.value
1900    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
1901        assert node.ownerDocument is not newOwnerDocument
1902        operation = xml.dom.UserDataHandler.NODE_IMPORTED
1903        clone = newOwnerDocument.implementation.createDocumentType(
1904            node.name, node.publicId, node.systemId)
1905        clone.ownerDocument = newOwnerDocument
1906        if deep:
1907            clone.entities._seq = []
1908            clone.notations._seq = []
1909            for n in node.notations._seq:
1910                notation = Notation(n.nodeName, n.publicId, n.systemId)
1911                notation.ownerDocument = newOwnerDocument
1912                clone.notations._seq.append(notation)
1913                if hasattr(n, '_call_user_data_handler'):
1914                    n._call_user_data_handler(operation, n, notation)
1915            for e in node.entities._seq:
1916                entity = Entity(e.nodeName, e.publicId, e.systemId,
1917                                e.notationName)
1918                entity.actualEncoding = e.actualEncoding
1919                entity.encoding = e.encoding
1920                entity.version = e.version
1921                entity.ownerDocument = newOwnerDocument
1922                clone.entities._seq.append(entity)
1923                if hasattr(e, '_call_user_data_handler'):
1924                    e._call_user_data_handler(operation, e, entity)
1925    else:
1926        # Note the cloning of Document and DocumentType nodes is
1927        # implementation specific.  minidom handles those cases
1928        # directly in the cloneNode() methods.
1929        raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
1930
1931    # Check for _call_user_data_handler() since this could conceivably
1932    # used with other DOM implementations (one of the FourThought
1933    # DOMs, perhaps?).
1934    if hasattr(node, '_call_user_data_handler'):
1935        node._call_user_data_handler(operation, node, clone)
1936    return clone
1937
1938
1939def _nssplit(qualifiedName):
1940    fields = qualifiedName.split(':', 1)
1941    if len(fields) == 2:
1942        return fields
1943    else:
1944        return (None, fields[0])
1945
1946
1947def _do_pulldom_parse(func, args, kwargs):
1948    events = func(*args, **kwargs)
1949    toktype, rootNode = events.getEvent()
1950    events.expandNode(rootNode)
1951    events.clear()
1952    return rootNode
1953
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    When a *parser* is supplied or *bufsize* is truthy the document is
    built through xml.dom.pulldom, which receives both values;
    otherwise xml.dom.expatbuilder does the parsing.
    """
    if parser is not None or bufsize:
        from xml.dom import pulldom
        options = {'parser': parser, 'bufsize': bufsize}
        return _do_pulldom_parse(pulldom.parse, (file,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parse(file)
1963
def parseString(string, parser=None):
    """Parse XML held in a string into a DOM.

    With no *parser* the work is done by xml.dom.expatbuilder; when a
    *parser* is supplied the document is built through xml.dom.pulldom
    using that parser.
    """
    if parser is not None:
        from xml.dom import pulldom
        options = {'parser': parser}
        return _do_pulldom_parse(pulldom.parseString, (string,), options)
    from xml.dom import expatbuilder
    return expatbuilder.parseString(string)
1973
def getDOMImplementation(features=None):
    """Return Document.implementation if it offers all *features*.

    features -- either a feature string, which is parsed with
                domreg._parse_feature_string(), or an iterable of
                (feature, version) pairs.  A false value means "no
                requirements".

    Returns None as soon as one requested feature is not supported.
    """
    implementation = Document.implementation
    if not features:
        return implementation
    if isinstance(features, str):
        features = domreg._parse_feature_string(features)
    if all(implementation.hasFeature(feature, version)
           for feature, version in features):
        return implementation
    return None
1982