1# -*- coding: utf-8 -*-
2# Copyright 2009-2013, Peter A. Bigot
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may
5# not use this file except in compliance with the License. You may obtain a
6# copy of the License at:
7#
8#            http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations
14# under the License.
15
16"""Functions that support activities related to the Document Object Model."""
17
18import logging
19import xml.dom
20
21import pyxb
22import pyxb.namespace
23import pyxb.namespace.resolution
24import pyxb.utils.saxutils
25import pyxb.utils.saxdom
26from pyxb.utils import six
27from pyxb.utils.six.moves import xrange
28
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# The DOM implementation to be used for all processing.  Default is whatever
# your Python install uses, as long as it supports Core 2.0 (for
# createDocument) and XML 2.0 (for NS-aware attribute manipulation).  The
# built-in minidom works fine.  This value is read through
# GetDOMImplementation() and replaced through SetDOMImplementation().
__DOMImplementation = xml.dom.getDOMImplementation(None, (('core', '2.0'), ('xml', '2.0')))
36
def GetDOMImplementation ():
    """Return the DOMImplementation object that pyxb currently uses.

    The returned object serves as the default implementation when a DOM tree
    is generated from a binding instance.  Unless it has been replaced via
    SetDOMImplementation(), it is whatever xml.dom.getDOMImplementation()
    selected when this module was loaded (often xml.dom.minidom).

    @return: The module-wide DOM implementation object."""

    # Only reading the module-level variable, so no "global" statement is
    # required here.
    return __DOMImplementation
47
def SetDOMImplementation (dom_implementation):
    """Install a replacement for the module-wide DOMImplementation object.

    @param dom_implementation: The implementation object that
    GetDOMImplementation() should return from now on.

    @return: The implementation that was just installed."""
    global __DOMImplementation
    __DOMImplementation = dom_implementation
    return dom_implementation
53
54# Unfortunately, the DOMImplementation interface doesn't provide a parser.  So
55# abstract this in case somebody wants to substitute a different one.  Haven't
56# decided how to express that yet.
def StringToDOM (xml_text, **kw):
    """Convert string to a DOM instance.

    When the configured XML style is C{pyxb.XMLStyle_minidom} the text is
    parsed with C{xml.dom.minidom.parseString} using a parser obtained from
    C{pyxb.utils.saxutils.make_parser}; otherwise it is handed to
    C{pyxb.utils.saxdom.parseString}.

    @param xml_text: The XML document, as either a text or a binary string.

    @keyword kw: Additional keywords forwarded to
    C{pyxb.utils.saxdom.parseString}.  They are not used when the minidom
    style is in effect.

    @return: The value returned by the selected C{parseString} function.

    @see: L{pyxb._SetXMLStyle}."""

    # Import the submodule explicitly: the file-level "import xml.dom" does
    # not by itself guarantee that xml.dom.minidom has been loaded.
    import xml.dom.minidom

    if pyxb.XMLStyle_minidom == pyxb._XMLStyle:
        xmlt = xml_text
        parser = pyxb.utils.saxutils.make_parser()
        # minidom.parseString is broken.  In Python 2, this means don't
        # feed it unicode.  In Python 3 this means don't feed it bytes.
        if (six.PY2 and not isinstance(xmlt, six.binary_type)):
            xmlt = xmlt.encode(pyxb._InputEncoding)
        elif (six.PY3 and isinstance(xmlt, six.binary_type)):
            xmlt = xmlt.decode(pyxb._InputEncoding)
        return xml.dom.minidom.parseString(xmlt, parser)
    return pyxb.utils.saxdom.parseString(xml_text, **kw)
73
def NodeAttribute (node, attribute_ncname, attribute_ns=None):
    """Look up an optional attribute of a node, honoring namespaces.

    @param node: The DOM node whose attributes are searched; it must provide
    C{getAttributeNodeNS}.

    @param attribute_ncname: The local name of the attribute.
    @type attribute_ncname: C{str} or C{unicode}

    @keyword attribute_ns: The namespace of the attribute, given either as a
    L{pyxb.namespace.Namespace} instance or as a string URI.  The default of
    C{None} covers the common case of an attribute that is in no namespace.
    @type attribute_ns: C{None} or C{str} or C{unicode} or L{pyxb.namespace.Namespace}

    @return: The value of the attribute, or C{None} if the node has no such
    attribute.
    """

    # Reduce a Namespace instance to its URI; anything else is passed to the
    # DOM lookup unchanged.
    if isinstance(attribute_ns, pyxb.namespace.Namespace):
        lookup_uri = attribute_ns.uri()
    else:
        lookup_uri = attribute_ns
    attr_node = node.getAttributeNodeNS(lookup_uri, attribute_ncname)
    return None if attr_node is None else attr_node.value
96
def NodeAttributeQName (node, attribute_ncname, attribute_ns=None):
    """Variant of L{NodeAttribute} for attributes whose value is a QName.

    The attribute text is interpreted using the namespace context associated
    with C{node}.

    @param attribute_ncname: as in L{NodeAttribute}
    @keyword attribute_ns: as in L{NodeAttribute}

    @return: The expanded name to which the value of the attribute resolves
    given current namespaces, or C{None} if the attribute is not present
    @rtype: L{pyxb.namespace.ExpandedName}
    """
    qname_text = NodeAttribute(node, attribute_ncname, attribute_ns)
    if qname_text is not None:
        context = pyxb.namespace.NamespaceContext.GetNodeContext(node)
        return context.interpretQName(qname_text)
    return None
113
def LocateUniqueChild (node, tag, absent_ok=True, namespace=pyxb.namespace.XMLSchema):
    """Locate a unique child of the DOM node.

    This function returns the sole child of node which is an ELEMENT_NODE
    instance and has a tag consistent with the given tag.  If multiple nodes
    with a matching C{tag} are found, or C{absent_ok} is C{False} and no
    matching tag is found, an exception is raised.

    @param node: An xml.dom.Node ELEMENT_NODE instance
    @param tag: the NCName of an element in the namespace
    @keyword absent_ok: If C{True} (default), C{None} is returned if no match
    can be found.  If C{False}, an exception is raised if no match can be
    found.
    @keyword namespace: The namespace to which the child element belongs.
    Default is the XMLSchema namespace.
    @rtype: C{xml.dom.Node}

    @raise pyxb.SchemaValidationError: multiple elements are identified
    @raise pyxb.SchemaValidationError: C{absent_ok} is C{False} and no element is identified.
    """
    candidate = None
    for cn in node.childNodes:
        if (xml.dom.Node.ELEMENT_NODE == cn.nodeType) and namespace.nodeIsNamed(cn, tag):
            # Compare against None explicitly: whether a match was already
            # recorded must not depend on the truth value of the node object.
            if candidate is not None:
                raise pyxb.SchemaValidationError('Multiple %s elements nested in %s' % (tag, node.nodeName))
            candidate = cn
    if (candidate is None) and not absent_ok:
        raise pyxb.SchemaValidationError('Expected %s elements nested in %s' % (tag, node.nodeName))
    return candidate
143
def LocateMatchingChildren (node, tag, namespace=pyxb.namespace.XMLSchema):
    """Collect every child element of a DOM node that carries a given tag.

    Only children that are ELEMENT_NODE instances and that C{namespace}
    reports as being named C{tag} are included, in document order.

    @param node: An xml.dom.Node ELEMENT_NODE instance.
    @param tag: the NCName of an element in the namespace, which defaults to the
    XMLSchema namespace.
    @keyword namespace: The namespace to which the child element belongs.
    Default is the XMLSchema namespace.

    @return: The matching children; empty if there are none.
    @rtype: C{list(xml.dom.Node)}
    """
    return [ child
             for child in node.childNodes
             if (xml.dom.Node.ELEMENT_NODE == child.nodeType) and namespace.nodeIsNamed(child, tag) ]
163
def LocateFirstChildElement (node, absent_ok=True, require_unique=False, ignore_annotations=True):
    """Locate the first element child of the node.

    @param node: An xml.dom.Node ELEMENT_NODE instance.
    @keyword absent_ok: If C{True} (default), C{None} is returned if no match
    can be found.  If C{False}, an exception is raised if no match can be
    found.
    @keyword require_unique: If C{False} (default), it is acceptable for there
    to be multiple child elements.  If C{True}, presence of multiple child
    elements raises an exception.
    @keyword ignore_annotations: If C{True} (default), annotations are skipped
    when looking for the first child element.  If C{False}, an annotation
    counts as an element.
    @rtype: C{xml.dom.Node}

    @raise SchemaValidationError: C{absent_ok} is C{False} and no child
    element was identified.
    @raise SchemaValidationError: C{require_unique} is C{True} and multiple
    child elements were identified
    """

    candidate = None
    for cn in node.childNodes:
        if xml.dom.Node.ELEMENT_NODE != cn.nodeType:
            continue
        if ignore_annotations and pyxb.namespace.XMLSchema.nodeIsNamed(cn, 'annotation'):
            continue
        if not require_unique:
            # Uniqueness is not being verified, so the first hit is the answer.
            return cn
        # Compare against None explicitly: whether an element was already
        # recorded must not depend on the truth value of the node object.
        if candidate is not None:
            raise pyxb.SchemaValidationError('Multiple elements nested in %s' % (node.nodeName,))
        candidate = cn
    if (candidate is None) and not absent_ok:
        raise pyxb.SchemaValidationError('No elements nested in %s' % (node.nodeName,))
    return candidate
200
def HasNonAnnotationChild (node):
    """Report whether C{node} has at least one child that is an ELEMENT_NODE
    and is not an XMLSchema annotation element.

    @param node: The DOM node whose children are examined.
    @rtype: C{bool}
    """
    element_type = xml.dom.Node.ELEMENT_NODE
    return any((element_type == child.nodeType)
               and (not pyxb.namespace.XMLSchema.nodeIsNamed(child, 'annotation'))
               for child in node.childNodes)
211
def ExtractTextContent (node):
    """Concatenate the text held directly by the children of C{node}.

    Text and CDATA section children contribute their data, in order; comment
    children are skipped.  Any other kind of child is an error.

    Returns C{None} if no text content (including whitespace) is found.

    This is mainly used to strip comments out of the content of complex
    elements with simple types.

    @raise pyxb.NonElementValidationError: a child is neither text, CDATA,
    nor a comment.
    @rtype: C{unicode} or C{str}
    """
    textual_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
    pieces = []
    for child in node.childNodes:
        kind = child.nodeType
        if kind in textual_types:
            pieces.append(child.data)
            continue
        if xml.dom.Node.COMMENT_NODE != kind:
            raise pyxb.NonElementValidationError(child)
    if not pieces:
        return None
    return ''.join(pieces)
236
class BindingDOMSupport (object):
    """This holds DOM-related information used when generating a DOM tree from
    a binding instance.

    An instance wraps one document created through the selected DOM
    implementation, a per-instance namespace context (whose parent is a
    class-wide default context), and the set of namespace/prefix pairs that
    L{finalize} declares on the document element."""

    def implementation (self):
        """The DOMImplementation object to be used.

        Defaults to L{pyxb.utils.domutils.GetDOMImplementation()}, but can be
        overridden in the constructor call using the C{implementation}
        keyword."""
        return self.__implementation
    __implementation = None

    def document (self):
        """Return the document generated using this instance."""
        return self.__document
    __document = None

    def requireXSIType (self):
        """Indicates whether U{xsi:type <http://www.w3.org/TR/xmlschema-1/#xsi_type>} should be added to all elements.

        Certain WSDL styles and encodings seem to require explicit notation of
        the type of each element, even if it was specified in the schema.

        This value can only be set in the constructor."""
        return self.__requireXSIType
    __requireXSIType = None

    def reset (self):
        """Reset this instance to the state it was when created.

        This creates a new root document with no content, resets the
        namespace-prefix map to its as-constructed content, and clears the set
        of referenced namespace prefixes.  The defaultNamespace and
        requireXSIType are not modified."""
        self.__document = self.implementation().createDocument(None, None, None)
        self.__namespaceContext.reset()
        # For historical reasons this is also added automatically, though
        # 'xsi' is not a bound prefix.
        self.__namespaceContext.declareNamespace(pyxb.namespace.XMLSchema_instance, 'xsi')
        self.__referencedNamespacePrefixes = set()

    @classmethod
    def Reset (cls):
        """Reset the global defaults for default/prefix/namespace information."""
        cls.__NamespaceContext.reset()

    def __init__ (self, implementation=None, default_namespace=None, require_xsi_type=False, namespace_prefix_map=None):
        """Create a new instance used for building a single document.

        @keyword implementation: The C{xml.dom} implementation to use.
        Defaults to the one selected by L{GetDOMImplementation}.

        @keyword default_namespace: The namespace to configure as the default
        for the document.  If not provided, there is no default namespace.
        @type default_namespace: L{pyxb.namespace.Namespace}

        @keyword require_xsi_type: If C{True}, an U{xsi:type
        <http://www.w3.org/TR/xmlschema-1/#xsi_type>} attribute should be
        placed in every element.
        @type require_xsi_type: C{bool}

        @keyword namespace_prefix_map: A map from pyxb.namespace.Namespace
        instances to the preferred prefix to use for the namespace in xmlns
        declarations.  The default one assigns 'xsi' for the XMLSchema
        instance namespace.
        @type namespace_prefix_map: C{map} from L{pyxb.namespace.Namespace} to C{str}

        @raise pyxb.LogicError: the same prefix is associated with multiple
        namespaces in the C{namespace_prefix_map}.

        """
        if implementation is None:
            implementation = GetDOMImplementation()
        self.__implementation = implementation
        self.__requireXSIType = require_xsi_type
        # The per-instance context is layered over the class-wide one.
        self.__namespaceContext = pyxb.namespace.NamespaceContext(parent_context=self.__NamespaceContext,
                                                                  in_scope_namespaces=namespace_prefix_map)
        if default_namespace is not None:
            self.__namespaceContext.setDefaultNamespace(default_namespace)
        self.reset()

    # Default namespace-prefix map support
    __NamespaceContext = pyxb.namespace.NamespaceContext()

    # Instance-specific namespace-prefix map support
    __namespaceContext = None

    # Set of pairs of (namespace, prefix) identifying the declarations that
    # must be placed in the document root so that QNames can be resolved.
    # These are the prefixes associated with namespaces that were queried
    # through L{namespacePrefix()} since the last reset().
    __referencedNamespacePrefixes = None

    def defaultNamespace (self):
        """The default namespace for this instance"""
        return self.__namespaceContext.defaultNamespace()
    @classmethod
    def DefaultNamespace (cls):
        """The global default namespace (used on instance creation if not overridden)"""
        return cls.__NamespaceContext.defaultNamespace()

    def setDefaultNamespace (self, default_namespace):
        """Set the default namespace for this instance only.

        @return: Whatever the instance namespace context returns from its
        C{setDefaultNamespace} call."""
        return self.__namespaceContext.setDefaultNamespace(default_namespace)
    @classmethod
    def SetDefaultNamespace (cls, default_namespace):
        """Set the default namespace in the class-wide namespace context.

        @return: Whatever the class-wide namespace context returns from its
        C{setDefaultNamespace} call."""
        return cls.__NamespaceContext.setDefaultNamespace(default_namespace)

    def declareNamespace (self, namespace, prefix=None):
        """Declare a namespace within this instance only."""
        return self.__namespaceContext.declareNamespace(namespace, prefix)
    @classmethod
    def DeclareNamespace (cls, namespace, prefix=None):
        """Declare a namespace that will made available to each created instance."""
        return cls.__NamespaceContext.declareNamespace(namespace, prefix)

    def namespacePrefix (self, namespace, enable_default_namespace=True):
        """Return the prefix to be used for the given namespace.

        This will L{declare <declareNamespace>} the namespace if it has not
        yet been observed.  It will also ensure the mapping from the returned
        prefix to C{namespace} is recorded for addition as an xmlns directive
        in the final document.

        @param namespace: The namespace for which a prefix is needed, either
        as a namespace instance or as a string URI.  If the provided
        namespace is C{None} or an absent namespace, the C{None} value will
        be returned as the corresponding prefix.

        @keyword enable_default_namespace: Normally if the namespace is the default
        namespace C{None} is returned to indicate this.  If this keyword is
        C{False} then we need a namespace prefix even if this is the default.
        """
        # Convert a string URI to a namespace instance before anything else:
        # the absent-namespace test below calls a method that strings do not
        # provide.
        if isinstance(namespace, six.string_types):
            namespace = pyxb.namespace.NamespaceForURI(namespace, create_if_missing=True)
        if (namespace is None) or namespace.isAbsentNamespace():
            return None
        if (self.defaultNamespace() == namespace) and enable_default_namespace:
            return None
        pfx = self.__namespaceContext.prefixForNamespace(namespace)
        if pfx is None:
            pfx = self.__namespaceContext.declareNamespace(namespace)
        # Remember the pair so finalize() can emit the xmlns declaration.
        self.__referencedNamespacePrefixes.add((namespace, pfx))
        return pfx

    def qnameAsText (self, qname, enable_default_namespace=True):
        """Return the text to use for an expanded name within this document.

        The result is C{prefix:localName} when L{namespacePrefix} supplies a
        prefix for the name's namespace, and the bare local name otherwise.

        @param qname: The name to be represented.
        @type qname: L{pyxb.namespace.ExpandedName}
        @keyword enable_default_namespace: as in L{namespacePrefix}
        """
        assert isinstance(qname, pyxb.namespace.ExpandedName)
        name = qname.localName()
        prefix = self.namespacePrefix(qname.namespace(), enable_default_namespace=enable_default_namespace)
        if prefix is not None:
            name = '%s:%s' % (prefix, name)
        return name

    def valueAsText (self, value, enable_default_namespace=True):
        """Represent a simple type value as XML text.

        This is essentially what C{value.xsdLiteral()} does, but this one
        handles any special cases such as QName values where the lexical
        representation cannot be done in isolation of external information
        such as namespace declarations."""
        # Imported here rather than at file level.
        from pyxb.binding.basis import simpleTypeDefinition, STD_list
        if isinstance(value, pyxb.namespace.ExpandedName):
            return self.qnameAsText(value, enable_default_namespace=enable_default_namespace)
        if isinstance(value, STD_list):
            # List members are converted individually so that QName members
            # get the same treatment as standalone QName values.
            return ' '.join([ self.valueAsText(_v, enable_default_namespace=enable_default_namespace) for _v in value ])
        if isinstance(value, simpleTypeDefinition):
            return value.xsdLiteral()
        assert value is not None
        return six.text_type(value)

    def addAttribute (self, element, expanded_name, value):
        """Add an attribute to the given element.

        @param element: The element to which the attribute should be added
        @type element: C{xml.dom.Element}
        @param expanded_name: The name of the attribute.  This may be a local
        name if the attribute is not in a namespace.
        @type expanded_name: L{pyxb.namespace.ExpandedName} or C{str} or C{unicode}
        @param value: The value of the attribute; it is converted to text
        with L{valueAsText}.
        @type value: C{str} or C{unicode}
        """
        name = expanded_name
        ns_uri = xml.dom.EMPTY_NAMESPACE
        if isinstance(name, pyxb.namespace.ExpandedName):
            ns_uri = expanded_name.namespaceURI()
            # Attribute names do not use default namespace
            name = self.qnameAsText(expanded_name, enable_default_namespace=False)
        element.setAttributeNS(ns_uri, name, self.valueAsText(value))

    def addXMLNSDeclaration (self, element, namespace, prefix=None):
        """Manually add an XMLNS declaration to the document element.

        @param element: The element that receives the declaration attribute.

        @param namespace: a L{pyxb.namespace.Namespace} instance

        @param prefix: the prefix by which the namespace is known.  If
        C{None}, the default prefix as previously declared will be used; if
        C{''} (empty string) a declaration for C{namespace} as the default
        namespace will be generated.

        @return: C{prefix} as used in the added declaration.

        @raise pyxb.UsageError: C{namespace} is not a namespace instance, or
        is an absent namespace.
        """
        if not isinstance(namespace, pyxb.namespace.Namespace):
            raise pyxb.UsageError('addXMLNSdeclaration: must be given a namespace instance')
        if namespace.isAbsentNamespace():
            raise pyxb.UsageError('addXMLNSdeclaration: namespace must not be an absent namespace')
        if prefix is None:
            prefix = self.namespacePrefix(namespace)
        if not prefix: # None or empty string
            an = 'xmlns'
        else:
            an = 'xmlns:' + prefix
        element.setAttributeNS(pyxb.namespace.XMLNamespaces.uri(), an, namespace.uri())
        return prefix

    def finalize (self):
        """Do the final cleanup after generating the tree.  This makes sure
        that the document element includes XML Namespace declarations for all
        namespaces referenced in the tree.

        @return: The document that has been created.
        @rtype: C{xml.dom.Document}"""
        ns = self.defaultNamespace()
        if ns is not None:
            # Empty prefix requests a default-namespace (plain xmlns) declaration.
            self.addXMLNSDeclaration(self.document().documentElement, ns, '')
        for (ns, pfx) in self.__referencedNamespacePrefixes:
            self.addXMLNSDeclaration(self.document().documentElement, ns, pfx)
        return self.document()

    def createChildElement (self, expanded_name, parent=None):
        """Create a new element node in the tree.

        @param expanded_name: The name of the element.  A plain string
        indicates a name in no namespace.
        @type expanded_name: L{pyxb.namespace.ExpandedName} or C{str} or C{unicode}

        @keyword parent: The node in the tree that will serve as the child's
        parent.  If C{None}, the document element is used.  (If there is no
        document element, then this call creates it as a side-effect.)

        @return: A newly created DOM element
        @rtype: C{xml.dom.Element}

        @raise pyxb.LogicError: C{expanded_name} is neither a string nor an
        L{pyxb.namespace.ExpandedName}.
        """

        if parent is None:
            parent = self.document().documentElement
        if parent is None:
            # No document element yet: the new element is appended to the
            # document itself.
            parent = self.__document
        if isinstance(expanded_name, six.string_types):
            expanded_name = pyxb.namespace.ExpandedName(None, expanded_name)
        if not isinstance(expanded_name, pyxb.namespace.ExpandedName):
            raise pyxb.LogicError('Invalid type %s for expanded name' % (type(expanded_name),))
        ns = expanded_name.namespace()
        ns_uri = xml.dom.EMPTY_NAMESPACE
        name = expanded_name.localName()
        if ns is not None:
            ns_uri = ns.uri()
            name = self.qnameAsText(expanded_name)
        element = self.__document.createElementNS(ns_uri, name)
        return parent.appendChild(element)

    def _makeURINodeNamePair (self, node):
        """Convert namespace information from a DOM node to text for new DOM node.

        The namespaceURI and nodeName are extracted and parsed.  The namespace
        (if any) is registered within the document, along with any prefix from
        the node name.  A pair is returned where the first element is the
        namespace URI or C{None}, and the second is a QName to be used for the
        expanded name within this document.

        @param node: An xml.dom.Node instance, presumably from a wildcard match.
        @rtype: C{( str, str )}

        @raise pyxb.UsageError: C{node} is neither an element nor an
        attribute node.
        @raise pyxb.LogicError: the node name has a prefix but the node has
        no namespace URI."""
        ns = None
        if node.namespaceURI is not None:
            ns = pyxb.namespace.NamespaceForURI(node.namespaceURI, create_if_missing=True)
        if node.ELEMENT_NODE == node.nodeType:
            name = node.tagName
        elif node.ATTRIBUTE_NODE == node.nodeType:
            name = node.name
            # saxdom uses the uriTuple as the name field while minidom uses
            # the QName.  @todo saxdom should be fixed.
            if isinstance(name, tuple):
                name = name[1]
        else:
            raise pyxb.UsageError('Unable to determine name from DOM node %s' % (node,))
        pfx = None
        local_name = name
        # A colon at index 0 is not treated as a prefix separator.
        if 0 < name.find(':'):
            (pfx, local_name) = name.split(':', 1)
            if ns is None:
                raise pyxb.LogicError('QName with prefix but no available namespace')
        ns_uri = None
        node_name = local_name
        if ns is not None:
            ns_uri = ns.uri()
            self.declareNamespace(ns, pfx)
            node_name = self.qnameAsText(ns.createExpandedName(local_name))
        return (ns_uri, node_name)

    def _deepClone (self, node, docnode):
        """Recursively copy C{node} using the factory methods of C{docnode}.

        Element, text, attribute and comment nodes are supported.  For an
        element, its attributes and children are cloned as well.

        @param node: The node to be copied.
        @param docnode: The document whose C{create*} methods build the copy.
        @return: The newly created node.
        @raise ValueError: C{node} is of any other node type."""
        if node.ELEMENT_NODE == node.nodeType:
            (ns_uri, node_name) = self._makeURINodeNamePair(node)
            clone_node = docnode.createElementNS(ns_uri, node_name)
            attrs = node.attributes
            for ai in xrange(attrs.length):
                clone_node.setAttributeNodeNS(self._deepClone(attrs.item(ai), docnode))
            for child in node.childNodes:
                clone_node.appendChild(self._deepClone(child, docnode))
            return clone_node
        if node.TEXT_NODE == node.nodeType:
            return docnode.createTextNode(node.data)
        if node.ATTRIBUTE_NODE == node.nodeType:
            (ns_uri, node_name) = self._makeURINodeNamePair(node)
            clone_node = docnode.createAttributeNS(ns_uri, node_name)
            clone_node.value = node.value
            return clone_node
        if node.COMMENT_NODE == node.nodeType:
            return docnode.createComment(node.data)
        raise ValueError('DOM node not supported in clone', node)

    def cloneIntoImplementation (self, node):
        """Create a deep copy of the node in the target implementation.

        Used when converting a DOM instance from one implementation (e.g.,
        L{pyxb.utils.saxdom}) into another (e.g., L{xml.dom.minidom})."""
        new_doc = self.implementation().createDocument(None, None, None)
        return self._deepClone(node, new_doc)

    def appendChild (self, child, parent):
        """Add the child to the parent.

        @note: If the child and the parent use different DOM implementations,
        this operation will clone the child into a new instance, and give that
        to the parent.

        @param child: The value to be appended
        @type child: C{xml.dom.Node}
        @param parent: The new parent of the child
        @type parent: C{xml.dom.Node}
        @rtype: C{xml.dom.Node}"""

        # Import the submodule explicitly: the file-level "import xml.dom"
        # does not by itself guarantee that xml.dom.minidom has been loaded.
        import xml.dom.minidom

        # @todo This check is incomplete; is there a standard way to find the
        # implementation of an xml.dom.Node instance?
        if isinstance(child, (pyxb.utils.saxdom.Node, xml.dom.minidom.Node)):
            child = self.cloneIntoImplementation(child)
        return parent.appendChild(child)

    def appendTextChild (self, text, parent):
        """Add the text to the parent as a text node.

        The text is first converted with L{valueAsText}.

        @return: The value returned by C{parent.appendChild}."""
        return parent.appendChild(self.document().createTextNode(self.valueAsText(text)))
585
586## Local Variables:
587## fill-column:78
588## End:
589