# -*- coding: utf-8 -*-
# Copyright 2009-2013, Peter A. Bigot
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain a
# copy of the License at:
#
#            http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Functions that support activities related to the Document Object Model."""

import logging
import xml.dom
# Imported explicitly: StringToDOM and BindingDOMSupport.appendChild refer to
# xml.dom.minidom directly, and must not depend on some other module having
# imported it as a side effect.
import xml.dom.minidom

import pyxb
import pyxb.namespace
import pyxb.namespace.resolution
import pyxb.utils.saxutils
import pyxb.utils.saxdom
from pyxb.utils import six
from pyxb.utils.six.moves import xrange

_log = logging.getLogger(__name__)

# The DOM implementation to be used for all processing.  Default is whatever
# your Python install uses, as long as it supports Core 2.0 (for
# createDocument) and XML 2.0 (for NS-aware attribute manipulation).  The
# built-in minidom works fine.
__DOMImplementation = xml.dom.getDOMImplementation(None, (('core', '2.0'), ('xml', '2.0')))

def GetDOMImplementation ():
    """Return the DOMImplementation object used for pyxb operations.

    This is primarily used as the default implementation when generating DOM
    trees from a binding instance.  It defaults to whatever
    xml.dom.getDOMImplementation() returns in your installation (often
    xml.dom.minidom).  It can be overridden with SetDOMImplementation()."""

    global __DOMImplementation
    return __DOMImplementation

def SetDOMImplementation (dom_implementation):
    """Override the default DOMImplementation object.

    @param dom_implementation: The implementation that subsequent calls to
    L{GetDOMImplementation} should return.
    @return: The implementation that was just installed."""
    global __DOMImplementation
    __DOMImplementation = dom_implementation
    return __DOMImplementation

# Unfortunately, the DOMImplementation interface doesn't provide a parser.  So
# abstract this in case somebody wants to substitute a different one.  Haven't
# decided how to express that yet.
def StringToDOM (xml_text, **kw):
    """Convert string to a DOM instance.

    When the configured XML style is minidom the text is parsed with
    C{xml.dom.minidom.parseString}; otherwise it is handed, together with any
    keyword arguments, to L{pyxb.utils.saxdom.parseString}.

    @param xml_text: The XML document, as text or as encoded bytes.
    @keyword kw: Passed through to the saxdom parser only; ignored when the
    minidom style is selected.

    @see: L{pyxb._SetXMLStyle}."""

    xmlt = xml_text
    if pyxb.XMLStyle_minidom == pyxb._XMLStyle:
        parser = pyxb.utils.saxutils.make_parser()
        # minidom.parseString is broken.  In Python 2, this means don't
        # feed it unicode.  In Python 3 this means don't feed it bytes.
        if (six.PY2 and not isinstance(xmlt, six.binary_type)):
            xmlt = xmlt.encode(pyxb._InputEncoding)
        elif (six.PY3 and isinstance(xmlt, six.binary_type)):
            xmlt = xmlt.decode(pyxb._InputEncoding)
        return xml.dom.minidom.parseString(xmlt, parser)
    return pyxb.utils.saxdom.parseString(xml_text, **kw)

def NodeAttribute (node, attribute_ncname, attribute_ns=None):
    """Namespace-aware search for an optional attribute in a node.

    @param attribute_ncname: The local name of the attribute.
    @type attribute_ncname: C{str} or C{unicode}

    @keyword attribute_ns: The namespace of the attribute.  Defaults to None
    since most attributes are not in a namespace.  Can be provided as either a
    L{pyxb.namespace.Namespace} instance, or a string URI.
    @type attribute_ns: C{None} or C{str} or C{unicode} or L{pyxb.namespace.Namespace}

    @return: The value of the attribute, or C{None} if the attribute is not
    present.  (Unless C{None}, the value will always be a (unicode) string.)
    """

    ns_uri = attribute_ns
    if isinstance(attribute_ns, pyxb.namespace.Namespace):
        # The DOM API wants the URI, not the Namespace object.
        ns_uri = attribute_ns.uri()
    attr = node.getAttributeNodeNS(ns_uri, attribute_ncname)
    if attr is None:
        return None
    return attr.value

def NodeAttributeQName (node, attribute_ncname, attribute_ns=None):
    """Like L{NodeAttribute} but where the content is a QName that must be
    resolved in the context of the node.

    @param attribute_ncname: as in L{NodeAttribute}
    @keyword attribute_ns: as in L{NodeAttribute}

    @return: The expanded name to which the value of the attribute resolves
    given current namespaces, or C{None} if the attribute is not present
    @rtype: L{pyxb.namespace.ExpandedName}
    """
    attr = NodeAttribute(node, attribute_ncname, attribute_ns)
    if attr is None:
        return None
    nsc = pyxb.namespace.NamespaceContext.GetNodeContext(node)
    return nsc.interpretQName(attr)

def LocateUniqueChild (node, tag, absent_ok=True, namespace=pyxb.namespace.XMLSchema):
    """Locate a unique child of the DOM node.

    This function returns the sole child of node which is an ELEMENT_NODE
    instance and has a tag consistent with the given tag.  If multiple nodes
    with a matching C{tag} are found, or C{absent_ok} is C{False} and no
    matching tag is found, an exception is raised.

    @param node: An xml.dom.Node ELEMENT_NODE instance
    @param tag: the NCName of an element in the namespace
    @keyword absent_ok: If C{True} (default), C{None} is returned if no match
    can be found.  If C{False}, an exception is raised if no match can be
    found.
    @keyword namespace: The namespace to which the child element belongs.
    Default is the XMLSchema namespace.
    @rtype: C{xml.dom.Node}

    @raise pyxb.SchemaValidationError: multiple elements are identified
    @raise pyxb.SchemaValidationError: C{absent_ok} is C{False} and no element is identified.
    """
    candidate = None
    for cn in node.childNodes:
        if (xml.dom.Node.ELEMENT_NODE == cn.nodeType) and namespace.nodeIsNamed(cn, tag):
            # Compare against None rather than relying on truthiness: a DOM
            # node class is free to define __len__/__bool__, and a falsy
            # first match must still count as "already found".
            if candidate is not None:
                raise pyxb.SchemaValidationError('Multiple %s elements nested in %s' % (tag, node.nodeName))
            candidate = cn
    if (candidate is None) and not absent_ok:
        raise pyxb.SchemaValidationError('Expected %s elements nested in %s' % (tag, node.nodeName))
    return candidate

def LocateMatchingChildren (node, tag, namespace=pyxb.namespace.XMLSchema):
    """Locate all children of the DOM node that have a particular tag.

    This function returns a list of children of node which are ELEMENT_NODE
    instances and have a tag consistent with the given tag.

    @param node: An xml.dom.Node ELEMENT_NODE instance.
    @param tag: the NCName of an element in the namespace, which defaults to the
    XMLSchema namespace.
    @keyword namespace: The namespace to which the child element belongs.
    Default is the XMLSchema namespace.

    @rtype: C{list(xml.dom.Node)}
    """
    return [ cn for cn in node.childNodes
             if (xml.dom.Node.ELEMENT_NODE == cn.nodeType) and namespace.nodeIsNamed(cn, tag) ]

def LocateFirstChildElement (node, absent_ok=True, require_unique=False, ignore_annotations=True):
    """Locate the first element child of the node.

    @param node: An xml.dom.Node ELEMENT_NODE instance.
    @keyword absent_ok: If C{True} (default), C{None} is returned if no match
    can be found.  If C{False}, an exception is raised if no match can be
    found.
    @keyword require_unique: If C{False} (default), it is acceptable for there
    to be multiple child elements.  If C{True}, presence of multiple child
    elements raises an exception.
    @keyword ignore_annotations: If C{True} (default), annotations are skipped
    when looking for the first child element.  If C{False}, an annotation
    counts as an element.
    @rtype: C{xml.dom.Node}

    @raise SchemaValidationError: C{absent_ok} is C{False} and no child
    element was identified.
    @raise SchemaValidationError: C{require_unique} is C{True} and multiple
    child elements were identified
    """

    candidate = None
    for cn in node.childNodes:
        if xml.dom.Node.ELEMENT_NODE != cn.nodeType:
            continue
        if ignore_annotations and pyxb.namespace.XMLSchema.nodeIsNamed(cn, 'annotation'):
            continue
        if not require_unique:
            # First acceptable element wins; no need to look further.
            return cn
        # Uniqueness required: keep scanning so a second element is detected.
        # Identity comparison, not truthiness, so a falsy node still counts.
        if candidate is not None:
            raise pyxb.SchemaValidationError('Multiple elements nested in %s' % (node.nodeName,))
        candidate = cn
    if (candidate is None) and not absent_ok:
        raise pyxb.SchemaValidationError('No elements nested in %s' % (node.nodeName,))
    return candidate

def HasNonAnnotationChild (node):
    """Return True iff C{node} has an ELEMENT_NODE child that is not an
    XMLSchema annotation node.

    @rtype: C{bool}
    """
    for cn in node.childNodes:
        if (xml.dom.Node.ELEMENT_NODE == cn.nodeType) and (not pyxb.namespace.XMLSchema.nodeIsNamed(cn, 'annotation')):
            return True
    return False

def ExtractTextContent (node):
    """Walk all the children, extracting all text content and
    catenating it into the return value.

    Returns C{None} if no text content (including whitespace) is found.

    This is mainly used to strip comments out of the content of complex
    elements with simple types.

    @rtype: C{unicode} or C{str}

    @raise pyxb.NonElementValidationError: a child is something other than
    text, a CDATA section, or a comment.
    """
    text_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
    text = []
    for cn in node.childNodes:
        if cn.nodeType in text_types:
            text.append(cn.data)
        elif xml.dom.Node.COMMENT_NODE != cn.nodeType:
            # Comments are silently dropped; anything else is not permitted.
            raise pyxb.NonElementValidationError(cn)
    if not text:
        return None
    return ''.join(text)

class BindingDOMSupport (object):
    """This holds DOM-related information used when generating a DOM tree from
    a binding instance."""

    def implementation (self):
        """The DOMImplementation object to be used.

        Defaults to L{pyxb.utils.domutils.GetDOMImplementation()}, but can be
        overridden in the constructor call using the C{implementation}
        keyword."""
        return self.__implementation
    __implementation = None

    def document (self):
        """Return the document generated using this instance."""
        return self.__document
    __document = None

    def requireXSIType (self):
        """Indicates whether U{xsi:type
        <http://www.w3.org/TR/xmlschema-1/#xsi_type>} should be added to all
        elements.

        Certain WSDL styles and encodings seem to require explicit notation of
        the type of each element, even if it was specified in the schema.

        This value can only be set in the constructor."""
        return self.__requireXSIType
    __requireXSIType = None

    def reset (self):
        """Reset this instance to the state it was when created.

        This creates a new root document with no content, resets the
        namespace-prefix map to its as-constructed content, and clears the set
        of referenced namespace prefixes.  The defaultNamespace and
        requireXSIType are not modified."""
        self.__document = self.implementation().createDocument(None, None, None)
        self.__namespaceContext.reset()
        # For historical reasons this is also added automatically, though
        # 'xsi' is not a bound prefix.
        self.__namespaceContext.declareNamespace(pyxb.namespace.XMLSchema_instance, 'xsi')
        self.__referencedNamespacePrefixes = set()

    @classmethod
    def Reset (cls):
        """Reset the global defaults for default/prefix/namespace information."""
        cls.__NamespaceContext.reset()

    def __init__ (self, implementation=None, default_namespace=None, require_xsi_type=False, namespace_prefix_map=None):
        """Create a new instance used for building a single document.

        @keyword implementation: The C{xml.dom} implementation to use.
        Defaults to the one selected by L{GetDOMImplementation}.

        @keyword default_namespace: The namespace to configure as the default
        for the document.  If not provided, there is no default namespace.
        @type default_namespace: L{pyxb.namespace.Namespace}

        @keyword require_xsi_type: If C{True}, an U{xsi:type
        <http://www.w3.org/TR/xmlschema-1/#xsi_type>} attribute should be
        placed in every element.
        @type require_xsi_type: C{bool}

        @keyword namespace_prefix_map: A map from pyxb.namespace.Namespace
        instances to the preferred prefix to use for the namespace in xmlns
        declarations.  The default one assigns 'xsi' for the XMLSchema
        instance namespace.
        @type namespace_prefix_map: C{map} from L{pyxb.namespace.Namespace} to C{str}

        @raise pyxb.LogicError: the same prefix is associated with multiple
        namespaces in the C{namespace_prefix_map}.

        """
        if implementation is None:
            implementation = GetDOMImplementation()
        self.__implementation = implementation
        self.__requireXSIType = require_xsi_type
        # The per-instance context is layered over the class-wide one so that
        # globally declared namespaces are visible to every instance.
        self.__namespaceContext = pyxb.namespace.NamespaceContext(parent_context=self.__NamespaceContext,
                                                                  in_scope_namespaces=namespace_prefix_map)
        if default_namespace is not None:
            self.__namespaceContext.setDefaultNamespace(default_namespace)
        self.reset()

    # Default namespace-prefix map support
    __NamespaceContext = pyxb.namespace.NamespaceContext()

    # Instance-specific namespace-prefix map support
    __namespaceContext = None

    # Set of pairs of (namespace, prefix) identifying the declarations that
    # must be placed in the document root so that QNames can be resolved.
    # These are the prefixes associated with namespaces that were queried
    # through L{namespacePrefix()} since the last reset().
    __referencedNamespacePrefixes = None

    def defaultNamespace (self):
        """The default namespace for this instance"""
        return self.__namespaceContext.defaultNamespace()
    @classmethod
    def DefaultNamespace (cls):
        """The global default namespace (used on instance creation if not overridden)"""
        return cls.__NamespaceContext.defaultNamespace()

    def setDefaultNamespace (self, default_namespace):
        """Set the default namespace for this instance only."""
        return self.__namespaceContext.setDefaultNamespace(default_namespace)
    @classmethod
    def SetDefaultNamespace (cls, default_namespace):
        """Set the default namespace in the class-wide namespace context."""
        return cls.__NamespaceContext.setDefaultNamespace(default_namespace)

    def declareNamespace (self, namespace, prefix=None):
        """Declare a namespace within this instance only."""
        return self.__namespaceContext.declareNamespace(namespace, prefix)
    @classmethod
    def DeclareNamespace (cls, namespace, prefix=None):
        """Declare a namespace that will be made available to each created instance."""
        return cls.__NamespaceContext.declareNamespace(namespace, prefix)

    def namespacePrefix (self, namespace, enable_default_namespace=True):
        """Return the prefix to be used for the given namespace.

        This will L{declare <declareNamespace>} the namespace if it has not
        yet been observed.  It will also ensure the mapping from the returned
        prefix to C{namespace} is recorded for addition as an xmlns directive
        in the final document.

        @param namespace: The namespace for which a prefix is needed.  May be
        a L{pyxb.namespace.Namespace} instance or a string URI.  If the
        provided namespace is C{None} or an absent namespace, the C{None}
        value will be returned as the corresponding prefix.

        @keyword enable_default_namespace: Normally if the namespace is the default
        namespace C{None} is returned to indicate this.  If this keyword is
        C{False} then we need a namespace prefix even if this is the default.
        """
        # Resolve a string URI to a Namespace instance *before* invoking any
        # Namespace method on it; a plain string has no isAbsentNamespace().
        if isinstance(namespace, six.string_types):
            namespace = pyxb.namespace.NamespaceForURI(namespace, create_if_missing=True)
        if (namespace is None) or namespace.isAbsentNamespace():
            return None
        if (self.defaultNamespace() == namespace) and enable_default_namespace:
            return None
        pfx = self.__namespaceContext.prefixForNamespace(namespace)
        if pfx is None:
            pfx = self.__namespaceContext.declareNamespace(namespace)
        # Remember the pair so finalize() can emit the xmlns declaration.
        self.__referencedNamespacePrefixes.add((namespace, pfx))
        return pfx

    def qnameAsText (self, qname, enable_default_namespace=True):
        """Return the lexical C{prefix:local} (or bare local) form of an
        expanded name, registering the namespace prefix as referenced.

        @param qname: The name to be rendered.
        @type qname: L{pyxb.namespace.ExpandedName}
        @keyword enable_default_namespace: as in L{namespacePrefix}
        """
        assert isinstance(qname, pyxb.namespace.ExpandedName)
        name = qname.localName()
        prefix = self.namespacePrefix(qname.namespace(), enable_default_namespace=enable_default_namespace)
        if prefix is not None:
            name = '%s:%s' % (prefix, name)
        return name

    def valueAsText (self, value, enable_default_namespace=True):
        """Represent a simple type value as XML text.

        This is essentially what C{value.xsdLiteral()} does, but this one
        handles any special cases such as QName values where the lexical
        representation cannot be done in isolation of external information
        such as namespace declarations."""
        # Imported here rather than at module level, as in the original.
        from pyxb.binding.basis import simpleTypeDefinition, STD_list
        if isinstance(value, pyxb.namespace.ExpandedName):
            return self.qnameAsText(value, enable_default_namespace=enable_default_namespace)
        if isinstance(value, STD_list):
            # Each list member may itself need namespace-aware rendering.
            return ' '.join([ self.valueAsText(_v, enable_default_namespace=enable_default_namespace) for _v in value ])
        if isinstance(value, simpleTypeDefinition):
            return value.xsdLiteral()
        assert value is not None
        return six.text_type(value)

    def addAttribute (self, element, expanded_name, value):
        """Add an attribute to the given element.

        @param element: The element to which the attribute should be added
        @type element: C{xml.dom.Element}
        @param expanded_name: The name of the attribute.  This may be a local
        name if the attribute is not in a namespace.
        @type expanded_name: L{pyxb.namespace.ExpandedName} or C{str} or C{unicode}
        @param value: The value of the attribute
        @type value: C{str} or C{unicode}
        """
        name = expanded_name
        ns_uri = xml.dom.EMPTY_NAMESPACE
        if isinstance(name, pyxb.namespace.ExpandedName):
            ns_uri = expanded_name.namespaceURI()
            # Attribute names do not use default namespace
            name = self.qnameAsText(expanded_name, enable_default_namespace=False)
        element.setAttributeNS(ns_uri, name, self.valueAsText(value))

    def addXMLNSDeclaration (self, element, namespace, prefix=None):
        """Manually add an XMLNS declaration to the document element.

        @param namespace: a L{pyxb.namespace.Namespace} instance

        @param prefix: the prefix by which the namespace is known.  If
        C{None}, the default prefix as previously declared will be used; if
        C{''} (empty string) a declaration for C{namespace} as the default
        namespace will be generated.

        @return: C{prefix} as used in the added declaration.

        @raise pyxb.UsageError: C{namespace} is not a namespace instance, or
        is an absent namespace.
        """
        if not isinstance(namespace, pyxb.namespace.Namespace):
            raise pyxb.UsageError('addXMLNSdeclaration: must be given a namespace instance')
        if namespace.isAbsentNamespace():
            raise pyxb.UsageError('addXMLNSdeclaration: namespace must not be an absent namespace')
        if prefix is None:
            prefix = self.namespacePrefix(namespace)
        if not prefix: # None or empty string
            an = 'xmlns'
        else:
            an = 'xmlns:' + prefix
        element.setAttributeNS(pyxb.namespace.XMLNamespaces.uri(), an, namespace.uri())
        return prefix

    def finalize (self):
        """Do the final cleanup after generating the tree.  This makes sure
        that the document element includes XML Namespace declarations for all
        namespaces referenced in the tree.

        @return: The document that has been created.
        @rtype: C{xml.dom.Document}"""
        root = self.document().documentElement
        ns = self.defaultNamespace()
        if ns is not None:
            # Empty prefix requests a default-namespace (plain xmlns) declaration.
            self.addXMLNSDeclaration(root, ns, '')
        for (ns, pfx) in self.__referencedNamespacePrefixes:
            self.addXMLNSDeclaration(root, ns, pfx)
        return self.document()

    def createChildElement (self, expanded_name, parent=None):
        """Create a new element node in the tree.

        @param expanded_name: The name of the element.  A plain string
        indicates a name in no namespace.
        @type expanded_name: L{pyxb.namespace.ExpandedName} or C{str} or C{unicode}

        @keyword parent: The node in the tree that will serve as the child's
        parent.  If C{None}, the document element is used.  (If there is no
        document element, then this call creates it as a side-effect.)

        @return: A newly created DOM element
        @rtype: C{xml.dom.Element}

        @raise pyxb.LogicError: C{expanded_name} is neither a string nor an
        L{pyxb.namespace.ExpandedName}.
        """

        if parent is None:
            parent = self.document().documentElement
        if parent is None:
            # No document element yet: the new element becomes the root.
            parent = self.__document
        if isinstance(expanded_name, six.string_types):
            expanded_name = pyxb.namespace.ExpandedName(None, expanded_name)
        if not isinstance(expanded_name, pyxb.namespace.ExpandedName):
            raise pyxb.LogicError('Invalid type %s for expanded name' % (type(expanded_name),))
        ns = expanded_name.namespace()
        ns_uri = xml.dom.EMPTY_NAMESPACE
        name = expanded_name.localName()
        if ns is not None:
            ns_uri = ns.uri()
            name = self.qnameAsText(expanded_name)
        element = self.__document.createElementNS(ns_uri, name)
        return parent.appendChild(element)

    def _makeURINodeNamePair (self, node):
        """Convert namespace information from a DOM node to text for new DOM node.

        The namespaceURI and nodeName are extracted and parsed.  The namespace
        (if any) is registered within the document, along with any prefix from
        the node name.  A pair is returned where the first element is the
        namespace URI or C{None}, and the second is a QName to be used for the
        expanded name within this document.

        @param node: An xml.dom.Node instance, presumably from a wildcard match.
        @rtype: C{( str, str )}

        @raise pyxb.UsageError: C{node} is neither an element nor an attribute.
        @raise pyxb.LogicError: the node name has a prefix but the node has no
        namespace URI."""
        ns = None
        if node.namespaceURI is not None:
            ns = pyxb.namespace.NamespaceForURI(node.namespaceURI, create_if_missing=True)
        if node.ELEMENT_NODE == node.nodeType:
            name = node.tagName
        elif node.ATTRIBUTE_NODE == node.nodeType:
            name = node.name
            # saxdom uses the uriTuple as the name field while minidom uses
            # the QName.  @todo saxdom should be fixed.
            if isinstance(name, tuple):
                name = name[1]
        else:
            raise pyxb.UsageError('Unable to determine name from DOM node %s' % (node,))
        pfx = None
        local_name = name
        # A colon at index 0 is not treated as a prefix separator.
        if 0 < name.find(':'):
            (pfx, local_name) = name.split(':', 1)
            if ns is None:
                raise pyxb.LogicError('QName with prefix but no available namespace')
        ns_uri = None
        node_name = local_name
        if ns is not None:
            ns_uri = ns.uri()
            self.declareNamespace(ns, pfx)
            node_name = self.qnameAsText(ns.createExpandedName(local_name))
        return (ns_uri, node_name)

    def _deepClone (self, node, docnode):
        """Recursively copy C{node} using the factory methods of C{docnode}.

        Elements (with their attributes and children), text nodes, attributes
        and comments are supported.

        @param node: The node to copy.
        @param docnode: The document whose C{create*} methods build the copy.
        @return: The newly created node.
        @raise ValueError: C{node} is of any other node type."""
        if node.ELEMENT_NODE == node.nodeType:
            (ns_uri, node_name) = self._makeURINodeNamePair(node)
            clone_node = docnode.createElementNS(ns_uri, node_name)
            attrs = node.attributes
            for ai in xrange(attrs.length):
                clone_node.setAttributeNodeNS(self._deepClone(attrs.item(ai), docnode))
            for child in node.childNodes:
                clone_node.appendChild(self._deepClone(child, docnode))
            return clone_node
        if node.TEXT_NODE == node.nodeType:
            return docnode.createTextNode(node.data)
        if node.ATTRIBUTE_NODE == node.nodeType:
            (ns_uri, node_name) = self._makeURINodeNamePair(node)
            clone_node = docnode.createAttributeNS(ns_uri, node_name)
            clone_node.value = node.value
            return clone_node
        if node.COMMENT_NODE == node.nodeType:
            return docnode.createComment(node.data)
        raise ValueError('DOM node not supported in clone', node)

    def cloneIntoImplementation (self, node):
        """Create a deep copy of the node in the target implementation.

        Used when converting a DOM instance from one implementation (e.g.,
        L{pyxb.utils.saxdom}) into another (e.g., L{xml.dom.minidom})."""
        new_doc = self.implementation().createDocument(None, None, None)
        return self._deepClone(node, new_doc)

    def appendChild (self, child, parent):
        """Add the child to the parent.

        @note: If the child and the parent use different DOM implementations,
        this operation will clone the child into a new instance, and give that
        to the parent.

        @param child: The value to be appended
        @type child: C{xml.dom.Node}
        @param parent: The new parent of the child
        @type parent: C{xml.dom.Node}
        @rtype: C{xml.dom.Node}"""

        # @todo This check is incomplete; is there a standard way to find the
        # implementation of an xml.dom.Node instance?
        if isinstance(child, (pyxb.utils.saxdom.Node, xml.dom.minidom.Node)):
            child = self.cloneIntoImplementation(child)
        return parent.appendChild(child)

    def appendTextChild (self, text, parent):
        """Add the text to the parent as a text node.

        The value is converted with L{valueAsText} before the text node is
        created.

        @return: The result of C{parent.appendChild} for the new text node."""
        return parent.appendChild(self.document().createTextNode(self.valueAsText(text)))

## Local Variables:
## fill-column:78
## End: