import logging
import re
import typing

from lxml import etree

from zeep.exceptions import XMLParseError
from zeep.loader import absolute_location, load_external, normalize_location
from zeep.utils import as_qname, qname_attr
from zeep.xsd import elements as xsd_elements
from zeep.xsd import types as xsd_types
from zeep.xsd.const import AUTO_IMPORT_NAMESPACES, xsd_ns
from zeep.xsd.types.unresolved import UnresolvedCustomType, UnresolvedType

logger = logging.getLogger(__name__)


class tags:
    """Names of the XML Schema elements the visitor compares node tags with.

    Every value is produced by ``xsd_ns`` from the local element name.
    """

    schema = xsd_ns("schema")
    import_ = xsd_ns("import")
    include = xsd_ns("include")
    annotation = xsd_ns("annotation")
    element = xsd_ns("element")
    simpleType = xsd_ns("simpleType")
    complexType = xsd_ns("complexType")
    simpleContent = xsd_ns("simpleContent")
    complexContent = xsd_ns("complexContent")
    sequence = xsd_ns("sequence")
    group = xsd_ns("group")
    choice = xsd_ns("choice")
    all = xsd_ns("all")
    list = xsd_ns("list")
    union = xsd_ns("union")
    attribute = xsd_ns("attribute")
    any = xsd_ns("any")
    anyAttribute = xsd_ns("anyAttribute")
    attributeGroup = xsd_ns("attributeGroup")
    restriction = xsd_ns("restriction")
    extension = xsd_ns("extension")
    # The XSD element is called "notation" (singular); the previous value
    # "notations" could never match a node in a schema document.
    notation = xsd_ns("notation")


class SchemaVisitor:
    """Visitor which processes XSD files and registers global elements and
    types in the given schema.

    Notes:

    TODO: include and import statements can reference other nodes. We need
    to load these first. Always global.

    :param schema: The schema used to resolve references and to create the
                   documents for imported schemas.
    :type schema: zeep.xsd.schema.Schema
    :param document: The document in which the visited items are registered.
    :type document: zeep.xsd.schema.SchemaDocument

    """

    def __init__(self, schema, document):
        self.document = document
        self.schema = schema
        # schemaLocation values which were already handled by visit_include
        self._includes = set()

    def register_element(self, qname: etree.QName, instance: xsd_elements.Element):
        """Register the global element on the document."""
        self.document.register_element(qname, instance)

    def register_attribute(
        self, name: etree.QName, instance: xsd_elements.Attribute
    ) -> None:
        """Register the global attribute on the document."""
        self.document.register_attribute(name, instance)

    def register_type(self, qname: etree.QName, instance) -> None:
        """Register the global type on the document."""
        self.document.register_type(qname, instance)

    def register_group(self, qname: etree.QName, instance: xsd_elements.Group):
        """Register the global group on the document."""
        self.document.register_group(qname, instance)

    def register_attribute_group(
        self, qname: etree.QName, instance: xsd_elements.AttributeGroup
    ) -> None:
        """Register the global attribute group on the document."""
        self.document.register_attribute_group(qname, instance)

    def register_import(self, namespace, document):
        """Register the imported document for the namespace on the document."""
        self.document.register_import(namespace, document)

    def process(self, node, parent):
        """Dispatch the node to the visit method registered for its tag.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises ValueError: when no visitor is registered for the node's tag
        :returns: whatever the selected visit method returns

        """
        visit_func = self.visitors.get(node.tag)
        if not visit_func:
            raise ValueError("No visitor defined for %r" % node.tag)
        return visit_func(self, node, parent)

    def process_ref_attribute(self, node, array_type=None):
        """Return a RefAttribute when the node has a ``ref`` attribute.

        Returns None when there is no ``ref`` attribute or when the reference
        points into the XML Schema namespace itself.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param array_type: passed through to the created RefAttribute

        """
        ref = qname_attr(node, "ref")
        if ref:
            ref = self._create_qname(ref)

            # Some wsdl's reference to xs:schema, we ignore that for now. It
            # might be better in the future to process the actual schema file
            # so that it is handled correctly
            if ref.namespace == "http://www.w3.org/2001/XMLSchema":
                return
            return xsd_elements.RefAttribute(
                node.tag, ref, self.schema, array_type=array_type
            )

    def process_reference(self, node, **kwargs):
        """Return the Ref* object matching the node when it has a ``ref``.

        Returns None when the node has no ``ref`` attribute.

        :param node: The XML node (element, attribute, group or
                     attributeGroup)
        :type node: lxml.etree._Element
        :param kwargs: passed through to the created reference object
        :raises ValueError: when the node has a ``ref`` attribute but is not
                            one of the supported tags

        """
        ref = qname_attr(node, "ref")
        if not ref:
            return

        ref = self._create_qname(ref)

        if node.tag == tags.element:
            cls = xsd_elements.RefElement
        elif node.tag == tags.attribute:
            cls = xsd_elements.RefAttribute
        elif node.tag == tags.group:
            cls = xsd_elements.RefGroup
        elif node.tag == tags.attributeGroup:
            cls = xsd_elements.RefAttributeGroup
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on ``cls``.
            raise ValueError("No reference type defined for %r" % node.tag)
        return cls(node.tag, ref, self.schema, **kwargs)

    def visit_schema(self, node):
        """Visit the xsd:schema element and process all the child elements

        Definition::

            <schema
              attributeFormDefault = (qualified | unqualified): unqualified
              blockDefault = (#all | List of (extension | restriction | substitution) : ''
              elementFormDefault = (qualified | unqualified): unqualified
              finalDefault = (#all | List of (extension | restriction | list | union): ''
              id = ID
              targetNamespace = anyURI
              version = token
              xml:lang = language
              {any attributes with non-schema Namespace}...>
            Content: (
                (include | import | redefine | annotation)*,
                (((simpleType | complexType | group | attributeGroup) |
                  element | attribute | notation),
                 annotation*)*)
            </schema>

        :param node: The XML node
        :type node: lxml.etree._Element

        """
        assert node is not None

        # A schema should always have a targetNamespace attribute, otherwise
        # it is called a chameleon schema. In that case the schema will inherit
        # the namespace of the enclosing schema/node.
        tns = node.get("targetNamespace")
        if tns:
            self.document._target_namespace = tns
        self.document._element_form = node.get("elementFormDefault", "unqualified")
        self.document._attribute_form = node.get("attributeFormDefault", "unqualified")

        for child in node:
            self.process(child, parent=node)

    def visit_import(self, node, parent):
        """Load the schema referenced by an xsd:import and register it.

        Definition::

            <import
              id = ID
              namespace = anyURI
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </import>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises XMLParseError: when neither the import nor the importing
                               schema defines a namespace, or when the
                               namespace of the import doesn't match the
                               targetNamespace of the loaded schema
        :returns: the imported document, or None when the import is ignored

        """
        namespace = node.get("namespace")
        location = node.get("schemaLocation")
        if location:
            location = normalize_location(
                self.schema.settings, location, self.document._base_url
            )

        if not namespace and not self.document._target_namespace:
            raise XMLParseError(
                "The attribute 'namespace' must be existent if the "
                "importing schema has no target namespace.",
                # Use the same attribute as the other error in this method.
                filename=self.document._location,
                sourceline=node.sourceline,
            )

        # We found an empty <import/> statement, this needs to trigger 4.1.2
        # from https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-resolve
        # for QName resolving.
        # In essence this means we will resolve QNames without a namespace to no
        # namespace instead of the target namespace.
        # The following code snippet works because imports have to occur before we
        # visit elements.
        if not namespace and not location:
            self.document._has_empty_import = True

        # Check if the schema is already imported before based on the
        # namespace. Schema's without namespace are registered as 'None'
        document = self.schema.documents.get_by_namespace_and_location(
            namespace, location
        )
        if document:
            logger.debug("Returning existing schema: %r", location)
            self.register_import(namespace, document)
            return document

        # Hardcode the mapping between the xml namespace and the xsd for now.
        # This seems to fix issues with exchange wsdl's, see #220
        if not location and namespace == "http://www.w3.org/XML/1998/namespace":
            location = "https://www.w3.org/2001/xml.xsd"

        # Silently ignore import statements which we can't resolve via the
        # namespace and doesn't have a schemaLocation attribute.
        if not location:
            logger.debug(
                "Ignoring import statement for namespace %r "
                "(missing schemaLocation)",
                namespace,
            )
            return

        # Load the XML
        schema_node = self._retrieve_data(location, base_url=self.document._location)

        # Check if the xsd:import namespace matches the targetNamespace. If
        # the xsd:import statement didn't specify a namespace then make sure
        # that the targetNamespace wasn't declared by another schema yet.
        schema_tns = schema_node.get("targetNamespace")
        if namespace and schema_tns and namespace != schema_tns:
            raise XMLParseError(
                (
                    "The namespace defined on the xsd:import doesn't match the "
                    "imported targetNamespace located at %r "
                )
                % (location),
                filename=self.document._location,
                sourceline=node.sourceline,
            )

        # If the imported schema doesn't define a target namespace and the
        # node doesn't specify it either then inherit the existing target
        # namespace.
        elif not schema_tns and not namespace:
            namespace = self.document._target_namespace

        schema = self.schema.create_new_document(
            schema_node, location, target_namespace=namespace
        )
        self.register_import(namespace, schema)
        return schema

    def visit_include(self, node, parent):
        """Process the children of an included schema in the current document.

        Definition::

            <include
              id = ID
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </include>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises NotImplementedError: when the schemaLocation is missing

        """
        location = node.get("schemaLocation")
        if not location:
            raise NotImplementedError("schemaLocation is required")

        if location in self._includes:
            return

        schema_node = self._retrieve_data(location, base_url=self.document._base_url)
        self._includes.add(location)

        # When the included document has no default namespace defined but the
        # parent document does have this then we should (at least for #360)
        # transfer the default namespace to the included schema. We can't
        # update the nsmap of elements in lxml so we create a new schema with
        # the correct nsmap and move all the content there.

        # Included schemas must have targetNamespace equal to parent schema (the including) or None.
        # If included schema doesn't have default ns, then it should be set to parent's targetNs.
        # See Chameleon Inclusion https://www.w3.org/TR/xmlschema11-1/#chameleon-xslt
        default_ns = node.nsmap.get(None)
        parent_tns = parent.attrib.get("targetNamespace")
        if not schema_node.nsmap.get(None) and (default_ns or parent_tns):
            nsmap = {None: default_ns or parent_tns}
            nsmap.update(schema_node.nsmap)
            new = etree.Element(schema_node.tag, nsmap=nsmap)
            for child in schema_node:
                new.append(child)
            for key, value in schema_node.attrib.items():
                new.set(key, value)
            # The parent may lack a targetNamespace when this branch was
            # entered because of the default namespace only; indexing the
            # attribute unconditionally raised a KeyError in that case.
            if parent_tns and not new.attrib.get("targetNamespace"):
                new.attrib["targetNamespace"] = parent_tns
            schema_node = new

        # Use the element/attribute form defaults from the schema while
        # processing the nodes.
        element_form_default = self.document._element_form
        attribute_form_default = self.document._attribute_form
        base_url = self.document._base_url

        self.document._element_form = schema_node.get(
            "elementFormDefault", "unqualified"
        )
        self.document._attribute_form = schema_node.get(
            "attributeFormDefault", "unqualified"
        )
        self.document._base_url = absolute_location(location, self.document._base_url)

        try:
            # Iterate directly over the children.
            for child in schema_node:
                self.process(child, parent=schema_node)
        finally:
            # Always restore the settings of the including document, also
            # when one of the visitors raised an exception.
            self.document._element_form = element_form_default
            self.document._attribute_form = attribute_form_default
            self.document._base_url = base_url

    def visit_element(self, node, parent):
        """Create an Element (or RefElement) for the xsd:element node.

        Definition::

            <element
              abstract = Boolean : false
              block = (#all | List of (extension | restriction | substitution))
              default = string
              final = (#all | List of (extension | restriction))
              fixed = string
              form = (qualified | unqualified)
              id = ID
              maxOccurs = (nonNegativeInteger | unbounded) : 1
              minOccurs = nonNegativeInteger : 1
              name = NCName
              nillable = Boolean : false
              ref = QName
              substitutionGroup = QName
              type = QName
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (
                      (simpleType | complexType)?, (unique | key | keyref)*))
            </element>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        is_global = parent.tag == tags.schema

        if is_global:
            # minOccurs / maxOccurs are not allowed on global elements
            min_occurs = 1
            max_occurs = 1
        else:
            min_occurs, max_occurs = _process_occurs_attrs(node)

            # Naive workaround to mark fields which are part of a choice element
            # as optional
            if parent.tag == tags.choice:
                min_occurs = 0

            # If the element has a ref attribute then all other attributes
            # cannot be present. Short circuit that here.
            # Ref is prohibited on global elements (parent = schema)
            result = self.process_reference(
                node, min_occurs=min_occurs, max_occurs=max_occurs
            )
            if result:
                return result

        element_form = node.get("form", self.document._element_form)
        if element_form == "qualified" or is_global:
            qname = qname_attr(node, "name", self.document._target_namespace)
        else:
            qname = etree.QName(node.get("name").strip())

        # An inline simpleType / complexType child defines the type; all
        # other children (annotation, unique, ...) are skipped here.
        xsd_type = None
        for child in node:
            if child.tag in (tags.simpleType, tags.complexType):
                xsd_type = self.process(child, node)

        if not xsd_type:
            node_type = qname_attr(node, "type")
            if node_type:
                xsd_type = self._get_type(node_type.text)
            else:
                xsd_type = xsd_types.AnyType()

        nillable = node.get("nillable") == "true"
        default = node.get("default")
        element = xsd_elements.Element(
            name=qname,
            type_=xsd_type,
            min_occurs=min_occurs,
            max_occurs=max_occurs,
            nillable=nillable,
            default=default,
            is_global=is_global,
        )

        # Only register global elements
        if is_global:
            self.register_element(qname, element)
        return element

    def visit_attribute(
        self, node: etree._Element, parent: etree._Element
    ) -> typing.Union[xsd_elements.Attribute, xsd_elements.RefAttribute]:
        """Declares an attribute.

        Definition::

            <attribute
              default = string
              fixed = string
              form = (qualified | unqualified)
              id = ID
              name = NCName
              ref = QName
              type = QName
              use = (optional | prohibited | required): optional
              {any attributes with non-schema Namespace...}>
            Content: (annotation?, (simpleType?))
            </attribute>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        is_global = parent.tag == tags.schema

        # Check for wsdl:arrayType
        array_type = node.get("{http://schemas.xmlsoap.org/wsdl/}arrayType")
        if array_type:
            # Only the part before the first '[' is the type name
            match = re.match(r"([^\[]+)", array_type)
            if match:
                array_type = match.groups()[0]
                qname = as_qname(array_type, node.nsmap)
                array_type = UnresolvedType(qname, self.schema)

        # If the element has a ref attribute then all other attributes cannot
        # be present. Short circuit that here.
        # Ref is prohibited on global elements (parent = schema)
        if not is_global:
            result = self.process_ref_attribute(node, array_type=array_type)
            if result:
                return result

        attribute_form = node.get("form", self.document._attribute_form)
        if attribute_form == "qualified" or is_global:
            name = qname_attr(node, "name", self.document._target_namespace)
        else:
            name = etree.QName(node.get("name"))

        annotation, items = self._pop_annotation(list(node))
        if items:
            xsd_type = self.visit_simple_type(items[0], node)
        else:
            node_type = qname_attr(node, "type")
            if node_type:
                xsd_type = self._get_type(node_type)
            else:
                xsd_type = xsd_types.AnyType()

        # TODO: We ignore 'prohibited' for now
        required = node.get("use") == "required"
        default = node.get("default")

        attr = xsd_elements.Attribute(
            name, type_=xsd_type, default=default, required=required
        )

        # Only register global elements
        if is_global:
            assert name is not None
            self.register_attribute(name, attr)
        return attr

    def visit_simple_type(self, node, parent):
        """Create the type for an xsd:simpleType node.

        Definition::

            <simpleType
              final = (#all | (list | union | restriction))
              id = ID
              name = NCName
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (restriction | list | union))
            </simpleType>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises AssertionError: when the child is not a restriction, list or
                                union

        """

        if parent.tag == tags.schema:
            name = node.get("name")
            is_global = True
        else:
            # Anonymous types take the name of the enclosing node
            name = parent.get("name", "Anonymous")
            is_global = False
        qname = as_qname(name, node.nsmap, self.document._target_namespace)

        annotation, items = self._pop_annotation(list(node))
        child = items[0]
        if child.tag == tags.restriction:
            base_type = self.visit_restriction_simple_type(child, node)
            xsd_type = UnresolvedCustomType(qname, base_type, self.schema)

        elif child.tag == tags.list:
            xsd_type = self.visit_list(child, node)

        elif child.tag == tags.union:
            xsd_type = self.visit_union(child, node)
        else:
            raise AssertionError("Unexpected child: %r" % child.tag)

        assert xsd_type is not None
        if is_global:
            self.register_type(qname, xsd_type)
        return xsd_type

    def visit_complex_type(self, node, parent):
        """Create a ComplexType subclass instance for an xsd:complexType node.

        Definition::

            <complexType
              abstract = Boolean : false
              block = (#all | List of (extension | restriction))
              final = (#all | List of (extension | restriction))
              id = ID
              mixed = Boolean : false
              name = NCName
              {any attributes with non-schema Namespace...}>
            Content: (annotation?, (simpleContent | complexContent |
                      ((group | all | choice | sequence)?,
                      ((attribute | attributeGroup)*, anyAttribute?))))
            </complexType>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        # If the complexType's parent is an element then this type is
        # anonymous and should have no name defined. Otherwise it's global
        if parent.tag == tags.schema:
            name = node.get("name")
            is_global = True
        else:
            # Same fallback as visit_simple_type: a parent without a name
            # would otherwise pass None to type() and as_qname().
            name = parent.get("name", "Anonymous")
            is_global = False

        qname = as_qname(name, node.nsmap, self.document._target_namespace)
        cls_attributes = {"__module__": "zeep.xsd.dynamic_types", "_xsd_name": qname}
        xsd_cls = type(name, (xsd_types.ComplexType,), cls_attributes)
        xsd_type = None

        # Process content
        annotation, children = self._pop_annotation(list(node))
        first_tag = children[0].tag if children else None

        if first_tag == tags.simpleContent:
            base_type, attributes = self.visit_simple_content(children[0], node)

            xsd_type = xsd_cls(
                attributes=attributes,
                extension=base_type,
                qname=qname,
                is_global=is_global,
            )

        elif first_tag == tags.complexContent:
            kwargs = self.visit_complex_content(children[0], node)
            xsd_type = xsd_cls(qname=qname, is_global=is_global, **kwargs)

        elif first_tag:
            element = None

            if first_tag in (tags.group, tags.all, tags.choice, tags.sequence):
                child = children.pop(0)
                element = self.process(child, node)

            # The remaining children are the attribute declarations
            attributes = self._process_attributes(node, children)
            xsd_type = xsd_cls(
                element=element, attributes=attributes, qname=qname, is_global=is_global
            )
        else:
            xsd_type = xsd_cls(qname=qname, is_global=is_global)

        if is_global:
            self.register_type(qname, xsd_type)
        return xsd_type

    def visit_complex_content(self, node, parent):
        """The complexContent element defines extensions or restrictions on a
        complex type that contains mixed content or elements only.

        Definition::

            <complexContent
              id = ID
              mixed = Boolean
              {any attributes with non-schema Namespace}...>
            Content: (annotation?,  (restriction | extension))
            </complexContent>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises AssertionError: when the last child is neither a restriction
                                nor an extension
        :returns: dict with the keys ``attributes``, ``element`` and either
                  ``restriction`` or ``extension``

        """
        children = list(node)
        child = children[-1]

        if child.tag == tags.restriction:
            base, element, attributes = self.visit_restriction_complex_content(
                child, node
            )
            return {"attributes": attributes, "element": element, "restriction": base}
        elif child.tag == tags.extension:
            base, element, attributes = self.visit_extension_complex_content(
                child, node
            )
            return {"attributes": attributes, "element": element, "extension": base}
        # Mirror visit_simple_content instead of returning None, which the
        # caller would try to unpack as keyword arguments.
        raise AssertionError("Expected restriction or extension")

    def visit_simple_content(self, node, parent):
        """Contains extensions or restrictions on a complexType element with
        character data or a simpleType element as content and contains no
        elements.

        Definition::

            <simpleContent
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (restriction | extension))
            </simpleContent>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises AssertionError: when the last child is neither a restriction
                                nor an extension
        :returns: tuple of (base type, attributes)

        """

        children = list(node)
        child = children[-1]

        if child.tag == tags.restriction:
            return self.visit_restriction_simple_content(child, node)
        elif child.tag == tags.extension:
            return self.visit_extension_simple_content(child, node)
        raise AssertionError("Expected restriction or extension")

    def visit_restriction_simple_type(self, node, parent):
        """Return the base type of a restriction within a simpleType.

        The ``base`` attribute is used when present, otherwise an inline
        simpleType child is visited. The facets are not processed here.

        Definition::

            <restriction
              base = QName
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?,
                (simpleType?, (
                    minExclusive | minInclusive | maxExclusive | maxInclusive |
                    totalDigits |fractionDigits | length | minLength |
                    maxLength | enumeration | whiteSpace | pattern)*))
            </restriction>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        base_name = qname_attr(node, "base")
        if base_name:
            return self._get_type(base_name)

        annotation, children = self._pop_annotation(list(node))
        if children[0].tag == tags.simpleType:
            return self.visit_simple_type(children[0], node)

    def visit_restriction_simple_content(self, node, parent):
        """Return the base type of a restriction within a simpleContent.

        The child nodes are not processed; the attribute list is always empty.

        Definition::

            <restriction
              base = QName
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?,
                (simpleType?, (
                    minExclusive | minInclusive | maxExclusive | maxInclusive |
                    totalDigits |fractionDigits | length | minLength |
                    maxLength | enumeration | whiteSpace | pattern)*
                )?, ((attribute | attributeGroup)*, anyAttribute?))
            </restriction>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: tuple of (base type, empty attribute list)

        """
        base_name = qname_attr(node, "base")
        base_type = self._get_type(base_name)
        return base_type, []

    def visit_restriction_complex_content(self, node, parent):
        """Process a restriction within a complexContent.

        Definition::

            <restriction
              base = QName
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (group | all | choice | sequence)?,
                    ((attribute | attributeGroup)*, anyAttribute?))
            </restriction>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: tuple of (base type, element, attributes)

        """
        base_name = qname_attr(node, "base")
        base_type = self._get_type(base_name)
        annotation, children = self._pop_annotation(list(node))

        element = None
        attributes = []

        if children:
            child = children[0]
            if child.tag in (tags.group, tags.all, tags.choice, tags.sequence):
                children.pop(0)
                element = self.process(child, node)
            attributes = self._process_attributes(node, children)
        return base_type, element, attributes

    def visit_extension_complex_content(self, node, parent):
        """Process an extension within a complexContent.

        Definition::

            <extension
              base = QName
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (
                        (group | all | choice | sequence)?,
                        ((attribute | attributeGroup)*, anyAttribute?)))
            </extension>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: tuple of (base type, element, attributes)

        """
        base_name = qname_attr(node, "base")
        base_type = self._get_type(base_name)
        annotation, children = self._pop_annotation(list(node))

        element = None
        attributes = []

        if children:
            child = children[0]
            if child.tag in (tags.group, tags.all, tags.choice, tags.sequence):
                children.pop(0)
                element = self.process(child, node)
            attributes = self._process_attributes(node, children)

        return base_type, element, attributes

    def visit_extension_simple_content(self, node, parent):
        """Process an extension within a simpleContent.

        Definition::

            <extension
              base = QName
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, ((attribute | attributeGroup)*, anyAttribute?))
            </extension>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: tuple of (base type, attributes)

        """
        base_name = qname_attr(node, "base")
        base_type = self._get_type(base_name)
        annotation, children = self._pop_annotation(list(node))
        attributes = self._process_attributes(node, children)

        return base_type, attributes

    def visit_annotation(self, node, parent):
        """Defines an annotation.

        Annotations are ignored, this always returns None.

        Definition::

            <annotation
              id = ID
              {any attributes with non-schema Namespace}...>
            Content: (appinfo | documentation)*
            </annotation>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        return

    def visit_any(self, node, parent):
        """Create an Any element for the xsd:any node.

        Definition::

            <any
              id = ID
              maxOccurs = (nonNegativeInteger | unbounded) : 1
              minOccurs = nonNegativeInteger : 1
              namespace = "(##any | ##other) |
                List of (anyURI | (##targetNamespace |  ##local))) : ##any
              processContents = (lax | skip | strict) : strict
              {any attributes with non-schema Namespace...}>
            Content: (annotation?)
            </any>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        min_occurs, max_occurs = _process_occurs_attrs(node)
        process_contents = node.get("processContents", "strict")
        return xsd_elements.Any(
            max_occurs=max_occurs,
            min_occurs=min_occurs,
            process_contents=process_contents,
        )

    def visit_sequence(self, node, parent):
        """Create a Sequence with the processed children of the node.

        Definition::

            <sequence
              id = ID
              maxOccurs = (nonNegativeInteger | unbounded) : 1
              minOccurs = nonNegativeInteger : 1
              {any attributes with non-schema Namespace}...>
            Content: (annotation?,
                      (element | group | choice | sequence | any)*)
            </sequence>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """

        sub_types = [
            tags.annotation,
            tags.any,
            tags.choice,
            tags.element,
            tags.group,
            tags.sequence,
        ]
        min_occurs, max_occurs = _process_occurs_attrs(node)
        result = xsd_elements.Sequence(min_occurs=min_occurs, max_occurs=max_occurs)

        annotation, children = self._pop_annotation(list(node))
        for child in children:
            if child.tag not in sub_types:
                raise self._create_error(
                    "Unexpected element %s in xsd:sequence" % child.tag, child
                )

            item = self.process(child, node)
            assert item is not None
            result.append(item)

        assert None not in result
        return result

    def visit_all(self, node, parent):
        """Allows the elements in the group to appear (or not appear) in any
        order in the containing element.

        Definition::

            <all
              id = ID
              maxOccurs= 1: 1
              minOccurs= (0 | 1): 1
              {any attributes with non-schema Namespace...}>
            Content: (annotation?, element*)
            </all>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """

        sub_types = [tags.annotation, tags.element]
        result = xsd_elements.All()

        annotation, children = self._pop_annotation(list(node))
        for child in children:
            assert child.tag in sub_types, child
            item = self.process(child, node)
            result.append(item)

        assert None not in result
        return result

    def visit_group(self, node, parent):
        """Groups a set of element declarations so that they can be
        incorporated as a group into complex type definitions.

        Definition::

            <group
              name= NCName
              id = ID
              maxOccurs = (nonNegativeInteger | unbounded) : 1
              minOccurs = nonNegativeInteger : 1
              name = NCName
              ref = QName
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (all | choice | sequence))
            </group>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        min_occurs, max_occurs = _process_occurs_attrs(node)

        result = self.process_reference(
            node, min_occurs=min_occurs, max_occurs=max_occurs
        )
        if result:
            return result

        qname = qname_attr(node, "name", self.document._target_namespace)

        # Besides the (irrelevant) annotation only the first child is used
        annotation, children = self._pop_annotation(list(node))
        child = children[0]

        item = self.process(child, parent)
        elm = xsd_elements.Group(name=qname, child=item)

        if parent.tag == tags.schema:
            self.register_group(qname, elm)
        return elm

    def visit_list(self, node, parent):
        """Create a ListType for the xsd:list node.

        Definition::

            <list
              id = ID
              itemType = QName
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (simpleType?))
            </list>

        The use of the simpleType element child and the itemType attribute is
        mutually exclusive.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element


        """
        item_type = qname_attr(node, "itemType")
        if item_type:
            sub_type = self._get_type(item_type.text)
        else:
            subnodes = list(node)
            child = subnodes[-1]  # skip annotation
            sub_type = self.visit_simple_type(child, node)
        return xsd_types.ListType(sub_type)

    def visit_choice(self, node, parent):
        """Create a Choice with the processed children of the node.

        Definition::

            <choice
              id = ID
              maxOccurs= (nonNegativeInteger | unbounded) : 1
              minOccurs= nonNegativeInteger : 1
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (element | group | choice | sequence | any)*)
            </choice>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        min_occurs, max_occurs = _process_occurs_attrs(node)

        annotation, children = self._pop_annotation(list(node))

        choices = []
        for child in children:
            elm = self.process(child, node)
            choices.append(elm)
        return xsd_elements.Choice(
            choices, min_occurs=min_occurs, max_occurs=max_occurs
        )

    def visit_union(self, node, parent):
        """Defines a collection of multiple simpleType definitions.

        The ``memberTypes`` attribute is used when present, otherwise the
        inline simpleType children are visited.

        Definition::

            <union
              id = ID
              memberTypes = List of QNames
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (simpleType*))
            </union>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        # TODO
        members = node.get("memberTypes")
        types = []
        if members:
            for member in members.split():
                qname = as_qname(member, node.nsmap)
                xsd_type = self._get_type(qname)
                types.append(xsd_type)
        else:
            annotation, types = self._pop_annotation(list(node))
            types = [self.visit_simple_type(t, node) for t in types]
        return xsd_types.UnionType(types)

    def visit_unique(self, node, parent):
        """Specifies that an attribute or element value (or a combination of
        attribute or element values) must be unique within the specified scope.
        The value must be unique or nil.

        Not implemented yet, the node is ignored.

        Definition::

            <unique
              id = ID
              name = NCName
              {any attributes with non-schema Namespace}...>
            Content: (annotation?, (selector, field+))
            </unique>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        # TODO
        pass

    def visit_attribute_group(self, node, parent):
        """
        Definition::

            <attributeGroup
              id = ID
              name = NCName
              ref = QName
              {any attributes with non-schema Namespace...}>
            Content: (annotation?),
                     ((attribute | attributeGroup)*, anyAttribute?))
            </attributeGroup>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element

        """
        ref = self.process_reference(node)
        if ref:
            return ref

        qname = qname_attr(node, "name", self.document._target_namespace)
1145 annotation, children = self._pop_annotation(list(node)) 1146 1147 attributes = self._process_attributes(node, children) 1148 attribute_group = xsd_elements.AttributeGroup(qname, attributes) 1149 self.register_attribute_group(qname, attribute_group) 1150 1151 def visit_any_attribute(self, node, parent): 1152 """ 1153 Definition:: 1154 1155 <anyAttribute 1156 id = ID 1157 namespace = ((##any | ##other) | 1158 List of (anyURI | (##targetNamespace | ##local))) : ##any 1159 processContents = (lax | skip | strict): strict 1160 {any attributes with non-schema Namespace...}> 1161 Content: (annotation?) 1162 </anyAttribute> 1163 1164 :param node: The XML node 1165 :type node: lxml.etree._Element 1166 :param parent: The parent XML node 1167 :type parent: lxml.etree._Element 1168 1169 """ 1170 process_contents = node.get("processContents", "strict") 1171 return xsd_elements.AnyAttribute(process_contents=process_contents) 1172 1173 def visit_notation(self, node, parent): 1174 """Contains the definition of a notation to describe the format of 1175 non-XML data within an XML document. An XML Schema notation declaration 1176 is a reconstruction of XML 1.0 NOTATION declarations. 1177 1178 Definition:: 1179 1180 <notation 1181 id = ID 1182 name = NCName 1183 public = Public identifier per ISO 8879 1184 system = anyURI 1185 {any attributes with non-schema Namespace}...> 1186 Content: (annotation?) 
1187 </notation> 1188 1189 :param node: The XML node 1190 :type node: lxml.etree._Element 1191 :param parent: The parent XML node 1192 :type parent: lxml.etree._Element 1193 1194 """ 1195 pass 1196 1197 def _retrieve_data(self, url: typing.IO, base_url=None): 1198 return load_external( 1199 url, self.schema._transport, base_url, settings=self.schema.settings 1200 ) 1201 1202 def _get_type(self, name): 1203 assert name is not None 1204 name = self._create_qname(name) 1205 return UnresolvedType(name, self.schema) 1206 1207 def _create_qname(self, name): 1208 if not isinstance(name, etree.QName): 1209 name = etree.QName(name) 1210 1211 # Handle reserved namespace 1212 if name.namespace == "xml": 1213 name = etree.QName("http://www.w3.org/XML/1998/namespace", name.localname) 1214 1215 # Various xsd builders assume that some schema's are available by 1216 # default (actually this is mostly just the soap-enc ns). So live with 1217 # that fact and handle it by auto-importing the schema if it is 1218 # referenced. 
1219 if name.namespace in AUTO_IMPORT_NAMESPACES and not self.document.is_imported( 1220 name.namespace 1221 ): 1222 logger.debug("Auto importing missing known schema: %s", name.namespace) 1223 import_node = etree.Element( 1224 tags.import_, namespace=name.namespace, schemaLocation=name.namespace 1225 ) 1226 self.visit_import(import_node, None) 1227 1228 if ( 1229 not name.namespace 1230 and self.document._element_form == "qualified" 1231 and self.document._target_namespace 1232 and not self.document._has_empty_import 1233 ): 1234 name = etree.QName(self.document._target_namespace, name.localname) 1235 return name 1236 1237 def _pop_annotation(self, items): 1238 if not len(items): 1239 return None, [] 1240 1241 if items[0].tag == tags.annotation: 1242 annotation = self.visit_annotation(items[0], None) 1243 return annotation, items[1:] 1244 return None, items 1245 1246 def _process_attributes(self, node, items): 1247 attributes = [] 1248 for child in items: 1249 if child.tag in (tags.attribute, tags.attributeGroup, tags.anyAttribute): 1250 attribute = self.process(child, node) 1251 attributes.append(attribute) 1252 else: 1253 raise self._create_error("Unexpected tag `%s`" % (child.tag), node) 1254 return attributes 1255 1256 def _create_error(self, message, node): 1257 return XMLParseError( 1258 message, filename=self.document._location, sourceline=node.sourceline 1259 ) 1260 1261 visitors = { 1262 tags.any: visit_any, 1263 tags.element: visit_element, 1264 tags.choice: visit_choice, 1265 tags.simpleType: visit_simple_type, 1266 tags.anyAttribute: visit_any_attribute, 1267 tags.complexType: visit_complex_type, 1268 tags.simpleContent: None, 1269 tags.complexContent: None, 1270 tags.sequence: visit_sequence, 1271 tags.all: visit_all, 1272 tags.group: visit_group, 1273 tags.attribute: visit_attribute, 1274 tags.import_: visit_import, 1275 tags.include: visit_include, 1276 tags.annotation: visit_annotation, 1277 tags.attributeGroup: visit_attribute_group, 1278 
tags.notation: visit_notation, 1279 } 1280 1281 1282def _process_occurs_attrs(node): 1283 """Process the min/max occurrence indicators""" 1284 max_occurs = node.get("maxOccurs", "1") 1285 min_occurs = int(node.get("minOccurs", "1")) 1286 if max_occurs == "unbounded": 1287 max_occurs = "unbounded" 1288 else: 1289 max_occurs = int(max_occurs) 1290 1291 return min_occurs, max_occurs 1292