1import logging
2import re
3import typing
4
5from lxml import etree
6
7from zeep.exceptions import XMLParseError
8from zeep.loader import absolute_location, load_external, normalize_location
9from zeep.utils import as_qname, qname_attr
10from zeep.xsd import elements as xsd_elements
11from zeep.xsd import types as xsd_types
12from zeep.xsd.const import AUTO_IMPORT_NAMESPACES, xsd_ns
13from zeep.xsd.types.unresolved import UnresolvedCustomType, UnresolvedType
14
15logger = logging.getLogger(__name__)
16
17
class tags:
    """Namespace of qualified XSD tag names, one attribute per XSD element.

    Each value is whatever ``xsd_ns(<local name>)`` returns for that element
    name. The visitor compares these values against ``node.tag`` to decide
    how a node has to be handled.

    ``import_`` carries a trailing underscore because ``import`` is a Python
    keyword. ``all``, ``list`` and ``any`` only shadow the builtins inside
    this class body; they are kept because the attribute names are part of
    the interface used by the visitor (e.g. ``tags.all``).
    """

    schema = xsd_ns("schema")
    import_ = xsd_ns("import")
    include = xsd_ns("include")
    annotation = xsd_ns("annotation")
    element = xsd_ns("element")
    simpleType = xsd_ns("simpleType")
    complexType = xsd_ns("complexType")
    simpleContent = xsd_ns("simpleContent")
    complexContent = xsd_ns("complexContent")
    sequence = xsd_ns("sequence")
    group = xsd_ns("group")
    choice = xsd_ns("choice")
    all = xsd_ns("all")
    list = xsd_ns("list")
    union = xsd_ns("union")
    attribute = xsd_ns("attribute")
    any = xsd_ns("any")
    anyAttribute = xsd_ns("anyAttribute")
    attributeGroup = xsd_ns("attributeGroup")
    restriction = xsd_ns("restriction")
    extension = xsd_ns("extension")
    # The XSD element is called ``notation`` (singular). The previous value
    # "notations" does not exist in XML Schema, so it could never equal the
    # tag of a real <xsd:notation> node.
    notation = xsd_ns("notation")
41
42
43class SchemaVisitor:
44    """Visitor which processes XSD files and registers global elements and
45    types in the given schema.
46
47    Notes:
48
49    TODO: include and import statements can reference other nodes. We need
50    to load these first. Always global.
51
52
53
54
55    :param schema:
56    :type schema: zeep.xsd.schema.Schema
57    :param document:
58    :type document: zeep.xsd.schema.SchemaDocument
59
60    """
61
62    def __init__(self, schema, document):
63        self.document = document
64        self.schema = schema
65        self._includes = set()
66
    def register_element(self, qname: etree.QName, instance: xsd_elements.Element) -> None:
        """Register ``instance`` under ``qname`` on the current document.

        Pure delegation to ``self.document.register_element``.

        :param qname: The name the element is registered under
        :param instance: The element object to register

        """
        self.document.register_element(qname, instance)
69
    def register_attribute(
        self, name: etree.QName, instance: xsd_elements.Attribute
    ) -> None:
        """Register ``instance`` under ``name`` on the current document.

        Pure delegation to ``self.document.register_attribute``.

        :param name: The name the attribute is registered under
        :param instance: The attribute object to register

        """
        self.document.register_attribute(name, instance)
74
    def register_type(self, qname: etree.QName, instance) -> None:
        """Register the type ``instance`` under ``qname`` on the current document.

        Pure delegation to ``self.document.register_type``.

        :param qname: The name the type is registered under
        :param instance: The type object to register

        """
        self.document.register_type(qname, instance)
77
    def register_group(self, qname: etree.QName, instance: xsd_elements.Group) -> None:
        """Register the group ``instance`` under ``qname`` on the current document.

        Pure delegation to ``self.document.register_group``.

        :param qname: The name the group is registered under
        :param instance: The group object to register

        """
        self.document.register_group(qname, instance)
80
    def register_attribute_group(
        self, qname: etree.QName, instance: xsd_elements.AttributeGroup
    ) -> None:
        """Register the attribute group ``instance`` under ``qname``.

        Pure delegation to ``self.document.register_attribute_group``.

        :param qname: The name the attribute group is registered under
        :param instance: The attribute group object to register

        """
        self.document.register_attribute_group(qname, instance)
85
    def register_import(self, namespace, document) -> None:
        """Record on the current document that ``namespace`` is imported.

        Pure delegation to ``self.document.register_import``.

        :param namespace: The namespace of the import (may be ``None``, see
                          ``visit_import``)
        :param document: The document object that backs the import

        """
        self.document.register_import(namespace, document)
88
89    def process(self, node, parent):
90        visit_func = self.visitors.get(node.tag)
91        if not visit_func:
92            raise ValueError("No visitor defined for %r" % node.tag)
93        result = visit_func(self, node, parent)
94        return result
95
96    def process_ref_attribute(self, node, array_type=None):
97        ref = qname_attr(node, "ref")
98        if ref:
99            ref = self._create_qname(ref)
100
101            # Some wsdl's reference to xs:schema, we ignore that for now. It
102            # might be better in the future to process the actual schema file
103            # so that it is handled correctly
104            if ref.namespace == "http://www.w3.org/2001/XMLSchema":
105                return
106            return xsd_elements.RefAttribute(
107                node.tag, ref, self.schema, array_type=array_type
108            )
109
110    def process_reference(self, node, **kwargs):
111        ref = qname_attr(node, "ref")
112        if not ref:
113            return
114
115        ref = self._create_qname(ref)
116
117        if node.tag == tags.element:
118            cls = xsd_elements.RefElement
119        elif node.tag == tags.attribute:
120            cls = xsd_elements.RefAttribute
121        elif node.tag == tags.group:
122            cls = xsd_elements.RefGroup
123        elif node.tag == tags.attributeGroup:
124            cls = xsd_elements.RefAttributeGroup
125        return cls(node.tag, ref, self.schema, **kwargs)
126
127    def visit_schema(self, node):
128        """Visit the xsd:schema element and process all the child elements
129
130        Definition::
131
132            <schema
133              attributeFormDefault = (qualified | unqualified): unqualified
134              blockDefault = (#all | List of (extension | restriction | substitution) : ''
135              elementFormDefault = (qualified | unqualified): unqualified
136              finalDefault = (#all | List of (extension | restriction | list | union): ''
137              id = ID
138              targetNamespace = anyURI
139              version = token
140              xml:lang = language
141              {any attributes with non-schema Namespace}...>
142            Content: (
143                (include | import | redefine | annotation)*,
144                (((simpleType | complexType | group | attributeGroup) |
145                  element | attribute | notation),
146                 annotation*)*)
147            </schema>
148
149        :param node: The XML node
150        :type node: lxml.etree._Element
151
152        """
153        assert node is not None
154
155        # A schema should always have a targetNamespace attribute, otherwise
156        # it is called a chameleon schema. In that case the schema will inherit
157        # the namespace of the enclosing schema/node.
158        tns = node.get("targetNamespace")
159        if tns:
160            self.document._target_namespace = tns
161        self.document._element_form = node.get("elementFormDefault", "unqualified")
162        self.document._attribute_form = node.get("attributeFormDefault", "unqualified")
163
164        for child in node:
165            self.process(child, parent=node)
166
167    def visit_import(self, node, parent):
168        """
169
170        Definition::
171
172            <import
173              id = ID
174              namespace = anyURI
175              schemaLocation = anyURI
176              {any attributes with non-schema Namespace}...>
177            Content: (annotation?)
178            </import>
179
180        :param node: The XML node
181        :type node: lxml.etree._Element
182        :param parent: The parent XML node
183        :type parent: lxml.etree._Element
184
185        """
186        schema_node = None
187        namespace = node.get("namespace")
188        location = node.get("schemaLocation")
189        if location:
190            location = normalize_location(
191                self.schema.settings, location, self.document._base_url
192            )
193
194        if not namespace and not self.document._target_namespace:
195            raise XMLParseError(
196                "The attribute 'namespace' must be existent if the "
197                "importing schema has no target namespace.",
198                filename=self.document.location,
199                sourceline=node.sourceline,
200            )
201
202        # We found an empty <import/> statement, this needs to trigger 4.1.2
203        # from https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-resolve
204        # for QName resolving.
205        # In essence this means we will resolve QNames without a namespace to no
206        # namespace instead of the target namespace.
207        # The following code snippet works because imports have to occur before we
208        # visit elements.
209        if not namespace and not location:
210            self.document._has_empty_import = True
211
212        # Check if the schema is already imported before based on the
213        # namespace. Schema's without namespace are registered as 'None'
214        document = self.schema.documents.get_by_namespace_and_location(
215            namespace, location
216        )
217        if document:
218            logger.debug("Returning existing schema: %r", location)
219            self.register_import(namespace, document)
220            return document
221
222        # Hardcode the mapping between the xml namespace and the xsd for now.
223        # This seems to fix issues with exchange wsdl's, see #220
224        if not location and namespace == "http://www.w3.org/XML/1998/namespace":
225            location = "https://www.w3.org/2001/xml.xsd"
226
227        # Silently ignore import statements which we can't resolve via the
228        # namespace and doesn't have a schemaLocation attribute.
229        if not location:
230            logger.debug(
231                "Ignoring import statement for namespace %r "
232                + "(missing schemaLocation)",
233                namespace,
234            )
235            return
236
237        # Load the XML
238        schema_node = self._retrieve_data(location, base_url=self.document._location)
239
240        # Check if the xsd:import namespace matches the targetNamespace. If
241        # the xsd:import statement didn't specify a namespace then make sure
242        # that the targetNamespace wasn't declared by another schema yet.
243        schema_tns = schema_node.get("targetNamespace")
244        if namespace and schema_tns and namespace != schema_tns:
245            raise XMLParseError(
246                (
247                    "The namespace defined on the xsd:import doesn't match the "
248                    "imported targetNamespace located at %r "
249                )
250                % (location),
251                filename=self.document._location,
252                sourceline=node.sourceline,
253            )
254
255        # If the imported schema doesn't define a target namespace and the
256        # node doesn't specify it either then inherit the existing target
257        # namespace.
258        elif not schema_tns and not namespace:
259            namespace = self.document._target_namespace
260
261        schema = self.schema.create_new_document(
262            schema_node, location, target_namespace=namespace
263        )
264        self.register_import(namespace, schema)
265        return schema
266
267    def visit_include(self, node, parent):
268        """
269
270        Definition::
271
272            <include
273              id = ID
274              schemaLocation = anyURI
275              {any attributes with non-schema Namespace}...>
276            Content: (annotation?)
277            </include>
278
279        :param node: The XML node
280        :type node: lxml.etree._Element
281        :param parent: The parent XML node
282        :type parent: lxml.etree._Element
283
284        """
285        if not node.get("schemaLocation"):
286            raise NotImplementedError("schemaLocation is required")
287        location = node.get("schemaLocation")
288
289        if location in self._includes:
290            return
291
292        schema_node = self._retrieve_data(location, base_url=self.document._base_url)
293        self._includes.add(location)
294
295        # When the included document has no default namespace defined but the
296        # parent document does have this then we should (atleast for #360)
297        # transfer the default namespace to the included schema. We can't
298        # update the nsmap of elements in lxml so we create a new schema with
299        # the correct nsmap and move all the content there.
300
301        # Included schemas must have targetNamespace equal to parent schema (the including) or None.
302        # If included schema doesn't have default ns, then it should be set to parent's targetNs.
303        # See Chameleon Inclusion https://www.w3.org/TR/xmlschema11-1/#chameleon-xslt
304        if not schema_node.nsmap.get(None) and (
305            node.nsmap.get(None) or parent.attrib.get("targetNamespace")
306        ):
307            nsmap = {None: node.nsmap.get(None) or parent.attrib["targetNamespace"]}
308            nsmap.update(schema_node.nsmap)
309            new = etree.Element(schema_node.tag, nsmap=nsmap)
310            for child in schema_node:
311                new.append(child)
312            for key, value in schema_node.attrib.items():
313                new.set(key, value)
314            if not new.attrib.get("targetNamespace"):
315                new.attrib["targetNamespace"] = parent.attrib["targetNamespace"]
316            schema_node = new
317
318        # Use the element/attribute form defaults from the schema while
319        # processing the nodes.
320        element_form_default = self.document._element_form
321        attribute_form_default = self.document._attribute_form
322        base_url = self.document._base_url
323
324        self.document._element_form = schema_node.get(
325            "elementFormDefault", "unqualified"
326        )
327        self.document._attribute_form = schema_node.get(
328            "attributeFormDefault", "unqualified"
329        )
330        self.document._base_url = absolute_location(location, self.document._base_url)
331
332        # Iterate directly over the children.
333        for child in schema_node:
334            self.process(child, parent=schema_node)
335
336        self.document._element_form = element_form_default
337        self.document._attribute_form = attribute_form_default
338        self.document._base_url = base_url
339
340    def visit_element(self, node, parent):
341        """
342
343        Definition::
344
345            <element
346              abstract = Boolean : false
347              block = (#all | List of (extension | restriction | substitution))
348              default = string
349              final = (#all | List of (extension | restriction))
350              fixed = string
351              form = (qualified | unqualified)
352              id = ID
353              maxOccurs = (nonNegativeInteger | unbounded) : 1
354              minOccurs = nonNegativeInteger : 1
355              name = NCName
356              nillable = Boolean : false
357              ref = QName
358              substitutionGroup = QName
359              type = QName
360              {any attributes with non-schema Namespace}...>
361            Content: (annotation?, (
362                      (simpleType | complexType)?, (unique | key | keyref)*))
363            </element>
364
365        :param node: The XML node
366        :type node: lxml.etree._Element
367        :param parent: The parent XML node
368        :type parent: lxml.etree._Element
369
370        """
371        is_global = parent.tag == tags.schema
372
373        # minOccurs / maxOccurs are not allowed on global elements
374        if not is_global:
375            min_occurs, max_occurs = _process_occurs_attrs(node)
376        else:
377            max_occurs = 1
378            min_occurs = 1
379
380        # If the element has a ref attribute then all other attributes cannot
381        # be present. Short circuit that here.
382        # Ref is prohibited on global elements (parent = schema)
383        if not is_global:
384            # Naive workaround to mark fields which are part of a choice element
385            # as optional
386            if parent.tag == tags.choice:
387                min_occurs = 0
388            result = self.process_reference(
389                node, min_occurs=min_occurs, max_occurs=max_occurs
390            )
391            if result:
392                return result
393
394        element_form = node.get("form", self.document._element_form)
395        if element_form == "qualified" or is_global:
396            qname = qname_attr(node, "name", self.document._target_namespace)
397        else:
398            qname = etree.QName(node.get("name").strip())
399
400        children = list(node)
401        xsd_type = None
402        if children:
403            value = None
404
405            for child in children:
406                if child.tag == tags.annotation:
407                    continue
408
409                elif child.tag in (tags.simpleType, tags.complexType):
410                    assert not value
411
412                    xsd_type = self.process(child, node)
413
414        if not xsd_type:
415            node_type = qname_attr(node, "type")
416            if node_type:
417                xsd_type = self._get_type(node_type.text)
418            else:
419                xsd_type = xsd_types.AnyType()
420
421        nillable = node.get("nillable") == "true"
422        default = node.get("default")
423        element = xsd_elements.Element(
424            name=qname,
425            type_=xsd_type,
426            min_occurs=min_occurs,
427            max_occurs=max_occurs,
428            nillable=nillable,
429            default=default,
430            is_global=is_global,
431        )
432
433        # Only register global elements
434        if is_global:
435            self.register_element(qname, element)
436        return element
437
438    def visit_attribute(
439        self, node: etree._Element, parent: etree._Element
440    ) -> typing.Union[xsd_elements.Attribute, xsd_elements.RefAttribute]:
441        """Declares an attribute.
442
443        Definition::
444
445            <attribute
446              default = string
447              fixed = string
448              form = (qualified | unqualified)
449              id = ID
450              name = NCName
451              ref = QName
452              type = QName
453              use = (optional | prohibited | required): optional
454              {any attributes with non-schema Namespace...}>
455            Content: (annotation?, (simpleType?))
456            </attribute>
457
458        :param node: The XML node
459        :type node: lxml.etree._Element
460        :param parent: The parent XML node
461        :type parent: lxml.etree._Element
462
463        """
464        is_global = parent.tag == tags.schema
465
466        # Check of wsdl:arayType
467        array_type = node.get("{http://schemas.xmlsoap.org/wsdl/}arrayType")
468        if array_type:
469            match = re.match(r"([^\[]+)", array_type)
470            if match:
471                array_type = match.groups()[0]
472                qname = as_qname(array_type, node.nsmap)
473                array_type = UnresolvedType(qname, self.schema)
474
475        # If the elment has a ref attribute then all other attributes cannot
476        # be present. Short circuit that here.
477        # Ref is prohibited on global elements (parent = schema)
478        if not is_global:
479            result = self.process_ref_attribute(node, array_type=array_type)
480            if result:
481                return result
482
483        attribute_form = node.get("form", self.document._attribute_form)
484        if attribute_form == "qualified" or is_global:
485            name = qname_attr(node, "name", self.document._target_namespace)
486        else:
487            name = etree.QName(node.get("name"))
488
489        annotation, items = self._pop_annotation(list(node))
490        if items:
491            xsd_type = self.visit_simple_type(items[0], node)
492        else:
493            node_type = qname_attr(node, "type")
494            if node_type:
495                xsd_type = self._get_type(node_type)
496            else:
497                xsd_type = xsd_types.AnyType()
498
499        # TODO: We ignore 'prohobited' for now
500        required = node.get("use") == "required"
501        default = node.get("default")
502
503        attr = xsd_elements.Attribute(
504            name, type_=xsd_type, default=default, required=required
505        )
506
507        # Only register global elements
508        if is_global:
509            assert name is not None
510            self.register_attribute(name, attr)
511        return attr
512
513    def visit_simple_type(self, node, parent):
514        """
515        Definition::
516
517            <simpleType
518              final = (#all | (list | union | restriction))
519              id = ID
520              name = NCName
521              {any attributes with non-schema Namespace}...>
522            Content: (annotation?, (restriction | list | union))
523            </simpleType>
524
525        :param node: The XML node
526        :type node: lxml.etree._Element
527        :param parent: The parent XML node
528        :type parent: lxml.etree._Element
529
530        """
531
532        if parent.tag == tags.schema:
533            name = node.get("name")
534            is_global = True
535        else:
536            name = parent.get("name", "Anonymous")
537            is_global = False
538        base_type = "{http://www.w3.org/2001/XMLSchema}string"
539        qname = as_qname(name, node.nsmap, self.document._target_namespace)
540
541        annotation, items = self._pop_annotation(list(node))
542        child = items[0]
543        if child.tag == tags.restriction:
544            base_type = self.visit_restriction_simple_type(child, node)
545            xsd_type = UnresolvedCustomType(qname, base_type, self.schema)
546
547        elif child.tag == tags.list:
548            xsd_type = self.visit_list(child, node)
549
550        elif child.tag == tags.union:
551            xsd_type = self.visit_union(child, node)
552        else:
553            raise AssertionError("Unexpected child: %r" % child.tag)
554
555        assert xsd_type is not None
556        if is_global:
557            self.register_type(qname, xsd_type)
558        return xsd_type
559
560    def visit_complex_type(self, node, parent):
561        """
562        Definition::
563
564            <complexType
565              abstract = Boolean : false
566              block = (#all | List of (extension | restriction))
567              final = (#all | List of (extension | restriction))
568              id = ID
569              mixed = Boolean : false
570              name = NCName
571              {any attributes with non-schema Namespace...}>
572            Content: (annotation?, (simpleContent | complexContent |
573                      ((group | all | choice | sequence)?,
574                      ((attribute | attributeGroup)*, anyAttribute?))))
575            </complexType>
576
577        :param node: The XML node
578        :type node: lxml.etree._Element
579        :param parent: The parent XML node
580        :type parent: lxml.etree._Element
581
582        """
583        children = []
584        base_type = "{http://www.w3.org/2001/XMLSchema}anyType"
585
586        # If the complexType's parent is an element then this type is
587        # anonymous and should have no name defined. Otherwise it's global
588        if parent.tag == tags.schema:
589            name = node.get("name")
590            is_global = True
591        else:
592            name = parent.get("name")
593            is_global = False
594
595        qname = as_qname(name, node.nsmap, self.document._target_namespace)
596        cls_attributes = {"__module__": "zeep.xsd.dynamic_types", "_xsd_name": qname}
597        xsd_cls = type(name, (xsd_types.ComplexType,), cls_attributes)
598        xsd_type = None
599
600        # Process content
601        annotation, children = self._pop_annotation(list(node))
602        first_tag = children[0].tag if children else None
603
604        if first_tag == tags.simpleContent:
605            base_type, attributes = self.visit_simple_content(children[0], node)
606
607            xsd_type = xsd_cls(
608                attributes=attributes,
609                extension=base_type,
610                qname=qname,
611                is_global=is_global,
612            )
613
614        elif first_tag == tags.complexContent:
615            kwargs = self.visit_complex_content(children[0], node)
616            xsd_type = xsd_cls(qname=qname, is_global=is_global, **kwargs)
617
618        elif first_tag:
619            element = None
620
621            if first_tag in (tags.group, tags.all, tags.choice, tags.sequence):
622                child = children.pop(0)
623                element = self.process(child, node)
624
625            attributes = self._process_attributes(node, children)
626            xsd_type = xsd_cls(
627                element=element, attributes=attributes, qname=qname, is_global=is_global
628            )
629        else:
630            xsd_type = xsd_cls(qname=qname, is_global=is_global)
631
632        if is_global:
633            self.register_type(qname, xsd_type)
634        return xsd_type
635
636    def visit_complex_content(self, node, parent):
637        """The complexContent element defines extensions or restrictions on a
638        complex type that contains mixed content or elements only.
639
640        Definition::
641
642            <complexContent
643              id = ID
644              mixed = Boolean
645              {any attributes with non-schema Namespace}...>
646            Content: (annotation?,  (restriction | extension))
647            </complexContent>
648
649        :param node: The XML node
650        :type node: lxml.etree._Element
651        :param parent: The parent XML node
652        :type parent: lxml.etree._Element
653
654        """
655        children = list(node)
656        child = children[-1]
657
658        if child.tag == tags.restriction:
659            base, element, attributes = self.visit_restriction_complex_content(
660                child, node
661            )
662            return {"attributes": attributes, "element": element, "restriction": base}
663        elif child.tag == tags.extension:
664            base, element, attributes = self.visit_extension_complex_content(
665                child, node
666            )
667            return {"attributes": attributes, "element": element, "extension": base}
668
669    def visit_simple_content(self, node, parent):
670        """Contains extensions or restrictions on a complexType element with
671        character data or a simpleType element as content and contains no
672        elements.
673
674        Definition::
675
676            <simpleContent
677              id = ID
678              {any attributes with non-schema Namespace}...>
679            Content: (annotation?, (restriction | extension))
680            </simpleContent>
681
682        :param node: The XML node
683        :type node: lxml.etree._Element
684        :param parent: The parent XML node
685        :type parent: lxml.etree._Element
686
687        """
688
689        children = list(node)
690        child = children[-1]
691
692        if child.tag == tags.restriction:
693            return self.visit_restriction_simple_content(child, node)
694        elif child.tag == tags.extension:
695            return self.visit_extension_simple_content(child, node)
696        raise AssertionError("Expected restriction or extension")
697
698    def visit_restriction_simple_type(self, node, parent):
699        """
700        Definition::
701
702            <restriction
703              base = QName
704              id = ID
705              {any attributes with non-schema Namespace}...>
706            Content: (annotation?,
707                (simpleType?, (
708                    minExclusive | minInclusive | maxExclusive | maxInclusive |
709                    totalDigits |fractionDigits | length | minLength |
710                    maxLength | enumeration | whiteSpace | pattern)*))
711            </restriction>
712
713        :param node: The XML node
714        :type node: lxml.etree._Element
715        :param parent: The parent XML node
716        :type parent: lxml.etree._Element
717
718        """
719        base_name = qname_attr(node, "base")
720        if base_name:
721            return self._get_type(base_name)
722
723        annotation, children = self._pop_annotation(list(node))
724        if children[0].tag == tags.simpleType:
725            return self.visit_simple_type(children[0], node)
726
727    def visit_restriction_simple_content(self, node, parent):
728        """
729        Definition::
730
731            <restriction
732              base = QName
733              id = ID
734              {any attributes with non-schema Namespace}...>
735            Content: (annotation?,
736                (simpleType?, (
737                    minExclusive | minInclusive | maxExclusive | maxInclusive |
738                    totalDigits |fractionDigits | length | minLength |
739                    maxLength | enumeration | whiteSpace | pattern)*
740                )?, ((attribute | attributeGroup)*, anyAttribute?))
741            </restriction>
742
743        :param node: The XML node
744        :type node: lxml.etree._Element
745        :param parent: The parent XML node
746        :type parent: lxml.etree._Element
747
748        """
749        base_name = qname_attr(node, "base")
750        base_type = self._get_type(base_name)
751        return base_type, []
752
753    def visit_restriction_complex_content(self, node, parent):
754        """
755
756        Definition::
757
758            <restriction
759              base = QName
760              id = ID
761              {any attributes with non-schema Namespace}...>
762            Content: (annotation?, (group | all | choice | sequence)?,
763                    ((attribute | attributeGroup)*, anyAttribute?))
764            </restriction>
765
766        :param node: The XML node
767        :type node: lxml.etree._Element
768        :param parent: The parent XML node
769        :type parent: lxml.etree._Element
770
771        """
772        base_name = qname_attr(node, "base")
773        base_type = self._get_type(base_name)
774        annotation, children = self._pop_annotation(list(node))
775
776        element = None
777        attributes = []
778
779        if children:
780            child = children[0]
781            if child.tag in (tags.group, tags.all, tags.choice, tags.sequence):
782                children.pop(0)
783                element = self.process(child, node)
784            attributes = self._process_attributes(node, children)
785        return base_type, element, attributes
786
787    def visit_extension_complex_content(self, node, parent):
788        """
789
790        Definition::
791
792            <extension
793              base = QName
794              id = ID
795              {any attributes with non-schema Namespace}...>
796            Content: (annotation?, (
797                        (group | all | choice | sequence)?,
798                        ((attribute | attributeGroup)*, anyAttribute?)))
799            </extension>
800
801        :param node: The XML node
802        :type node: lxml.etree._Element
803        :param parent: The parent XML node
804        :type parent: lxml.etree._Element
805
806        """
807        base_name = qname_attr(node, "base")
808        base_type = self._get_type(base_name)
809        annotation, children = self._pop_annotation(list(node))
810
811        element = None
812        attributes = []
813
814        if children:
815            child = children[0]
816            if child.tag in (tags.group, tags.all, tags.choice, tags.sequence):
817                children.pop(0)
818                element = self.process(child, node)
819            attributes = self._process_attributes(node, children)
820
821        return base_type, element, attributes
822
823    def visit_extension_simple_content(self, node, parent):
824        """
825
826        Definition::
827
828            <extension
829              base = QName
830              id = ID
831              {any attributes with non-schema Namespace}...>
832            Content: (annotation?, ((attribute | attributeGroup)*, anyAttribute?))
833            </extension>
834        """
835        base_name = qname_attr(node, "base")
836        base_type = self._get_type(base_name)
837        annotation, children = self._pop_annotation(list(node))
838        attributes = self._process_attributes(node, children)
839
840        return base_type, attributes
841
842    def visit_annotation(self, node, parent):
843        """Defines an annotation.
844
845        Definition::
846
847            <annotation
848              id = ID
849              {any attributes with non-schema Namespace}...>
850            Content: (appinfo | documentation)*
851            </annotation>
852
853        :param node: The XML node
854        :type node: lxml.etree._Element
855        :param parent: The parent XML node
856        :type parent: lxml.etree._Element
857
858        """
859        return
860
861    def visit_any(self, node, parent):
862        """
863
864        Definition::
865
866            <any
867              id = ID
868              maxOccurs = (nonNegativeInteger | unbounded) : 1
869              minOccurs = nonNegativeInteger : 1
870              namespace = "(##any | ##other) |
871                List of (anyURI | (##targetNamespace |  ##local))) : ##any
872              processContents = (lax | skip | strict) : strict
873              {any attributes with non-schema Namespace...}>
874            Content: (annotation?)
875            </any>
876
877        :param node: The XML node
878        :type node: lxml.etree._Element
879        :param parent: The parent XML node
880        :type parent: lxml.etree._Element
881
882        """
883        min_occurs, max_occurs = _process_occurs_attrs(node)
884        process_contents = node.get("processContents", "strict")
885        return xsd_elements.Any(
886            max_occurs=max_occurs,
887            min_occurs=min_occurs,
888            process_contents=process_contents,
889        )
890
891    def visit_sequence(self, node, parent):
892        """
893        Definition::
894
895            <sequence
896              id = ID
897              maxOccurs = (nonNegativeInteger | unbounded) : 1
898              minOccurs = nonNegativeInteger : 1
899              {any attributes with non-schema Namespace}...>
900            Content: (annotation?,
901                      (element | group | choice | sequence | any)*)
902            </sequence>
903
904        :param node: The XML node
905        :type node: lxml.etree._Element
906        :param parent: The parent XML node
907        :type parent: lxml.etree._Element
908
909        """
910
911        sub_types = [
912            tags.annotation,
913            tags.any,
914            tags.choice,
915            tags.element,
916            tags.group,
917            tags.sequence,
918        ]
919        min_occurs, max_occurs = _process_occurs_attrs(node)
920        result = xsd_elements.Sequence(min_occurs=min_occurs, max_occurs=max_occurs)
921
922        annotation, children = self._pop_annotation(list(node))
923        for child in children:
924            if child.tag not in sub_types:
925                raise self._create_error(
926                    "Unexpected element %s in xsd:sequence" % child.tag, child
927                )
928
929            item = self.process(child, node)
930            assert item is not None
931            result.append(item)
932
933        assert None not in result
934        return result
935
936    def visit_all(self, node, parent):
937        """Allows the elements in the group to appear (or not appear) in any
938        order in the containing element.
939
940        Definition::
941
942            <all
943              id = ID
944              maxOccurs= 1: 1
945              minOccurs= (0 | 1): 1
946              {any attributes with non-schema Namespace...}>
947            Content: (annotation?, element*)
948            </all>
949
950        :param node: The XML node
951        :type node: lxml.etree._Element
952        :param parent: The parent XML node
953        :type parent: lxml.etree._Element
954
955        """
956
957        sub_types = [tags.annotation, tags.element]
958        result = xsd_elements.All()
959
960        annotation, children = self._pop_annotation(list(node))
961        for child in children:
962            assert child.tag in sub_types, child
963            item = self.process(child, node)
964            result.append(item)
965
966        assert None not in result
967        return result
968
969    def visit_group(self, node, parent):
970        """Groups a set of element declarations so that they can be
971        incorporated as a group into complex type definitions.
972
973        Definition::
974
975            <group
976              name= NCName
977              id = ID
978              maxOccurs = (nonNegativeInteger | unbounded) : 1
979              minOccurs = nonNegativeInteger : 1
980              name = NCName
981              ref = QName
982              {any attributes with non-schema Namespace}...>
983            Content: (annotation?, (all | choice | sequence))
984            </group>
985
986        :param node: The XML node
987        :type node: lxml.etree._Element
988        :param parent: The parent XML node
989        :type parent: lxml.etree._Element
990
991        """
992        min_occurs, max_occurs = _process_occurs_attrs(node)
993
994        result = self.process_reference(
995            node, min_occurs=min_occurs, max_occurs=max_occurs
996        )
997        if result:
998            return result
999
1000        qname = qname_attr(node, "name", self.document._target_namespace)
1001
1002        # There should be only max nodes, first node (annotation) is irrelevant
1003        annotation, children = self._pop_annotation(list(node))
1004        child = children[0]
1005
1006        item = self.process(child, parent)
1007        elm = xsd_elements.Group(name=qname, child=item)
1008
1009        if parent.tag == tags.schema:
1010            self.register_group(qname, elm)
1011        return elm
1012
1013    def visit_list(self, node, parent):
1014        """
1015        Definition::
1016
1017            <list
1018              id = ID
1019              itemType = QName
1020              {any attributes with non-schema Namespace}...>
1021            Content: (annotation?, (simpleType?))
1022            </list>
1023
1024        The use of the simpleType element child and the itemType attribute is
1025        mutually exclusive.
1026
1027        :param node: The XML node
1028        :type node: lxml.etree._Element
1029        :param parent: The parent XML node
1030        :type parent: lxml.etree._Element
1031
1032
1033        """
1034        item_type = qname_attr(node, "itemType")
1035        if item_type:
1036            sub_type = self._get_type(item_type.text)
1037        else:
1038            subnodes = list(node)
1039            child = subnodes[-1]  # skip annotation
1040            sub_type = self.visit_simple_type(child, node)
1041        return xsd_types.ListType(sub_type)
1042
1043    def visit_choice(self, node, parent):
1044        """
1045        Definition::
1046
1047            <choice
1048              id = ID
1049              maxOccurs= (nonNegativeInteger | unbounded) : 1
1050              minOccurs= nonNegativeInteger : 1
1051              {any attributes with non-schema Namespace}...>
1052            Content: (annotation?, (element | group | choice | sequence | any)*)
1053            </choice>
1054        """
1055        min_occurs, max_occurs = _process_occurs_attrs(node)
1056
1057        annotation, children = self._pop_annotation(list(node))
1058
1059        choices = []
1060        for child in children:
1061            elm = self.process(child, node)
1062            choices.append(elm)
1063        return xsd_elements.Choice(
1064            choices, min_occurs=min_occurs, max_occurs=max_occurs
1065        )
1066
1067    def visit_union(self, node, parent):
1068        """Defines a collection of multiple simpleType definitions.
1069
1070        Definition::
1071
1072            <union
1073              id = ID
1074              memberTypes = List of QNames
1075              {any attributes with non-schema Namespace}...>
1076            Content: (annotation?, (simpleType*))
1077            </union>
1078
1079        :param node: The XML node
1080        :type node: lxml.etree._Element
1081        :param parent: The parent XML node
1082        :type parent: lxml.etree._Element
1083
1084        """
1085        # TODO
1086        members = node.get("memberTypes")
1087        types = []
1088        if members:
1089            for member in members.split():
1090                qname = as_qname(member, node.nsmap)
1091                xsd_type = self._get_type(qname)
1092                types.append(xsd_type)
1093        else:
1094            annotation, types = self._pop_annotation(list(node))
1095            types = [self.visit_simple_type(t, node) for t in types]
1096        return xsd_types.UnionType(types)
1097
1098    def visit_unique(self, node, parent):
1099        """Specifies that an attribute or element value (or a combination of
1100        attribute or element values) must be unique within the specified scope.
1101        The value must be unique or nil.
1102
1103        Definition::
1104
1105            <unique
1106              id = ID
1107              name = NCName
1108              {any attributes with non-schema Namespace}...>
1109            Content: (annotation?, (selector, field+))
1110            </unique>
1111
1112        :param node: The XML node
1113        :type node: lxml.etree._Element
1114        :param parent: The parent XML node
1115        :type parent: lxml.etree._Element
1116
1117        """
1118        # TODO
1119        pass
1120
1121    def visit_attribute_group(self, node, parent):
1122        """
1123        Definition::
1124
1125            <attributeGroup
1126              id = ID
1127              name = NCName
1128              ref = QName
1129              {any attributes with non-schema Namespace...}>
1130            Content: (annotation?),
1131                     ((attribute | attributeGroup)*, anyAttribute?))
1132            </attributeGroup>
1133
1134        :param node: The XML node
1135        :type node: lxml.etree._Element
1136        :param parent: The parent XML node
1137        :type parent: lxml.etree._Element
1138
1139        """
1140        ref = self.process_reference(node)
1141        if ref:
1142            return ref
1143
1144        qname = qname_attr(node, "name", self.document._target_namespace)
1145        annotation, children = self._pop_annotation(list(node))
1146
1147        attributes = self._process_attributes(node, children)
1148        attribute_group = xsd_elements.AttributeGroup(qname, attributes)
1149        self.register_attribute_group(qname, attribute_group)
1150
1151    def visit_any_attribute(self, node, parent):
1152        """
1153        Definition::
1154
1155            <anyAttribute
1156              id = ID
1157              namespace = ((##any | ##other) |
1158                List of (anyURI | (##targetNamespace | ##local))) : ##any
1159              processContents = (lax | skip | strict): strict
1160              {any attributes with non-schema Namespace...}>
1161            Content: (annotation?)
1162            </anyAttribute>
1163
1164        :param node: The XML node
1165        :type node: lxml.etree._Element
1166        :param parent: The parent XML node
1167        :type parent: lxml.etree._Element
1168
1169        """
1170        process_contents = node.get("processContents", "strict")
1171        return xsd_elements.AnyAttribute(process_contents=process_contents)
1172
1173    def visit_notation(self, node, parent):
1174        """Contains the definition of a notation to describe the format of
1175        non-XML data within an XML document. An XML Schema notation declaration
1176        is a reconstruction of XML 1.0 NOTATION declarations.
1177
1178        Definition::
1179
1180            <notation
1181              id = ID
1182              name = NCName
1183              public = Public identifier per ISO 8879
1184              system = anyURI
1185              {any attributes with non-schema Namespace}...>
1186            Content: (annotation?)
1187            </notation>
1188
1189        :param node: The XML node
1190        :type node: lxml.etree._Element
1191        :param parent: The parent XML node
1192        :type parent: lxml.etree._Element
1193
1194        """
1195        pass
1196
1197    def _retrieve_data(self, url: typing.IO, base_url=None):
1198        return load_external(
1199            url, self.schema._transport, base_url, settings=self.schema.settings
1200        )
1201
1202    def _get_type(self, name):
1203        assert name is not None
1204        name = self._create_qname(name)
1205        return UnresolvedType(name, self.schema)
1206
    def _create_qname(self, name):
        """Normalise *name* into an ``etree.QName`` and fix up its namespace.

        Steps, in order:

        1. Anything that is not already an ``etree.QName`` is wrapped in one.
        2. A namespace that is literally ``"xml"`` is replaced by the XML
           namespace URI.
        3. When the namespace is listed in ``AUTO_IMPORT_NAMESPACES`` and the
           document reports it as not imported, a synthetic ``xsd:import``
           node is built and passed to ``visit_import``.
        4. A name without a namespace is placed in the document's target
           namespace when the element form is ``"qualified"``, a target
           namespace is set and the document has no empty import.

        :param name: The name to normalise (``etree.QName`` or a value
                     accepted by the ``etree.QName`` constructor)
        :returns: The resulting ``etree.QName``

        """
        if not isinstance(name, etree.QName):
            name = etree.QName(name)

        # Handle reserved namespace
        if name.namespace == "xml":
            name = etree.QName("http://www.w3.org/XML/1998/namespace", name.localname)

        # Various xsd builders assume that some schemas are available by
        # default (actually this is mostly just the soap-enc ns). So live with
        # that fact and handle it by auto-importing the schema if it is
        # referenced.
        if name.namespace in AUTO_IMPORT_NAMESPACES and not self.document.is_imported(
            name.namespace
        ):
            logger.debug("Auto importing missing known schema: %s", name.namespace)
            # The namespace URI doubles as the schemaLocation of the
            # synthetic import node.
            import_node = etree.Element(
                tags.import_, namespace=name.namespace, schemaLocation=name.namespace
            )
            self.visit_import(import_node, None)

        # Unprefixed names fall back to the target namespace, but only under
        # the conditions listed in step 4 of the docstring.
        if (
            not name.namespace
            and self.document._element_form == "qualified"
            and self.document._target_namespace
            and not self.document._has_empty_import
        ):
            name = etree.QName(self.document._target_namespace, name.localname)
        return name
1236
1237    def _pop_annotation(self, items):
1238        if not len(items):
1239            return None, []
1240
1241        if items[0].tag == tags.annotation:
1242            annotation = self.visit_annotation(items[0], None)
1243            return annotation, items[1:]
1244        return None, items
1245
1246    def _process_attributes(self, node, items):
1247        attributes = []
1248        for child in items:
1249            if child.tag in (tags.attribute, tags.attributeGroup, tags.anyAttribute):
1250                attribute = self.process(child, node)
1251                attributes.append(attribute)
1252            else:
1253                raise self._create_error("Unexpected tag `%s`" % (child.tag), node)
1254        return attributes
1255
1256    def _create_error(self, message, node):
1257        return XMLParseError(
1258            message, filename=self.document._location, sourceline=node.sourceline
1259        )
1260
    # Dispatch table mapping a schema tag to its visitor. The values are the
    # plain functions defined earlier in this class body (not bound methods),
    # since the dict is built while the class body executes.
    # visit_list, visit_union and visit_unique are not registered here.
    # NOTE(review): simpleContent/complexContent map to None - presumably
    # they are handled by their parent's visitor; confirm against process().
    visitors = {
        tags.any: visit_any,
        tags.element: visit_element,
        tags.choice: visit_choice,
        tags.simpleType: visit_simple_type,
        tags.anyAttribute: visit_any_attribute,
        tags.complexType: visit_complex_type,
        tags.simpleContent: None,
        tags.complexContent: None,
        tags.sequence: visit_sequence,
        tags.all: visit_all,
        tags.group: visit_group,
        tags.attribute: visit_attribute,
        tags.import_: visit_import,
        tags.include: visit_include,
        tags.annotation: visit_annotation,
        tags.attributeGroup: visit_attribute_group,
        tags.notation: visit_notation,
    }
1280
1281
1282def _process_occurs_attrs(node):
1283    """Process the min/max occurrence indicators"""
1284    max_occurs = node.get("maxOccurs", "1")
1285    min_occurs = int(node.get("minOccurs", "1"))
1286    if max_occurs == "unbounded":
1287        max_occurs = "unbounded"
1288    else:
1289        max_occurs = int(max_occurs)
1290
1291    return min_occurs, max_occurs
1292