1# 2# Copyright (c), 2016-2020, SISSA (International School for Advanced Studies). 3# All rights reserved. 4# This file is distributed under the terms of the MIT License. 5# See the file 'LICENSE' in the root directory of the present 6# distribution, or http://opensource.org/licenses/MIT. 7# 8# @author Davide Brunato <brunato@sissa.it> 9# 10""" 11This module contains XMLSchema classes creator for xmlschema package. 12 13Two schema classes are created at the end of this module, XMLSchema10 for XSD 1.0 and 14XMLSchema11 for XSD 1.1. The latter class parses also XSD 1.0 schemas, as prescribed by 15the standard. 16""" 17import sys 18if sys.version_info < (3, 7): 19 from typing import GenericMeta as ABCMeta 20else: 21 from abc import ABCMeta 22 23import os 24import logging 25import threading 26import warnings 27import re 28import sys 29from copy import copy 30from itertools import chain 31from typing import cast, Callable, ItemsView, List, Optional, Dict, Any, \ 32 Set, Union, Tuple, Type, Iterator, Counter 33 34from elementpath import XPathToken 35 36from ..exceptions import XMLSchemaTypeError, XMLSchemaKeyError, XMLSchemaRuntimeError, \ 37 XMLSchemaValueError, XMLSchemaNamespaceError 38from ..names import VC_MIN_VERSION, VC_MAX_VERSION, VC_TYPE_AVAILABLE, \ 39 VC_TYPE_UNAVAILABLE, VC_FACET_AVAILABLE, VC_FACET_UNAVAILABLE, XSD_NOTATION, \ 40 XSD_ATTRIBUTE, XSD_ATTRIBUTE_GROUP, XSD_GROUP, XSD_SIMPLE_TYPE, XSI_TYPE, \ 41 XSD_COMPLEX_TYPE, XSD_ELEMENT, XSD_SEQUENCE, XSD_CHOICE, XSD_ALL, XSD_ANY, \ 42 XSD_ANY_ATTRIBUTE, XSD_ANY_TYPE, XSD_NAMESPACE, XML_NAMESPACE, XSI_NAMESPACE, \ 43 VC_NAMESPACE, SCHEMAS_DIR, LOCATION_HINTS, XSD_ANNOTATION, XSD_INCLUDE, \ 44 XSD_IMPORT, XSD_REDEFINE, XSD_OVERRIDE, XSD_DEFAULT_OPEN_CONTENT, \ 45 XSD_ANY_SIMPLE_TYPE, XSD_UNION, XSD_LIST, XSD_RESTRICTION 46from ..etree import etree_element, ParseError 47from ..aliases import ElementType, XMLSourceType, NamespacesType, LocationsType, \ 48 SchemaType, SchemaSourceType, ConverterType, 
ComponentClassType, DecodeType, \ 49 EncodeType, BaseXsdType, AtomicValueType, ExtraValidatorType, SchemaGlobalType 50from ..helpers import prune_etree, get_namespace, get_qname 51from ..namespaces import NamespaceResourcesMap, NamespaceView 52from ..resources import is_local_url, is_remote_url, url_path_is_file, \ 53 normalize_locations, fetch_resource, normalize_url, XMLResource 54from ..converters import XMLSchemaConverter 55from ..xpath import XMLSchemaProtocol, XMLSchemaProxy, ElementPathMixin 56from .. import dataobjects 57 58from .exceptions import XMLSchemaParseError, XMLSchemaValidationError, XMLSchemaEncodeError, \ 59 XMLSchemaNotBuiltError, XMLSchemaIncludeWarning, XMLSchemaImportWarning 60from .helpers import get_xsd_derivation_attribute 61from .xsdbase import check_validation_mode, XsdValidator, XsdComponent, XsdAnnotation 62from .notations import XsdNotation 63from .identities import XsdIdentity, XsdKey, XsdKeyref, XsdUnique, \ 64 Xsd11Key, Xsd11Unique, Xsd11Keyref, IdentityCounter, KeyrefCounter, IdentityMapType 65from .facets import XSD_10_FACETS, XSD_11_FACETS 66from .simple_types import XsdSimpleType, XsdList, XsdUnion, XsdAtomicRestriction, \ 67 Xsd11AtomicRestriction, Xsd11Union 68from .attributes import XsdAttribute, XsdAttributeGroup, Xsd11Attribute 69from .complex_types import XsdComplexType, Xsd11ComplexType 70from .groups import XsdGroup, Xsd11Group 71from .elements import XsdElement, Xsd11Element 72from .wildcards import XsdAnyElement, XsdAnyAttribute, Xsd11AnyElement, \ 73 Xsd11AnyAttribute, XsdDefaultOpenContent 74from .global_maps import XsdGlobals 75 76logger = logging.getLogger('xmlschema') 77 78XSD_VERSION_PATTERN = re.compile(r'^\d+\.\d+$') 79DRIVE_PATTERN = re.compile(r'^[a-zA-Z]:$') 80 81# Elements for building dummy groups 82ATTRIBUTE_GROUP_ELEMENT = etree_element(XSD_ATTRIBUTE_GROUP) 83ANY_ATTRIBUTE_ELEMENT = etree_element( 84 XSD_ANY_ATTRIBUTE, attrib={'namespace': '##any', 'processContents': 'lax'} 85) 86SEQUENCE_ELEMENT = 
class XMLSchemaMeta(ABCMeta):
    """
    Metaclass for XML Schema classes.

    At class creation time it:

    * maps the deprecated ``BUILDERS`` dictionary, when present in the class
      namespace, onto the corresponding ``xsd_*`` class attributes;
    * when the class namespace provides ``meta_schema`` as a string, creates a
      companion ``Meta<name>`` class, registers it in the defining module and
      replaces the string with the instance returned by ``create_meta_schema``;
    * checks that ``XSD_VERSION`` is ``'1.0'`` or ``'1.1'``.
    """
    XSD_VERSION: str
    create_meta_schema: Callable[['XMLSchemaMeta', Optional[str]], 'SchemaType']

    def __new__(mcs, name: str, bases: Tuple[Type[Any], ...], dict_: Dict[str, Any]) \
            -> 'XMLSchemaMeta':
        """
        Create a new schema class.

        :param name: the name of the class to create.
        :param bases: the base classes, the first one is used for looking up \
        inherited attributes. At least one base is required.
        :param dict_: the class namespace, it's modified in place.
        :raises XMLSchemaValueError: if the XSD_VERSION of the new class is \
        not '1.0' or '1.1'.
        """
        assert bases, "a base class is mandatory"
        base_class = bases[0]

        # For backward compatibility (will be removed in v2.0)
        if 'BUILDERS' in dict_:
            msg = "'BUILDERS' will be removed in v2.0, provide the appropriate " \
                  "attributes instead (eg. xsd_element_class = Xsd11Element)"

            # stacklevel=2 attributes the warning to the class statement that
            # declares BUILDERS instead of to this metaclass.
            warnings.warn(msg, DeprecationWarning, stacklevel=2)

            for k, v in dict_['BUILDERS'].items():
                if k == 'simple_type_factory':
                    dict_['simple_type_factory'] = staticmethod(v)
                    continue

                attr_name = 'xsd_{}'.format(k)
                if not hasattr(base_class, attr_name):
                    continue  # Unknown builder names are ignored
                elif getattr(base_class, attr_name) is not v:
                    dict_[attr_name] = v

        if isinstance(dict_.get('meta_schema'), str):
            # Build a new meta-schema class and register it into module's globals
            meta_schema_file: str = dict_.pop('meta_schema')
            meta_schema_class_name = 'Meta' + name

            meta_schema: Optional['SchemaType']
            meta_schema = getattr(base_class, 'meta_schema', None)
            if meta_schema is None:
                meta_bases = bases
            else:
                # Use base's meta_schema class as base for the new meta-schema
                meta_bases = (meta_schema.__class__,)
                if len(bases) > 1:
                    meta_bases += bases[1:]

            meta_schema_class = super(XMLSchemaMeta, mcs).__new__(
                mcs, meta_schema_class_name, meta_bases, dict_
            )
            meta_schema_class.__qualname__ = meta_schema_class_name
            module = sys.modules[dict_['__module__']]
            setattr(module, meta_schema_class_name, meta_schema_class)

            meta_schema = meta_schema_class.create_meta_schema(meta_schema_file)
            dict_['meta_schema'] = meta_schema

        # Create the class and check some basic attributes
        cls = super(XMLSchemaMeta, mcs).__new__(mcs, name, bases, dict_)
        if cls.XSD_VERSION not in ('1.0', '1.1'):
            raise XMLSchemaValueError("XSD_VERSION must be '1.0' or '1.1'")
        return cls
Can be a dictionary or a sequence of couples \ 180 (namespace URI, resource URL). Extra locations passed using a tuple container are not \ 181 normalized. 182 :param base_url: is an optional base URL, used for the normalization of relative paths \ 183 when the URL of the schema resource can't be obtained from the source argument. 184 :param allow: defines the security mode for accessing resource locations. Can be \ 185 'all', 'remote', 'local' or 'sandbox'. Default is 'all' that means all types of \ 186 URLs are allowed. With 'remote' only remote resource URLs are allowed. With 'local' \ 187 only file paths and URLs are allowed. With 'sandbox' only file paths and URLs that \ 188 are under the directory path identified by source or by the *base_url* argument \ 189 are allowed. 190 :param defuse: defines when to defuse XML data using a `SafeXMLParser`. Can be \ 191 'always', 'remote' or 'never'. For default defuses only remote XML data. 192 :param timeout: the timeout in seconds for fetching resources. Default is `300`. 193 :param build: defines whether build the schema maps. Default is `True`. 194 :param use_meta: if `True` the schema processor uses the validator meta-schema, \ 195 otherwise a new meta-schema is added at the end. In the latter case the meta-schema \ 196 is rebuilt if any base namespace has been overridden by an import. Ignored if the \ 197 argument *global_maps* is provided. 198 :param use_fallback: if `True` the schema processor uses the validator fallback \ 199 location hints to load well-known namespaces (eg. xhtml). 200 :param loglevel: for setting a different logging level for schema initialization \ 201 and building. For default is WARNING (30). For INFO level set it with 20, for \ 202 DEBUG level with 10. The default loglevel is restored after schema building, \ 203 when exiting the initialization method. 204 205 :cvar XSD_VERSION: store the XSD version (1.0 or 1.1). 
206 :cvar BASE_SCHEMAS: a dictionary from namespace to schema resource for meta-schema bases. 207 :cvar fallback_locations: fallback schema location hints for other standard namespaces. 208 :cvar meta_schema: the XSD meta-schema instance. 209 :cvar attribute_form_default: the schema's *attributeFormDefault* attribute. \ 210 Default is 'unqualified'. 211 :cvar element_form_default: the schema's *elementFormDefault* attribute. \ 212 Default is 'unqualified'. 213 :cvar block_default: the schema's *blockDefault* attribute. Default is ''. 214 :cvar final_default: the schema's *finalDefault* attribute. Default is ''. 215 :cvar default_attributes: the XSD 1.1 schema's *defaultAttributes* attribute. \ 216 Default is ``None``. 217 :cvar xpath_tokens: symbol table for schema bound XPath 2.0 parsers. Initially set to \ 218 ``None`` it's redefined at instance level with a dictionary at first use of the XPath \ 219 selector. The parser symbol table is extended with schema types constructors. 220 221 :ivar target_namespace: is the *targetNamespace* of the schema, the namespace to which \ 222 belong the declarations/definitions of the schema. If it's empty no namespace is associated \ 223 with the schema. In this case the schema declarations can be reused from other namespaces as \ 224 *chameleon* definitions. 225 :ivar validation: validation mode, can be 'strict', 'lax' or 'skip'. 226 :ivar maps: XSD global declarations/definitions maps. This is an instance of \ 227 :class:`XsdGlobal`, that stores the *global_maps* argument or a new object \ 228 when this argument is not provided. 229 :ivar converter: the default converter used for XML data decoding/encoding. 230 :ivar locations: schema location hints. 231 :ivar namespaces: a dictionary that maps from the prefixes used by the schema \ 232 into namespace URI. 233 :ivar imports: a dictionary of namespace imports of the schema, that maps namespace \ 234 URI to imported schema object, or `None` in case of unsuccessful import. 
235 :ivar includes: a dictionary of included schemas, that maps a schema location to an \ 236 included schema. It also comprehend schemas included by "xs:redefine" or \ 237 "xs:override" statements. 238 :ivar warnings: warning messages about failure of import and include elements. 239 240 :ivar notations: `xsd:notation` declarations. 241 :vartype notations: NamespaceView 242 :ivar types: `xsd:simpleType` and `xsd:complexType` global declarations. 243 :vartype types: NamespaceView 244 :ivar attributes: `xsd:attribute` global declarations. 245 :vartype attributes: NamespaceView 246 :ivar attribute_groups: `xsd:attributeGroup` definitions. 247 :vartype attribute_groups: NamespaceView 248 :ivar groups: `xsd:group` global definitions. 249 :vartype groups: NamespaceView 250 :ivar elements: `xsd:element` global declarations. 251 :vartype elements: NamespaceView 252 """ 253 # Instance attributes annotations 254 source: XMLResource 255 namespaces: NamespacesType 256 converter: Union[ConverterType] 257 locations: NamespaceResourcesMap 258 maps: XsdGlobals 259 imports: Dict[str, Optional[SchemaType]] 260 includes: Dict[str, SchemaType] 261 warnings: List[str] 262 263 notations: NamespaceView[XsdNotation] 264 types: NamespaceView[BaseXsdType] 265 attributes: NamespaceView[XsdAttribute] 266 attribute_groups: NamespaceView[XsdAttributeGroup] 267 groups: NamespaceView[XsdGroup] 268 elements: NamespaceView[XsdElement] 269 substitution_groups: NamespaceView[List[XsdElement]] 270 identities: NamespaceView[XsdIdentity] 271 272 XSD_VERSION: str = '1.0' 273 meta_schema: Optional['XMLSchemaBase'] = None 274 BASE_SCHEMAS: Dict[str, str] = {} 275 fallback_locations: Dict[str, str] = LOCATION_HINTS.copy() 276 _locations: Tuple[Tuple[str, str], ...] 
def __init__(self, source: Union[SchemaSourceType, List[SchemaSourceType]],
             namespace: Optional[str] = None,
             validation: str = 'strict',
             global_maps: Optional[XsdGlobals] = None,
             converter: Optional[ConverterType] = None,
             locations: Optional[LocationsType] = None,
             base_url: Optional[str] = None,
             allow: str = 'all',
             defuse: str = 'remote',
             timeout: int = 300,
             build: bool = True,
             use_meta: bool = True,
             use_fallback: bool = True,
             loglevel: Optional[Union[str, int]] = None) -> None:
    """
    Initialize the schema instance from one or more XSD sources.

    The steps, in order: set the logging level, wrap the first source into an
    :class:`XMLResource` (unless it already is one), read the target namespace
    and the schema defaults from the root element, set the converter, set the
    location hints and the global maps, check the schema document against the
    meta-schema (unless *validation* is 'skip'), process inclusions and imports,
    add the other sources and finally build the maps if *build* is true.

    When the class has no meta-schema (``self.meta_schema is None``) the instance
    is itself a meta-schema: only the maps are created and the xs:override
    children are included, then the method returns.

    NOTE(review): the statement nesting has been reconstructed from a source
    with collapsed whitespace, confirm it against the original file.

    :raises XMLSchemaValueError: if *loglevel* is a string that is not a valid \
    level name or if *source* is an empty list.
    :raises XMLSchemaTypeError: if *global_maps* is provided but is not an \
    :class:`XsdGlobals` instance.
    """

    super(XMLSchemaBase, self).__init__(validation)
    self.lock = threading.Lock()  # Lock for build operations

    if loglevel is not None:
        if isinstance(loglevel, str):
            # Level names are matched case-insensitively, ignoring outer spaces
            level = loglevel.strip().upper()
            if level not in {'DEBUG', 'INFO', 'WARN', 'WARNING', 'ERROR', 'CRITICAL'}:
                raise XMLSchemaValueError("{!r} is not a valid loglevel".format(loglevel))
            logger.setLevel(getattr(logging, level))
        else:
            logger.setLevel(loglevel)
    elif build and global_maps is None:
        logger.setLevel(logging.WARNING)

    if allow == 'sandbox' and base_url is None and is_local_url(source):
        # Allow sandbox mode without a base_url using the initial schema URL as base
        assert isinstance(source, str)
        base_url = os.path.dirname(normalize_url(source))

    # With a list of sources the first one is the schema of this instance,
    # the others are added at the end with add_schema().
    other_sources: List[SchemaSourceType]
    if isinstance(source, list):
        if not source:
            raise XMLSchemaValueError("no XSD source provided!")
        other_sources = source[1:]
        source = source[0]
    else:
        other_sources = []

    if isinstance(source, XMLResource):
        # An XMLResource is used as is: base_url, allow, defuse and timeout
        # arguments are not applied to it.
        self.source = source
    else:
        self.source = XMLResource(source, base_url, allow, defuse, timeout)

    logger.debug("Read schema from %r", self.source.url or self.source.source)

    self.imports = {}
    self.includes = {}
    self.warnings = []
    self._root_elements = None  # type: Optional[Set[str]]

    self.name = self.source.name
    root = self.source.root

    # Initialize schema's namespaces, the XML namespace is implicitly declared.
    self.namespaces = self.source.get_namespaces({'xml': XML_NAMESPACE}, root_only=True)

    if 'targetNamespace' in root.attrib:
        self.target_namespace = root.attrib['targetNamespace'].strip()
        if not self.target_namespace:
            # https://www.w3.org/TR/2004/REC-xmlschema-1-20041028/structures.html#element-schema
            self.parse_error("the attribute 'targetNamespace' cannot be an empty string", root)
        elif namespace is not None and self.target_namespace != namespace:
            msg = "wrong namespace (%r instead of %r) for XSD resource %s"
            self.parse_error(msg % (self.target_namespace, namespace, self.url), root)

    if not self.target_namespace and namespace is not None:
        # Chameleon schema case
        self.target_namespace = namespace
        if '' not in self.namespaces:
            self.namespaces[''] = namespace

    elif '' not in self.namespaces:
        # If not declared map the default namespace to no namespace
        self.namespaces[''] = ''

    logger.debug("Schema targetNamespace is %r", self.target_namespace)
    logger.debug("Declared namespaces: %r", self.namespaces)

    # Parses the schema defaults
    if 'attributeFormDefault' in root.attrib:
        self.attribute_form_default = root.attrib['attributeFormDefault']

    if 'elementFormDefault' in root.attrib:
        self.element_form_default = root.attrib['elementFormDefault']

    if 'blockDefault' in root.attrib:
        if self.meta_schema is None:
            pass  # Skip for XSD 1.0 meta-schema that has blockDefault="#all"
        else:
            try:
                self.block_default = get_xsd_derivation_attribute(
                    root, 'blockDefault', {'extension', 'restriction', 'substitution'}
                )
            except ValueError as err:
                self.parse_error(err, root)

    if 'finalDefault' in root.attrib:
        try:
            self.final_default = get_xsd_derivation_attribute(root, 'finalDefault')
        except ValueError as err:
            self.parse_error(err, root)

    if converter is None:
        self.converter = XMLSchemaConverter
    else:
        self.converter = self.get_converter(converter)

    if self.meta_schema is None:
        self.locations = NamespaceResourcesMap()

        # Meta-schema maps creation (MetaXMLSchema10/11 classes)
        self.maps = global_maps or XsdGlobals(self)
        for child in self.source.root:
            if child.tag == XSD_OVERRIDE:
                self.include_schema(child.attrib['schemaLocation'], self.base_url)
        return  # Meta-schemas don't need to be checked and don't process imports

    # Completes the namespaces map with internal declarations, remapping same prefixes.
    self.namespaces = self.source.get_namespaces(self.namespaces)

    if locations:
        if isinstance(locations, tuple):
            # Locations provided with a tuple are stored without normalization
            self._locations = locations
        else:
            self._locations = tuple(normalize_locations(locations, self.base_url))

    self.locations = NamespaceResourcesMap(self.source.get_locations(self._locations))
    if not use_fallback:
        # Shadows the class attribute with an empty instance-level dictionary
        self.fallback_locations = {}

    # The meta-schema maps are built at first use, under the meta-schema lock
    with self.meta_schema.lock:
        if not self.meta_schema.maps.types:
            self.meta_schema.maps.build()

    # Create or set the XSD global maps instance
    if isinstance(global_maps, XsdGlobals):
        self.maps = global_maps
    elif global_maps is not None:
        raise XMLSchemaTypeError("'global_maps' argument must be an %r instance" % XsdGlobals)
    elif use_meta and self.target_namespace not in self.meta_schema.maps.namespaces:
        self.maps = self.meta_schema.maps.copy(self, validation)
    else:
        self.maps = XsdGlobals(self, validation)

    if any(ns == VC_NAMESPACE for ns in self.namespaces.values()):
        # For XSD 1.1+ apply versioning filter to schema tree. See the paragraph
        # 4.2.2 of XSD 1.1 (Part 1: Structures) definition for details.
        # Ref: https://www.w3.org/TR/xmlschema11-1/#cip
        if prune_etree(root, selector=lambda x: not self.version_check(x)):
            # A true result removes all the root attributes except these three
            for k in list(root.attrib):
                if k not in {'targetNamespace', VC_MIN_VERSION, VC_MAX_VERSION}:
                    del root.attrib[k]

    # Validate the schema document (transforming validation errors to parse errors)
    if validation != 'skip':
        for e in self.meta_schema.iter_errors(root, namespaces=self.namespaces):
            self.parse_error(e.reason or e, elem=e.elem)

    self._parse_inclusions()
    self._parse_imports()

    # Imports by argument (usually from xsi:schemaLocation attribute).
    for ns in self.locations:
        if ns not in self.maps.namespaces:
            self._import_namespace(ns, self.locations[ns])

    # XSD 1.1 default declarations (defaultAttributes, defaultOpenContent,
    # xpathDefaultNamespace)
    if self.XSD_VERSION > '1.0':
        self.xpath_default_namespace = self._parse_xpath_default_namespace(root)
        if 'defaultAttributes' in root.attrib:
            try:
                self.default_attributes = self.resolve_qname(root.attrib['defaultAttributes'])
            except (ValueError, KeyError, RuntimeError) as err:
                self.parse_error(err, root)

        # Only the first xs:defaultOpenContent child is considered
        for child in root:
            if child.tag == XSD_DEFAULT_OPEN_CONTENT:
                self.default_open_content = XsdDefaultOpenContent(child, self)
                break

    _source: Union[SchemaSourceType, XMLResource]
    for _source in other_sources:
        if not isinstance(_source, XMLResource):
            _source = XMLResource(_source, base_url, allow, defuse, timeout)

        if not _source.root.get('targetNamespace') and self.target_namespace:
            # Adding a chameleon schema: set the namespace with targetNamespace
            self.add_schema(_source, namespace=self.target_namespace)
        else:
            self.add_schema(_source)

    # NOTE(review): the logger is reset to WARNING (not to its previous level)
    # and only if a loglevel was provided; an exception raised before this
    # point leaves the custom level in place.
    try:
        if build:
            self.maps.build()
    finally:
        if loglevel is not None:
            logger.setLevel(logging.WARNING)  # Restore default logging
def __setstate__(self, state: Dict[str, Any]) -> None:
    """Restore the instance attributes from *state* and create a new build lock."""
    vars(self).update(state)
    self.lock = threading.Lock()

def __repr__(self) -> str:
    """Representation with class name and namespace, plus the name if the schema has an URL."""
    class_name = self.__class__.__name__
    if not self.url:
        return '%s(namespace=%r)' % (class_name, self.target_namespace)
    return '%s(name=%r, namespace=%r)' % (class_name, self.name, self.target_namespace)

def __setattr__(self, name: str, value: Any) -> None:
    """
    Set an attribute, with special handling for 'maps' and 'validation'.

    Assigning 'maps' also rebuilds the namespace views of the schema on the
    new maps and registers the schema into them. The maps of a meta-schema
    (a schema whose class has no meta-schema) cannot be replaced once set.
    The 'validation' value is checked before the assignment.
    """
    if name != 'maps':
        if name == 'validation':
            check_validation_mode(value)
        super(XMLSchemaBase, self).__setattr__(name, value)
        return

    if self.meta_schema is None and hasattr(self, 'maps'):
        msg = "cannot change the global maps instance of a meta-schema"
        raise XMLSchemaValueError(msg)

    super(XMLSchemaBase, self).__setattr__(name, value)

    # One view for each global map, each restricted to the target namespace
    view_names = (
        'notations', 'types', 'attributes', 'attribute_groups',
        'groups', 'elements', 'substitution_groups', 'identities',
    )
    for view_name in view_names:
        global_map = getattr(value, view_name)
        setattr(self, view_name, NamespaceView(global_map, self.target_namespace))
    value.register(self)

def __iter__(self) -> Iterator[XsdElement]:
    """Iterate the global elements of the schema, sorted by name."""
    for xsd_element in sorted(self.elements.values(), key=lambda e: e.name):
        yield xsd_element

def __reversed__(self) -> Iterator[XsdElement]:
    """Iterate the global elements of the schema, sorted by name in descending order."""
    for xsd_element in sorted(self.elements.values(), key=lambda e: e.name, reverse=True):
        yield xsd_element

def __len__(self) -> int:
    """The number of global elements of the schema."""
    global_elements = self.elements
    return len(global_elements)
@property
def xsd_version(self) -> str:
    """Compatibility property, it returns the value of XSD_VERSION."""
    version = self.XSD_VERSION
    return version

# XML resource attributes access
@property
def root(self) -> ElementType:
    """The root element of the schema source."""
    resource = self.source
    return resource.root

def get_text(self) -> str:
    """Returns the text of the XSD source, as provided by the schema resource."""
    resource = self.source
    return resource.get_text()

@property
def url(self) -> Optional[str]:
    """The URL of the schema resource, `None` if the schema is built from an Element or a string."""
    resource = self.source
    return resource.url

@property
def base_url(self) -> Optional[str]:
    """The base URL of the schema source."""
    resource = self.source
    return resource.base_url

@property
def filepath(self) -> Optional[str]:
    """The path of the local XSD file from which the schema is loaded, `None` otherwise."""
    resource = self.source
    return resource.filepath

@property
def allow(self) -> str:
    """The security mode for accessing resources: 'all', 'remote', 'local' or 'sandbox'."""
    resource = self.source
    return resource.allow

@property
def defuse(self) -> str:
    """When the XML data are defused: 'always', 'remote' or 'never'."""
    resource = self.source
    return resource.defuse

@property
def timeout(self) -> int:
    """The timeout in seconds used for fetching resources."""
    resource = self.source
    return resource.timeout

@property
def use_meta(self) -> bool:
    """`True` if the meta-schema of the instance is the one of its class."""
    class_meta_schema = self.__class__.meta_schema
    return self.meta_schema is class_meta_schema

# Schema root attributes
@property
def tag(self) -> str:
    """The tag of the schema root. For compatibility with the ElementTree API."""
    root = self.source.root
    return root.tag

@property
def id(self) -> Optional[str]:
    """The *id* attribute of the schema root, ``None`` if missing."""
    root = self.source.root
    return root.get('id')

@property
def version(self) -> Optional[str]:
    """The *version* attribute of the schema root, ``None`` if missing."""
    root = self.source.root
    return root.get('version')

@property
def schema_location(self) -> List[Tuple[str, str]]:
    """
    The location hints of the schema source that have a not empty namespace,
    as a list of (namespace, location) couples.
    """
    hints = []
    for namespace, location in self.source.iter_location_hints():
        if namespace:
            hints.append((namespace, location))
    return hints

@property
def no_namespace_schema_location(self) -> Optional[str]:
    """
    The first location hint of the schema source that has an empty namespace,
    ``None`` if there isn't one.
    """
    return next(
        (location for namespace, location in self.source.iter_location_hints()
         if not namespace),
        None
    )

@property
def default_namespace(self) -> Optional[str]:
    """The namespace mapped to the empty prefix '', ``None`` if not mapped."""
    prefix_map = self.namespaces
    return prefix_map.get('')

@property
def target_prefix(self) -> str:
    """The first prefix mapped to the *targetNamespace*, '' if there isn't one."""
    return next(
        (prefix for prefix, uri in self.namespaces.items()
         if uri == self.target_namespace),
        ''
    )

@classmethod
def builtin_types(cls) -> NamespaceView[BaseXsdType]:
    """
    Returns the types of the first schema registered for the XSD namespace
    in the meta-schema maps. The meta-schema is built if those types are empty.

    :raises XMLSchemaRuntimeError: if the class doesn't have a meta-schema.
    :raises XMLSchemaNotBuiltError: if the XSD namespace is missing in the \
    meta-schema maps.
    """
    meta = cls.meta_schema
    if meta is None:
        raise XMLSchemaRuntimeError("meta-schema unavailable for %r" % cls)

    try:
        xsd_schema: SchemaType = meta.maps.namespaces[XSD_NAMESPACE][0]
        xsd_types = xsd_schema.types
    except KeyError:
        raise XMLSchemaNotBuiltError(meta, "missing XSD namespace in meta-schema")

    if not xsd_types:
        meta.build()
    return xsd_types
@property
def root_elements(self) -> List[XsdElement]:
    """
    The list of global elements that are not used by reference in any model of the schema.
    This is implemented as lazy property because it's computationally expensive to build
    when the schema model is complex.
    """
    if not self.elements:
        return []
    elif len(self.elements) == 1:
        return list(self.elements.values())
    elif self._root_elements is None:
        # Start from all the global names and discard the ones that are found
        # in the content of a global element, as reference or as global.
        names = {e.name for e in self.elements.values()}
        for xsd_element in self.elements.values():
            for e in xsd_element.iter():
                if e is xsd_element or isinstance(e, XsdAnyElement):
                    continue
                elif e.ref or e.parent is None:
                    if e.name in names:
                        names.discard(e.name)
                        if not names:
                            break
        self._root_elements = set(names)

    assert self._root_elements is not None
    return [e for e in self.elements.values() if e.name in self._root_elements]

@property
def simple_types(self) -> List[XsdSimpleType]:
    """Returns a list containing the global simple types."""
    return [x for x in self.types.values() if isinstance(x, XsdSimpleType)]

@property
def complex_types(self) -> List[XsdComplexType]:
    """Returns a list containing the global complex types."""
    return [x for x in self.types.values() if isinstance(x, XsdComplexType)]

@classmethod
def create_meta_schema(cls, source: Optional[str] = None,
                       base_schemas: Union[None, Dict[str, str],
                                           List[Tuple[str, str]]] = None,
                       global_maps: Optional[XsdGlobals] = None) -> SchemaType:
    """
    Creates a new meta-schema instance.

    :param source: an optional argument referencing to or containing the XSD meta-schema \
    resource. Required if the schema class doesn't already have a meta-schema with an URL, \
    otherwise the URL of the class meta-schema is used.
    :param base_schemas: an optional dictionary that contains namespace URIs and \
    schema locations. If provided is used as substitute for class BASE_SCHEMAS. \
    Also a sequence of (namespace, location) items can be provided if there are more \
    schema documents for one or more namespaces.
    :param global_maps: is an optional argument containing an :class:`XsdGlobals` \
    instance for the new meta schema. If not provided a new map is created.
    :raises XMLSchemaValueError: if *source* is not provided and the class \
    doesn't have a meta-schema with an URL.
    :raises ValueError: if *base_schemas* is a sequence with items that \
    cannot be unpacked as (namespace, location) couples.
    """
    if source is None:
        # Fall back to the URL of the class meta-schema: it's an error only
        # when there is no meta-schema or when its URL is missing.
        if cls.meta_schema is None or not cls.meta_schema.url:
            raise XMLSchemaValueError("Missing meta-schema source URL")
        source = cast(str, cls.meta_schema.url)

    _base_schemas: Union[ItemsView[str, str], List[Tuple[str, str]]]
    if base_schemas is None:
        _base_schemas = cls.BASE_SCHEMAS.items()
    elif isinstance(base_schemas, dict):
        _base_schemas = base_schemas.items()
    else:
        try:
            _base_schemas = [(ns, location) for ns, location in base_schemas]
        except ValueError:
            raise ValueError(
                "The argument 'base_schemas' is not a dictionary nor a sequence of items"
            )

    meta_schema: SchemaType
    meta_schema_class = cls if cls.meta_schema is None else cls.meta_schema.__class__
    meta_schema = meta_schema_class(source, XSD_NAMESPACE, global_maps=global_maps,
                                    defuse='never', build=False)

    # Base schemas of the XSD namespace are included, the others are imported
    for ns, location in _base_schemas:
        if ns == XSD_NAMESPACE:
            meta_schema.include_schema(location=location)
        else:
            meta_schema.import_schema(namespace=ns, location=location)
    return meta_schema
779 """ 780 if schema is None: 781 schema = self 782 783 annotation = None 784 try: 785 child = elem[0] 786 except IndexError: 787 return cast(XsdSimpleType, self.maps.types[XSD_ANY_SIMPLE_TYPE]) 788 else: 789 if child.tag == XSD_ANNOTATION: 790 annotation = XsdAnnotation(child, schema, parent) 791 try: 792 child = elem[1] 793 except IndexError: 794 self.parse_error("(restriction | list | union) expected", elem) 795 return cast(XsdSimpleType, self.maps.types[XSD_ANY_SIMPLE_TYPE]) 796 797 xsd_type: XsdSimpleType 798 if child.tag == XSD_RESTRICTION: 799 xsd_type = self.xsd_atomic_restriction_class(child, schema, parent) 800 elif child.tag == XSD_LIST: 801 xsd_type = self.xsd_list_class(child, schema, parent) 802 elif child.tag == XSD_UNION: 803 xsd_type = self.xsd_union_class(child, schema, parent) 804 else: 805 self.parse_error("(restriction | list | union) expected", elem) 806 return cast(XsdSimpleType, self.maps.types[XSD_ANY_SIMPLE_TYPE]) 807 808 if annotation is not None: 809 xsd_type._annotation = annotation 810 811 try: 812 xsd_type.name = get_qname(self.target_namespace, elem.attrib['name']) 813 except KeyError: 814 if parent is None: 815 self.parse_error("missing attribute 'name' in a global simpleType", elem) 816 xsd_type.name = 'nameless_%s' % str(id(xsd_type)) 817 else: 818 if parent is not None: 819 self.parse_error("attribute 'name' not allowed for a local simpleType", elem) 820 xsd_type.name = None 821 822 if 'final' in elem.attrib: 823 try: 824 xsd_type._final = get_xsd_derivation_attribute(elem, 'final') 825 except ValueError as err: 826 xsd_type.parse_error(err, elem) 827 828 return xsd_type 829 830 def create_any_content_group(self, parent: Union[XsdComplexType, XsdGroup], 831 any_element: Optional[XsdAnyElement] = None) -> XsdGroup: 832 """ 833 Creates a model group related to schema instance that accepts any content. 834 835 :param parent: the parent component to set for the any content group. 
836 :param any_element: an optional any element to use for the content group. \ 837 When provided it's copied, linked to the group and the minOccurs/maxOccurs \ 838 are set to 0 and 'unbounded'. 839 """ 840 group: XsdGroup = self.xsd_group_class(SEQUENCE_ELEMENT, self, parent) 841 842 if isinstance(any_element, XsdAnyElement): 843 particle = any_element.copy() 844 particle.min_occurs = 0 845 particle.max_occurs = None 846 particle.parent = group 847 group.append(particle) 848 else: 849 group.append(self.xsd_any_class(ANY_ELEMENT, self, group)) 850 851 return group 852 853 def create_empty_content_group(self, parent: Union[XsdComplexType, XsdGroup], 854 model: str = 'sequence', **attrib: Any) -> XsdGroup: 855 if model == 'sequence': 856 group_elem = etree_element(XSD_SEQUENCE, **attrib) 857 elif model == 'choice': 858 group_elem = etree_element(XSD_CHOICE, **attrib) 859 elif model == 'all': 860 group_elem = etree_element(XSD_ALL, **attrib) 861 else: 862 raise XMLSchemaValueError("'model' argument must be (sequence | choice | all)") 863 864 group_elem.text = '\n ' 865 return self.xsd_group_class(group_elem, self, parent) 866 867 def create_any_attribute_group(self, parent: Union[XsdComplexType, XsdElement]) \ 868 -> XsdAttributeGroup: 869 """ 870 Creates an attribute group related to schema instance that accepts any attribute. 871 872 :param parent: the parent component to set for the any attribute group. 873 """ 874 attribute_group = self.xsd_attribute_group_class( 875 ATTRIBUTE_GROUP_ELEMENT, self, parent 876 ) 877 attribute_group[None] = self.xsd_any_attribute_class( 878 ANY_ATTRIBUTE_ELEMENT, self, attribute_group 879 ) 880 return attribute_group 881 882 def create_empty_attribute_group(self, parent: Union[XsdComplexType, XsdElement]) \ 883 -> XsdAttributeGroup: 884 """ 885 Creates an empty attribute group related to schema instance. 886 887 :param parent: the parent component to set for the any attribute group. 
888 """ 889 return self.xsd_attribute_group_class(ATTRIBUTE_GROUP_ELEMENT, self, parent) 890 891 def create_any_type(self) -> XsdComplexType: 892 """ 893 Creates an xs:anyType equivalent type related with the wildcards 894 connected to global maps of the schema instance in order to do a 895 correct namespace lookup during wildcards validation. 896 """ 897 schema = self.meta_schema or self 898 any_type = self.xsd_complex_type_class( 899 elem=etree_element(XSD_COMPLEX_TYPE, name=XSD_ANY_TYPE), 900 schema=schema, parent=None, mixed=True, block='', final='' 901 ) 902 assert isinstance(any_type.content, XsdGroup) 903 any_type.content.append(self.xsd_any_class( 904 ANY_ELEMENT, schema, any_type.content 905 )) 906 any_type.attributes[None] = self.xsd_any_attribute_class( 907 ANY_ATTRIBUTE_ELEMENT, schema, any_type.attributes 908 ) 909 any_type.maps = any_type.content.maps = any_type.content[0].maps = \ 910 any_type.attributes[None].maps = self.maps 911 return any_type 912 913 def create_element(self, name: str, parent: Optional[XsdComponent] = None, 914 text: Optional[str] = None, **attrib: Any) -> XsdElement: 915 """ 916 Creates an xs:element instance related to schema component. 917 Used as dummy element for validation/decoding/encoding 918 operations of wildcards and complex types. 919 """ 920 elem = etree_element(XSD_ELEMENT, name=name, **attrib) 921 if text is not None: 922 elem.text = text 923 return self.xsd_element_class(elem=elem, schema=self, parent=parent) 924 925 def copy(self) -> SchemaType: 926 """ 927 Makes a copy of the schema instance. The new instance has independent maps 928 of shared XSD components. 
929 """ 930 schema: SchemaType = object.__new__(self.__class__) 931 schema.__dict__.update(self.__dict__) 932 schema.source = copy(self.source) 933 schema.errors = self.errors[:] 934 schema.warnings = self.warnings[:] 935 schema.namespaces = dict(self.namespaces) 936 schema.locations = NamespaceResourcesMap(self.locations) 937 schema.imports = self.imports.copy() 938 schema.includes = self.includes.copy() 939 schema.maps = self.maps.copy(validator=schema) 940 return schema 941 942 __copy__ = copy 943 944 @classmethod 945 def check_schema(cls, schema: SchemaType, 946 namespaces: Optional[NamespacesType] = None) -> None: 947 """ 948 Validates the given schema against the XSD meta-schema (:attr:`meta_schema`). 949 950 :param schema: the schema instance that has to be validated. 951 :param namespaces: is an optional mapping from namespace prefix to URI. 952 953 :raises: :exc:`XMLSchemaValidationError` if the schema is invalid. 954 """ 955 if cls.meta_schema is None: 956 raise XMLSchemaRuntimeError("meta-schema unavailable for %r" % cls) 957 elif not cls.meta_schema.maps.types: 958 cls.meta_schema.maps.build() 959 960 for error in cls.meta_schema.iter_errors(schema.source, namespaces=namespaces): 961 raise error 962 963 def check_validator(self, validation: str = 'strict') -> None: 964 """Checks the status of a schema validator against a validation mode.""" 965 check_validation_mode(validation) 966 967 if self.built: 968 pass 969 elif self.meta_schema is None: 970 self.build() # Meta-schema lazy build 971 elif validation == 'skip' and self.validation == 'skip' and \ 972 any(isinstance(comp, tuple) or comp.validation_attempted == 'partial' 973 for comp in self.iter_globals()): 974 pass 975 else: 976 raise XMLSchemaNotBuiltError(self, "schema %r is not built" % self) 977 978 def build(self) -> None: 979 """Builds the schema's XSD global maps.""" 980 self.maps.build() 981 982 def clear(self) -> None: 983 """Clears the schema's XSD global maps.""" 984 self.maps.clear() 985 
@property
def built(self) -> bool:
    """
    `True` if the schema is built, `False` otherwise. A schema with XSD globals
    is built if all of them are built components. For a schema without globals
    the named global declarations of the source are checked with a lookup on
    the maps, but a meta-schema without globals is never considered built.
    """
    if any(not isinstance(g, XsdComponent) or not g.built for g in self.iter_globals()):
        return False
    for _ in self.iter_globals():
        return True  # There is at least one global and all are built
    if self.meta_schema is None:
        return False

    # No XSD globals: check with a lookup of schema child elements.
    prefix = '{%s}' % self.target_namespace if self.target_namespace else ''
    for child in self.source.root:
        if child.tag in {XSD_REDEFINE, XSD_OVERRIDE}:
            candidates = [e for e in child if e.tag in GLOBAL_TAGS]
        elif child.tag in GLOBAL_TAGS:
            candidates = [child]
        else:
            continue

        for e in candidates:
            name = e.get('name')
            if name is not None:
                try:
                    # An empty prefix leaves the name unchanged
                    if not self.maps.lookup(e.tag, prefix + name).built:
                        return False
                except KeyError:
                    return False
    return True


@property
def validation_attempted(self) -> str:
    """
    Returns 'full' if the schema is built, 'partial' if at least one global is
    still unparsed (a tuple) or has been partially validated, 'none' otherwise.
    """
    if self.built:
        return 'full'
    elif any(isinstance(comp, tuple) or comp.validation_attempted == 'partial'
             for comp in self.iter_globals()):
        return 'partial'
    else:
        return 'none'


def iter_globals(self, schema: Optional[SchemaType] = None) \
        -> Iterator[Union[SchemaGlobalType, Tuple[Any, ...]]]:
    """
    Creates an iterator for XSD global definitions/declarations related to schema namespace.

    :param schema: Optional argument for filtering only globals related to a schema instance.
    """
    if schema is None:
        yield from self.notations.values()
        yield from self.types.values()
        yield from self.attributes.values()
        yield from self.attribute_groups.values()
        yield from self.groups.values()
        yield from self.elements.values()
    else:
        def schema_filter(x: Union[XsdComponent, Tuple[ElementType, SchemaType]]) -> bool:
            # Globals not yet parsed are tuples with the schema as second item
            if isinstance(x, tuple):
                return x[1] is schema
            return x.schema is schema

        yield from filter(schema_filter, self.notations.values())
        yield from filter(schema_filter, self.types.values())
        yield from filter(schema_filter, self.attributes.values())
        yield from filter(schema_filter, self.attribute_groups.values())
        yield from filter(schema_filter, self.groups.values())
        yield from filter(schema_filter, self.elements.values())


def iter_components(self, xsd_classes: ComponentClassType = None) \
        -> Iterator[Union[XsdComponent, SchemaType]]:
    """
    Iterates yielding the schema and its components. For default
    includes all the relevant components of the schema, excluding
    only facets and empty attribute groups. The first returned
    component is the schema itself.

    :param xsd_classes: provide a class or a tuple of classes to \
    restrict the range of component types yielded.
    """
    if xsd_classes is None or isinstance(self, xsd_classes):
        yield self
    for xsd_global in self.iter_globals(self):
        # Skip the globals that are not parsed yet (tuples)
        if not isinstance(xsd_global, tuple):
            yield from xsd_global.iter_components(xsd_classes)


def get_schema(self, namespace: str) -> SchemaType:
    """
    Returns the first schema loaded for a namespace. Raises a
    `KeyError` if the requested namespace is not loaded. For an
    empty namespace that is not loaded the schema itself is returned.
    """
    try:
        return cast(SchemaType, self.maps.namespaces[namespace][0])
    except KeyError:
        if not namespace:
            return self
        raise XMLSchemaKeyError('the namespace {!r} is not loaded'.format(namespace)) from None


def get_converter(self, converter: Optional[ConverterType] = None,
                  **kwargs: Any) -> XMLSchemaConverter:
    """
    Returns a new converter instance.

    :param converter: can be a converter class or instance. If it's an instance \
    the new instance is copied from it and configured with the provided arguments.
    :param kwargs: optional arguments for initialize the converter instance.
    :return: a converter instance.
    :raises: :exc:`XMLSchemaTypeError` if the argument is not a converter \
    class or instance.
    """
    if converter is None:
        converter = self.converter

    if isinstance(converter, XMLSchemaConverter):
        return converter.copy(**kwargs)
    elif isinstance(converter, type) and issubclass(converter, XMLSchemaConverter):
        # issubclass() is guarded because it raises a generic TypeError
        # for a first argument that is not a class.
        # noinspection PyCallingNonCallable
        return converter(**kwargs)
    else:
        msg = "'converter' argument must be a %r subclass or instance: %r"
        raise XMLSchemaTypeError(msg % (XMLSchemaConverter, converter))


def get_locations(self, namespace: str) -> List[str]:
    """Get a list of location hints for a namespace, empty if there are no hints."""
    try:
        return list(self.locations[namespace])
    except KeyError:
        return []


def get_element(self, tag: str, path: Optional[str] = None,
                namespaces: Optional[NamespacesType] = None) -> Optional[XsdElement]:
    """
    Gets the XSD element matching a tag, optionally using a path.

    :param tag: the tag of the element. Without a path it's used as path.
    :param path: an optional path. If it ends with '*' the wildcard is replaced \
    by the tag and, if the search fails, the tag is looked up in global elements.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :return: the matching XSD element or `None` if the result is not an XSD element.
    """
    if not path:
        xsd_element = self.find(tag)
        return xsd_element if isinstance(xsd_element, XsdElement) else None
    elif path[-1] == '*':
        xsd_element = self.find(path[:-1] + tag, namespaces)
        if isinstance(xsd_element, XsdElement):
            return xsd_element

        obj = self.maps.elements.get(tag)
        return obj if isinstance(obj, XsdElement) else None
    else:
        xsd_element = self.find(path, namespaces)
        return xsd_element if isinstance(xsd_element, XsdElement) else None
1127 1128 def create_bindings(self, *bases: type, **attrs: Any) -> None: 1129 """ 1130 Creates data object bindings for XSD elements of the schema. 1131 1132 :param bases: base classes to use for creating the binding classes. 1133 :param attrs: attribute and method definitions for the binding classes body. 1134 """ 1135 for xsd_component in self.iter_components(): 1136 if isinstance(xsd_component, XsdElement): 1137 xsd_component.get_binding(*bases, replace_existing=True, **attrs) 1138 1139 def _parse_inclusions(self) -> None: 1140 """Processes schema document inclusions and redefinitions/overrides.""" 1141 for child in self.source.root: 1142 if child.tag == XSD_INCLUDE: 1143 try: 1144 location = child.attrib['schemaLocation'].strip() 1145 logger.info("Include schema from %r", location) 1146 self.include_schema(location, self.base_url) 1147 except KeyError: 1148 # Attribute missing error already found by validation against meta-schema 1149 pass 1150 except (OSError, IOError) as err: 1151 # It is not an error if the location fail to resolve: 1152 # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#compound-schema 1153 # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-include 1154 self.warnings.append("Include schema failed: %s." 
% str(err)) 1155 warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3) 1156 except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: 1157 msg = 'cannot include schema %r: %s' % (child.attrib['schemaLocation'], err) 1158 if isinstance(err, (XMLSchemaParseError, ParseError)): 1159 self.parse_error(msg) 1160 else: 1161 raise type(err)(msg) 1162 1163 elif child.tag == XSD_REDEFINE: 1164 try: 1165 location = child.attrib['schemaLocation'].strip() 1166 logger.info("Redefine schema %r", location) 1167 schema = self.include_schema(location, self.base_url) 1168 except KeyError: 1169 # Attribute missing error already found by validation against meta-schema 1170 pass 1171 except (OSError, IOError) as err: 1172 # If the redefine doesn't contain components (annotation excluded) 1173 # the statement is equivalent to an include, so no error is generated. 1174 # Otherwise fails. 1175 self.warnings.append("Redefine schema failed: %s." % str(err)) 1176 warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3) 1177 if any(e.tag != XSD_ANNOTATION and not callable(e.tag) for e in child): 1178 self.parse_error(err, child) 1179 except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: 1180 msg = 'cannot redefine schema %r: %s' % (child.attrib['schemaLocation'], err) 1181 if isinstance(err, (XMLSchemaParseError, ParseError)): 1182 self.parse_error(msg, child) 1183 else: 1184 raise type(err)(msg) 1185 else: 1186 schema.redefine = self 1187 1188 elif child.tag == XSD_OVERRIDE and self.XSD_VERSION != '1.0': 1189 try: 1190 location = child.attrib['schemaLocation'].strip() 1191 logger.info("Override schema %r", location) 1192 schema = self.include_schema(location, self.base_url) 1193 except KeyError: 1194 # Attribute missing error already found by validation against meta-schema 1195 pass 1196 except (OSError, IOError) as err: 1197 # If the override doesn't contain components (annotation excluded) 1198 # the statement is equivalent to an 
include, so no error is generated. 1199 # Otherwise fails. 1200 self.warnings.append("Override schema failed: %s." % str(err)) 1201 warnings.warn(self.warnings[-1], XMLSchemaIncludeWarning, stacklevel=3) 1202 if any(e.tag != XSD_ANNOTATION and not callable(e.tag) for e in child): 1203 self.parse_error(str(err), child) 1204 else: 1205 schema.override = self 1206 1207 def include_schema(self, location: str, base_url: Optional[str] = None, 1208 build: bool = False) -> SchemaType: 1209 """ 1210 Includes a schema for the same namespace, from a specific URL. 1211 1212 :param location: is the URL of the schema. 1213 :param base_url: is an optional base URL for fetching the schema resource. 1214 :param build: defines when to build the imported schema, the default is to not build. 1215 :return: the included :class:`XMLSchema` instance. 1216 """ 1217 schema: SchemaType 1218 schema_url = fetch_resource(location, base_url) 1219 for schema in self.maps.namespaces[self.target_namespace]: 1220 if schema_url == schema.url: 1221 logger.info("Resource %r is already loaded", location) 1222 break 1223 else: 1224 schema = type(self)( 1225 source=schema_url, 1226 namespace=self.target_namespace, 1227 validation=self.validation, 1228 global_maps=self.maps, 1229 converter=self.converter, 1230 locations=self._locations, 1231 base_url=self.base_url, 1232 allow=self.allow, 1233 defuse=self.defuse, 1234 timeout=self.timeout, 1235 build=build, 1236 ) 1237 1238 if schema is self: 1239 return self 1240 elif location not in self.includes: 1241 self.includes[location] = schema 1242 elif self.includes[location] is not schema: 1243 self.includes[schema_url] = schema 1244 return schema 1245 1246 def _parse_imports(self) -> None: 1247 """ 1248 Parse namespace import elements. Imports are done on namespace basis, not on 1249 single resource. A warning is generated for a failure of a namespace import. 
1250 """ 1251 namespace_imports = NamespaceResourcesMap(map( 1252 lambda x: (x.get('namespace'), x.get('schemaLocation')), 1253 filter(lambda x: x.tag == XSD_IMPORT, self.source.root) 1254 )) 1255 1256 for namespace, locations in namespace_imports.items(): 1257 1258 # Checks the namespace against the targetNamespace of the schema 1259 if namespace is None: 1260 namespace = '' 1261 if namespace == self.target_namespace: 1262 self.parse_error("if the 'namespace' attribute is not present on " 1263 "the import statement then the importing schema " 1264 "must have a 'targetNamespace'") 1265 continue 1266 elif namespace == self.target_namespace: 1267 self.parse_error("the attribute 'namespace' must be different from " 1268 "schema's 'targetNamespace'") 1269 continue 1270 1271 # Skip import of already imported namespaces 1272 if self.imports.get(namespace) is not None: 1273 continue 1274 elif namespace in self.maps.namespaces: 1275 self.imports[namespace] = self.maps.namespaces[namespace][0] 1276 continue 1277 1278 locations = [url for url in locations if url] 1279 if not namespace: 1280 pass 1281 elif not locations: 1282 locations = self.get_locations(namespace) 1283 elif all(is_remote_url(url) for url in locations): 1284 # If all import schema locations are remote URLs and there are local hints 1285 # that match a local file path, try the local hints before schema locations. 1286 # This is not the standard processing for XSD imports, but resolve the problem 1287 # of local processing of schemas tested to work from a http server, providing 1288 # explicit local hints. 
1289 local_hints = [url for url in self.get_locations(namespace) 1290 if url and url_path_is_file(url)] 1291 if local_hints: 1292 locations = local_hints + locations 1293 1294 if namespace in self.fallback_locations: 1295 locations.append(self.fallback_locations[namespace]) 1296 1297 self._import_namespace(namespace, locations) 1298 1299 def _import_namespace(self, namespace: str, locations: List[str]) -> None: 1300 import_error = None 1301 for url in locations: 1302 try: 1303 logger.debug("Import namespace %r from %r", namespace, url) 1304 self.import_schema(namespace, url, self.base_url) 1305 except (OSError, IOError) as err: 1306 # It's not an error if the location access fails (ref. section 4.2.6.2): 1307 # https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#composition-schemaImport 1308 logger.debug('%s', err) 1309 if import_error is None: 1310 import_error = err 1311 except (XMLSchemaParseError, XMLSchemaTypeError, ParseError) as err: 1312 if namespace: 1313 msg = "cannot import namespace %r: %s." % (namespace, err) 1314 else: 1315 msg = "cannot import chameleon schema: %s." % err 1316 if isinstance(err, (XMLSchemaParseError, ParseError)): 1317 self.parse_error(msg) 1318 else: 1319 raise type(err)(msg) 1320 except XMLSchemaValueError as err: 1321 self.parse_error(err) 1322 else: 1323 logger.info("Namespace %r imported from %r", namespace, url) 1324 break 1325 else: 1326 if import_error is not None: 1327 msg = "Import of namespace {!r} from {!r} failed: {}." 1328 self.warnings.append(msg.format(namespace, locations, str(import_error))) 1329 warnings.warn(self.warnings[-1], XMLSchemaImportWarning, stacklevel=4) 1330 self.imports[namespace] = None 1331 1332 def import_schema(self, namespace: str, location: str, base_url: Optional[str] = None, 1333 force: bool = False, build: bool = False) -> Optional[SchemaType]: 1334 """ 1335 Imports a schema for an external namespace, from a specific URL. 1336 1337 :param namespace: is the URI of the external namespace. 
1338 :param location: is the URL of the schema. 1339 :param base_url: is an optional base URL for fetching the schema resource. 1340 :param force: if set to `True` imports the schema also if the namespace is already imported. 1341 :param build: defines when to build the imported schema, the default is to not build. 1342 :return: the imported :class:`XMLSchema` instance. 1343 """ 1344 if location == self.url: 1345 return self 1346 1347 if not force: 1348 if self.imports.get(namespace) is not None: 1349 return self.imports[namespace] 1350 elif namespace in self.maps.namespaces: 1351 self.imports[namespace] = self.maps.namespaces[namespace][0] 1352 return self.imports[namespace] 1353 1354 schema: SchemaType 1355 schema_url = fetch_resource(location, base_url) 1356 imported_ns = self.imports.get(namespace) 1357 if imported_ns is not None and imported_ns.url == schema_url: 1358 return imported_ns 1359 elif namespace in self.maps.namespaces: 1360 for schema in self.maps.namespaces[namespace]: 1361 if schema_url == schema.url: 1362 self.imports[namespace] = schema 1363 return schema 1364 1365 schema = type(self)( 1366 source=schema_url, 1367 validation=self.validation, 1368 global_maps=self.maps, 1369 converter=self.converter, 1370 locations=self._locations, 1371 base_url=self.base_url, 1372 allow=self.allow, 1373 defuse=self.defuse, 1374 timeout=self.timeout, 1375 build=build, 1376 ) 1377 if schema.target_namespace != namespace: 1378 raise XMLSchemaValueError( 1379 'imported schema %r has an unmatched namespace %r' % (location, namespace) 1380 ) 1381 self.imports[namespace] = schema 1382 return schema 1383 1384 def add_schema(self, source: SchemaSourceType, 1385 namespace: Optional[str] = None, build: bool = False) -> SchemaType: 1386 """ 1387 Add another schema source to the maps of the instance. 
1388 1389 :param source: an URI that reference to a resource or a file path or a file-like \ 1390 object or a string containing the schema or an Element or an ElementTree document. 1391 :param namespace: is an optional argument that contains the URI of the namespace \ 1392 that has to used in case the schema has no namespace (chameleon schema). For other \ 1393 cases, when specified, it must be equal to the *targetNamespace* of the schema. 1394 :param build: defines when to build the imported schema, the default is to not build. 1395 :return: the added :class:`XMLSchema` instance. 1396 """ 1397 return type(self)( 1398 source=source, 1399 namespace=namespace, 1400 validation=self.validation, 1401 global_maps=self.maps, 1402 converter=self.converter, 1403 locations=self._locations, 1404 base_url=self.base_url, 1405 allow=self.allow, 1406 defuse=self.defuse, 1407 timeout=self.timeout, 1408 build=build, 1409 ) 1410 1411 def export(self, target: str, save_remote: bool = False) -> None: 1412 """ 1413 Exports a schema instance. The schema instance is exported to a 1414 directory with also the hierarchy of imported/included schemas. 1415 1416 :param target: a path to a local empty directory. 1417 :param save_remote: if `True` is provided saves also remote schemas. 
1418 """ 1419 import pathlib 1420 from urllib.parse import urlsplit 1421 1422 target_path = pathlib.Path(target) 1423 if target_path.is_dir(): 1424 if list(target_path.iterdir()): 1425 raise XMLSchemaValueError("target directory {!r} is not empty".format(target)) 1426 elif target_path.exists(): 1427 msg = "target {} is not a directory" 1428 raise XMLSchemaValueError(msg.format(target_path.parent)) 1429 elif not target_path.parent.exists(): 1430 msg = "target parent directory {} does not exist" 1431 raise XMLSchemaValueError(msg.format(target_path.parent)) 1432 elif not target_path.parent.is_dir(): 1433 msg = "target parent {} is not a directory" 1434 raise XMLSchemaValueError(msg.format(target_path.parent)) 1435 1436 url = self.url or 'schema.xsd' 1437 basename = pathlib.Path(urlsplit(url).path).name 1438 exports: Any = {self: [target_path.joinpath(basename), self.get_text()]} 1439 path: Any 1440 1441 while True: 1442 current_length = len(exports) 1443 1444 for schema in list(exports): 1445 dir_path = exports[schema][0].parent 1446 imports_items = [(x.url, x) for x in schema.imports.values() if x is not None] 1447 1448 for location, ref_schema in chain(schema.includes.items(), imports_items): 1449 if ref_schema in exports: 1450 continue 1451 1452 if is_remote_url(location): 1453 if not save_remote: 1454 continue 1455 url_parts = urlsplit(location) 1456 netloc, path = url_parts.netloc, url_parts.path 1457 path = pathlib.Path().joinpath(netloc).joinpath(path.lstrip('/')) 1458 else: 1459 if location.startswith('file:/'): 1460 location = urlsplit(location).path 1461 1462 path = pathlib.Path(location) 1463 if path.is_absolute(): 1464 location = '/'.join(path.parts[-2:]) 1465 try: 1466 schema_path = pathlib.Path(schema.filepath) 1467 except TypeError: 1468 pass 1469 else: 1470 try: 1471 path = path.relative_to(schema_path.parent) 1472 except ValueError: 1473 parts = path.parts 1474 if parts[:-2] == schema_path.parts[:-2]: 1475 path = pathlib.Path(location) 1476 else: 
1477 path = dir_path.joinpath(path) 1478 exports[ref_schema] = [path, ref_schema.get_text()] 1479 continue 1480 1481 elif not str(path).startswith('..'): 1482 path = dir_path.joinpath(path) 1483 exports[ref_schema] = [path, ref_schema.get_text()] 1484 continue 1485 1486 if DRIVE_PATTERN.match(path.parts[0]): 1487 path = pathlib.Path().joinpath(path.parts[1:]) 1488 1489 for strip_path in ('/', '\\', '..'): 1490 while True: 1491 try: 1492 path = path.relative_to(strip_path) 1493 except ValueError: 1494 break 1495 1496 path = target_path.joinpath(path) 1497 repl = 'schemaLocation="{}"'.format(path.as_posix()) 1498 schema_text = exports[schema][1] 1499 pattern = r'\bschemaLocation\s*=\s*[\'\"].*%s.*[\'"]' % re.escape(location) 1500 exports[schema][1] = re.sub(pattern, repl, schema_text) 1501 exports[ref_schema] = [path, ref_schema.get_text()] 1502 1503 if current_length == len(exports): 1504 break 1505 1506 for schema, (path, text) in exports.items(): 1507 if not path.parent.exists(): 1508 path.parent.mkdir(parents=True) 1509 1510 with path.open(mode='w') as fp: 1511 fp.write(text) 1512 1513 def version_check(self, elem: ElementType) -> bool: 1514 """ 1515 Checks if the element is compatible with the version of the validator and XSD 1516 types/facets availability. Invalid vc attributes are not detected in XSD 1.0. 1517 1518 :param elem: an Element of the schema. 1519 :return: `True` if the schema element is compatible with the validator, \ 1520 `False` otherwise. 
1521 """ 1522 if VC_MIN_VERSION in elem.attrib: 1523 vc_min_version = elem.attrib[VC_MIN_VERSION] 1524 if not XSD_VERSION_PATTERN.match(vc_min_version): 1525 if self.XSD_VERSION > '1.0': 1526 self.parse_error("invalid attribute vc:minVersion value", elem) 1527 elif vc_min_version > self.XSD_VERSION: 1528 return False 1529 1530 if VC_MAX_VERSION in elem.attrib: 1531 vc_max_version = elem.attrib[VC_MAX_VERSION] 1532 if not XSD_VERSION_PATTERN.match(vc_max_version): 1533 if self.XSD_VERSION > '1.0': 1534 self.parse_error("invalid attribute vc:maxVersion value", elem) 1535 elif vc_max_version <= self.XSD_VERSION: 1536 return False 1537 1538 if VC_TYPE_AVAILABLE in elem.attrib: 1539 for qname in elem.attrib[VC_TYPE_AVAILABLE].split(): 1540 try: 1541 if self.resolve_qname(qname) not in self.maps.types: 1542 return False 1543 except XMLSchemaNamespaceError: 1544 return False 1545 except (KeyError, ValueError) as err: 1546 self.parse_error(str(err), elem) 1547 1548 if VC_TYPE_UNAVAILABLE in elem.attrib: 1549 for qname in elem.attrib[VC_TYPE_UNAVAILABLE].split(): 1550 try: 1551 if self.resolve_qname(qname) not in self.maps.types: 1552 break 1553 except XMLSchemaNamespaceError: 1554 break 1555 except (KeyError, ValueError) as err: 1556 self.parse_error(err, elem) 1557 else: 1558 return False 1559 1560 if VC_FACET_AVAILABLE in elem.attrib: 1561 for qname in elem.attrib[VC_FACET_AVAILABLE].split(): 1562 try: 1563 facet_name = self.resolve_qname(qname) 1564 except XMLSchemaNamespaceError: 1565 pass 1566 except (KeyError, ValueError) as err: 1567 self.parse_error(str(err), elem) 1568 else: 1569 if self.XSD_VERSION == '1.0': 1570 if facet_name not in XSD_10_FACETS: 1571 return False 1572 elif facet_name not in XSD_11_FACETS: 1573 return False 1574 1575 if VC_FACET_UNAVAILABLE in elem.attrib: 1576 for qname in elem.attrib[VC_FACET_UNAVAILABLE].split(): 1577 try: 1578 facet_name = self.resolve_qname(qname) 1579 except XMLSchemaNamespaceError: 1580 break 1581 except (KeyError, 
ValueError) as err: 1582 self.parse_error(err, elem) 1583 else: 1584 if self.XSD_VERSION == '1.0': 1585 if facet_name not in XSD_10_FACETS: 1586 break 1587 elif facet_name not in XSD_11_FACETS: 1588 break 1589 else: 1590 return False 1591 1592 return True 1593 1594 def resolve_qname(self, qname: str, namespace_imported: bool = True) -> str: 1595 """ 1596 QName resolution for a schema instance. 1597 1598 :param qname: a string in xs:QName format. 1599 :param namespace_imported: if this argument is `True` raises an \ 1600 `XMLSchemaNamespaceError` if the namespace of the QName is not the \ 1601 *targetNamespace* and the namespace is not imported by the schema. 1602 :returns: an expanded QName in the format "{*namespace-URI*}*local-name*". 1603 :raises: `XMLSchemaValueError` for an invalid xs:QName is found, \ 1604 `XMLSchemaKeyError` if the namespace prefix is not declared in the \ 1605 schema instance. 1606 """ 1607 qname = qname.strip() 1608 if not qname or ' ' in qname or '\t' in qname or '\n' in qname: 1609 raise XMLSchemaValueError("{!r} is not a valid value for xs:QName".format(qname)) 1610 1611 if qname[0] == '{': 1612 try: 1613 namespace, local_name = qname[1:].split('}') 1614 except ValueError: 1615 raise XMLSchemaValueError("{!r} is not a valid value for xs:QName".format(qname)) 1616 elif ':' in qname: 1617 try: 1618 prefix, local_name = qname.split(':') 1619 except ValueError: 1620 raise XMLSchemaValueError("{!r} is not a valid value for xs:QName".format(qname)) 1621 else: 1622 try: 1623 namespace = self.namespaces[prefix] 1624 except KeyError: 1625 raise XMLSchemaKeyError("prefix %r not found in namespace map" % prefix) 1626 else: 1627 namespace, local_name = self.namespaces.get('', ''), qname 1628 1629 if not namespace: 1630 if namespace_imported and self.target_namespace and '' not in self.imports: 1631 raise XMLSchemaNamespaceError( 1632 "the QName {!r} is mapped to no namespace, but this requires " 1633 "that there is an xs:import statement in the 
def validate(self, source: Union[XMLSourceType, XMLResource],
             path: Optional[str] = None,
             schema_path: Optional[str] = None,
             use_defaults: bool = True,
             namespaces: Optional[NamespacesType] = None,
             max_depth: Optional[int] = None,
             extra_validator: Optional[ExtraValidatorType] = None) -> None:
    """
    Validates XML data against the XSD schema/component instance, raising
    the first error reported by :meth:`iter_errors`.

    :param source: the source of XML data. Can be an :class:`XMLResource` instance, a \
    path to a file or an URI of a resource or an opened file-like object or an Element \
    instance or an ElementTree instance or a string containing the XML data.
    :param path: is an optional XPath expression that matches the elements of the XML \
    data that have to be decoded. If not provided the XML root element is selected.
    :param schema_path: an alternative XPath expression to select the XSD element \
    to use for decoding. Useful if the root of the XML data doesn't match an XSD \
    global element of the schema.
    :param use_defaults: Use schema's default values for filling missing data.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param max_depth: maximum level of validation, for default there is no limit. \
    With lazy resources is set to `source.lazy_depth` for managing lazy validation.
    :param extra_validator: an optional function for performing non-standard \
    validations on XML data. The provided function is called for each traversed \
    element, with the XML element as 1st argument and the corresponding XSD \
    element as 2nd argument. It can be also a generator function and has to \
    raise/yield :exc:`XMLSchemaValidationError` exceptions.
    :raises: :exc:`XMLSchemaValidationError` if the XML data instance is invalid.
    """
    error_stream = iter(self.iter_errors(source, path, schema_path, use_defaults,
                                         namespaces, max_depth, extra_validator))
    try:
        first_error = next(error_stream)
    except StopIteration:
        # No error has been produced: the XML data is valid.
        return
    raise first_error
def is_valid(self, source: Union[XMLSourceType, XMLResource],
             path: Optional[str] = None,
             schema_path: Optional[str] = None,
             use_defaults: bool = True,
             namespaces: Optional[NamespacesType] = None,
             max_depth: Optional[int] = None,
             extra_validator: Optional[ExtraValidatorType] = None) -> bool:
    """
    Non-raising counterpart of :meth:`validate`: takes the same arguments and
    returns ``True`` if the XML data instance is valid, ``False`` otherwise.
    Only the first item of :meth:`iter_errors` is consumed.
    """
    error_stream = self.iter_errors(source, path, schema_path, use_defaults,
                                    namespaces, max_depth, extra_validator)
    first_error = next(error_stream, None)
    if first_error is None:
        return True
    return False
def iter_errors(self, source: Union[XMLSourceType, XMLResource],
                path: Optional[str] = None,
                schema_path: Optional[str] = None,
                use_defaults: bool = True,
                namespaces: Optional[NamespacesType] = None,
                max_depth: Optional[int] = None,
                extra_validator: Optional[ExtraValidatorType] = None) \
        -> Iterator[XMLSchemaValidationError]:
    """
    Creates an iterator for the errors generated by the validation of an XML data against
    the XSD schema/component instance. Accepts the same arguments of :meth:`validate`.

    The validation is always run in 'lax' mode: each selected element is decoded
    with the matching XSD element and only the validation errors are yielded, the
    decoded data is discarded. After the last element the errors produced by
    :meth:`_validate_references` are yielded. If the root element (or an element
    without ancestors) doesn't match any XSD element and has no xsi:type attribute,
    a single error is yielded and the iteration stops.
    """
    self.check_validator(validation='lax')

    # Wrap the source in an XMLResource, unless the caller already provides one.
    if isinstance(source, XMLResource):
        resource: XMLResource = source
    else:
        resource = XMLResource(source, defuse=self.defuse, timeout=self.timeout)

    # Without an explicit schema path use the absolute path that the resource
    # computes for *path*.
    if not schema_path:
        schema_path = resource.get_absolute_path(path)

    namespaces = resource.get_namespaces(namespaces, root_only=True)
    namespace = resource.namespace or namespaces.get('', '')

    # Use the schema registered for the namespace of the XML data, falling
    # back to this schema when the lookup raises a KeyError.
    try:
        schema = self.get_schema(namespace)
    except KeyError:
        schema = self

    identities: Dict[XsdIdentity, IdentityCounter] = {}
    locations: List[Any] = []
    # The same list object is passed to the selector below and compared with
    # prev_ancestors on each step; presumably the resource keeps it updated
    # in place during the iteration -- TODO confirm.
    ancestors: List[ElementType] = []
    prev_ancestors: List[ElementType] = []

    # Options forwarded to XsdElement.iter_decode() for each selected element.
    kwargs: Dict[Any, Any] = {
        'level': resource.lazy_depth or bool(path),
        'source': resource,
        'namespaces': namespaces,
        'converter': None,
        'use_defaults': use_defaults,
        'id_map': Counter[str](),
        'identities': identities,
        'inherited': {},
        'locations': locations,  # TODO: lazy schemas load
    }
    if max_depth is not None:
        kwargs['max_depth'] = max_depth
    if extra_validator is not None:
        kwargs['extra_validator'] = extra_validator

    if path:
        selector = resource.iterfind(path, namespaces, nsmap=namespaces, ancestors=ancestors)
    else:
        selector = resource.iter_depth(mode=3, nsmap=namespaces, ancestors=ancestors)

    for elem in selector:
        if elem is resource.root:
            xsd_element = schema.get_element(elem.tag, namespaces=namespaces)
            if resource.lazy_depth:
                # Root of a lazy resource: decode it from level 0 down to the
                # lazy depth only, collecting its identities in a separate map
                # that is merged into the shared one after the decoding.
                kwargs['level'] = 0
                kwargs['identities'] = {}
                kwargs['max_depth'] = resource.lazy_depth
        else:
            if prev_ancestors != ancestors:
                # The chain of ancestors has changed. After the loop k is the
                # first index where the two chains differ or, if no difference
                # is found, the last compared index (0 if a chain is empty).
                k = 0
                for k in range(min(len(ancestors), len(prev_ancestors))):
                    if ancestors[k] is not prev_ancestors[k]:
                        break

                # Retrieve the XSD elements that match the current ancestors.
                path_ = '/'.join(e.tag for e in ancestors) + '/ancestor-or-self::node()'
                xsd_ancestors = cast(List[XsdElement], schema.findall(path_, namespaces)[1:])

                # Restart the identity constraints of the ancestors from
                # position k: all of them are stopped before any is started.
                for e in xsd_ancestors[k:]:
                    e.stop_identities(identities)

                for e in xsd_ancestors[k:]:
                    e.start_identities(identities)

                # Keep a snapshot, because the comparison above needs the
                # previous content of the ancestors list.
                prev_ancestors = ancestors[:]

            xsd_element = schema.get_element(elem.tag, schema_path, namespaces)

        if xsd_element is None:
            if XSI_TYPE in elem.attrib:
                # No matching declaration but an explicit xsi:type: validate
                # with an XSD element created on the fly for the tag.
                xsd_element = self.create_element(name=elem.tag)
            elif elem is not resource.root and ancestors:
                # Unmatched non-root elements with ancestors are skipped.
                continue
            else:
                reason = "{!r} is not an element of the schema".format(elem)
                yield schema.validation_error('lax', reason, elem, resource, namespaces)
                return

        # Only the errors are of interest here, decoded data is dropped.
        for result in xsd_element.iter_decode(elem, **kwargs):
            if isinstance(result, XMLSchemaValidationError):
                yield result
            else:
                del result

        if kwargs['identities'] is not identities:
            # A separate identities map has been used for the root of a lazy
            # resource: merge its counters and restore the shared map.
            identity: XsdIdentity
            counter: IdentityCounter
            for identity, counter in kwargs['identities'].items():
                identities[identity].counter.update(counter.counter)
            kwargs['identities'] = identities

    # Final checks on collected references (IDREFs and key references).
    yield from self._validate_references(validation='lax', **kwargs)
schema.findall(path_, namespaces)[1:]) 1765 1766 for e in xsd_ancestors[k:]: 1767 e.stop_identities(identities) 1768 1769 for e in xsd_ancestors[k:]: 1770 e.start_identities(identities) 1771 1772 prev_ancestors = ancestors[:] 1773 1774 xsd_element = schema.get_element(elem.tag, schema_path, namespaces) 1775 1776 if xsd_element is None: 1777 if XSI_TYPE in elem.attrib: 1778 xsd_element = self.create_element(name=elem.tag) 1779 elif elem is not resource.root and ancestors: 1780 continue 1781 else: 1782 reason = "{!r} is not an element of the schema".format(elem) 1783 yield schema.validation_error('lax', reason, elem, resource, namespaces) 1784 return 1785 1786 for result in xsd_element.iter_decode(elem, **kwargs): 1787 if isinstance(result, XMLSchemaValidationError): 1788 yield result 1789 else: 1790 del result 1791 1792 if kwargs['identities'] is not identities: 1793 identity: XsdIdentity 1794 counter: IdentityCounter 1795 for identity, counter in kwargs['identities'].items(): 1796 identities[identity].counter.update(counter.counter) 1797 kwargs['identities'] = identities 1798 1799 yield from self._validate_references(validation='lax', **kwargs) 1800 1801 def _validate_references(self, source: XMLResource, 1802 validation: str = 'lax', 1803 id_map: Optional[Counter[str]] = None, 1804 identities: Optional[IdentityMapType] = None, 1805 **kwargs: Any) -> Iterator[XMLSchemaValidationError]: 1806 # Check unresolved IDREF values 1807 if id_map is not None: 1808 for k, v in id_map.items(): 1809 if v == 0: 1810 msg = "IDREF %r not found in XML document" % k 1811 yield self.validation_error(validation, msg, source.root) 1812 1813 # Check still enabled key references (lazy validation cases) 1814 if identities is not None: 1815 for constraint, counter in identities.items(): 1816 if counter.enabled and isinstance(constraint, XsdKeyref): 1817 for error in cast(KeyrefCounter, counter).iter_errors(identities): 1818 yield self.validation_error(validation, error, source.root, 
def raw_decoder(self, source: XMLResource, path: Optional[str] = None,
                schema_path: Optional[str] = None, validation: str = 'lax',
                namespaces: Optional[NamespacesType] = None, **kwargs: Any) \
        -> Iterator[Union[Any, XMLSchemaValidationError]]:
    """
    Returns a generator for decoding a resource.

    The elements selected by *path* (or by a depth iteration of the resource
    when no path is given) are decoded one by one. An element that matches no
    XSD element and has no xsi:type attribute produces a validation error and
    is skipped. References are checked at the end, unless a 'max_depth' option
    is provided.
    """
    if path:
        elements = source.iterfind(path, namespaces, nsmap=namespaces)
    else:
        elements = source.iter_depth(nsmap=namespaces)

    for elem in elements:
        xsd_element = self.get_element(elem.tag, schema_path, namespaces)
        if xsd_element is None:
            if XSI_TYPE not in elem.attrib:
                reason = "{!r} is not an element of the schema".format(elem)
                yield self.validation_error(validation, reason, elem, source, namespaces)
                continue

            # Undeclared element with an explicit xsi:type
            xsd_element = self.create_element(name=elem.tag)

        yield from xsd_element.iter_decode(elem, validation, **kwargs)

    if 'max_depth' in kwargs:
        return
    yield from self._validate_references(source, validation=validation, **kwargs)
def iter_decode(self, source: Union[XMLSourceType, XMLResource],
                path: Optional[str] = None,
                schema_path: Optional[str] = None,
                validation: str = 'lax',
                process_namespaces: bool = True,
                namespaces: Optional[NamespacesType] = None,
                use_defaults: bool = True,
                decimal_type: Optional[Type[Any]] = None,
                datetime_types: bool = False,
                binary_types: bool = False,
                converter: Optional[ConverterType] = None,
                filler: Optional[Callable[[Union[XsdElement, XsdAttribute]], Any]] = None,
                fill_missing: bool = False,
                keep_unknown: bool = False,
                max_depth: Optional[int] = None,
                depth_filler: Optional[Callable[[XsdElement], Any]] = None,
                value_hook: Optional[Callable[[AtomicValueType, BaseXsdType], Any]] = None,
                **kwargs: Any) -> Iterator[Union[Any, XMLSchemaValidationError]]:
    """
    Creates an iterator for decoding an XML source to a data structure.

    :param source: the source of XML data. Can be an :class:`XMLResource` instance, a \
    path to a file or an URI of a resource or an opened file-like object or an Element \
    instance or an ElementTree instance or a string containing the XML data.
    :param path: is an optional XPath expression that matches the elements of the XML \
    data that have to be decoded. If not provided the XML root element is selected.
    :param schema_path: an alternative XPath expression to select the XSD element \
    to use for decoding. Useful if the root of the XML data doesn't match an XSD \
    global element of the schema.
    :param validation: defines the XSD validation mode to use for decode, can be \
    'strict', 'lax' or 'skip'.
    :param process_namespaces: whether to use namespace information in the \
    decoding process, using the map provided with the argument *namespaces* \
    and the map extracted from the XML document.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param use_defaults: whether to use default values for filling missing data.
    :param decimal_type: conversion type for `Decimal` objects (generated by \
    `xs:decimal` built-in and derived types), useful if you want to generate a \
    JSON-compatible data structure.
    :param datetime_types: if set to `True` the datetime and duration XSD types \
    are kept decoded, otherwise their origin XML string is returned.
    :param binary_types: if set to `True` xs:hexBinary and xs:base64Binary types \
    are kept decoded, otherwise their origin XML string is returned.
    :param converter: an :class:`XMLSchemaConverter` subclass or instance to use \
    for decoding.
    :param filler: an optional callback function to fill undecodable data with a \
    typed value. The callback function must accept one positional argument, that \
    can be an XSD Element or an attribute declaration. If not provided undecodable \
    data is replaced by `None`.
    :param fill_missing: if set to `True` the decoder fills also missing attributes. \
    The filling value is `None` or a typed value if the *filler* callback is provided.
    :param keep_unknown: if set to `True` unknown tags are kept and are decoded with \
    *xs:anyType*. By default unknown tags not decoded by a wildcard are discarded.
    :param max_depth: maximum level of decoding, by default there is no limit. \
    With lazy resources it is set to `source.lazy_depth` for managing lazy decoding.
    :param depth_filler: an optional callback function to replace data over the \
    *max_depth* level. The callback function must accept one positional argument, that \
    can be an XSD Element. If not provided deeper data are replaced with `None` values.
    :param value_hook: an optional function that will be called with any decoded \
    atomic value and the XSD type used for decoding. The return value will be used \
    instead of the original value.
    :param kwargs: keyword arguments with other options for converter and decoder.
    :return: yields a decoded data object, eventually preceded by a sequence of \
    validation or decoding errors.
    """
    self.check_validator(validation)

    # Wrap the source in an XMLResource, unless the caller already provides one.
    if isinstance(source, XMLResource):
        resource: XMLResource = source
    else:
        resource = XMLResource(source, defuse=self.defuse, timeout=self.timeout)

    # A schema path is derived from *path* only when the latter is provided,
    # otherwise schema_path is left as passed by the caller.
    if not schema_path and path:
        schema_path = resource.get_absolute_path(path)

    if process_namespaces:
        namespaces = resource.get_namespaces(namespaces, root_only=True)
        namespace = resource.namespace or namespaces.get('', '')
    else:
        # The caller's *namespaces* argument (possibly None) is used unchanged.
        namespace = resource.namespace

    schema = self.get_schema(namespace)
    converter = self.get_converter(converter, namespaces=namespaces, **kwargs)

    # From here on kwargs is the set of options forwarded to the decoders. The
    # maps for IDs, identities and inherited values are created anew each time.
    kwargs.update(
        converter=converter,
        namespaces=namespaces,
        source=resource,
        use_defaults=use_defaults,
        id_map=Counter[str](),
        identities={},
        inherited={},
    )

    # Optional arguments are forwarded only when they differ from their defaults.
    if decimal_type is not None:
        kwargs['decimal_type'] = decimal_type
    if datetime_types:
        kwargs['datetime_types'] = datetime_types
    if binary_types:
        kwargs['binary_types'] = binary_types
    if filler is not None:
        kwargs['filler'] = filler
    if fill_missing:
        kwargs['fill_missing'] = fill_missing
    if keep_unknown:
        kwargs['keep_unknown'] = keep_unknown
    if max_depth is not None:
        kwargs['max_depth'] = max_depth
    if depth_filler is not None:
        kwargs['depth_filler'] = depth_filler
    if value_hook is not None:
        kwargs['value_hook'] = value_hook

    if path:
        selector = resource.iterfind(path, namespaces, nsmap=namespaces)
    elif not resource.is_lazy():
        selector = resource.iter_depth(nsmap=namespaces)
    else:
        # Lazy resource without a path: a raw decoder generator is created with
        # the options collected so far (it is not started here), then it is set
        # as the value returned by the depth filler and the decoding depth is
        # limited to the lazy depth. In this branch the 'depth_filler' and
        # 'max_depth' options provided by the caller are replaced.
        decoder = self.raw_decoder(
            schema_path=resource.get_absolute_path(),
            validation=validation,
            **kwargs
        )
        kwargs['depth_filler'] = lambda x: decoder
        kwargs['max_depth'] = resource.lazy_depth
        selector = resource.iter_depth(mode=2, nsmap=namespaces)

    for elem in selector:
        xsd_element = schema.get_element(elem.tag, schema_path, namespaces)
        if xsd_element is None:
            if XSI_TYPE in elem.attrib:
                # No matching declaration but an explicit xsi:type: decode
                # with an XSD element created on the fly for the tag.
                xsd_element = self.create_element(name=elem.tag)
            else:
                # An unknown element ends the whole decoding after the error.
                reason = "{!r} is not an element of the schema".format(elem)
                yield schema.validation_error(validation, reason, elem, resource, namespaces)
                return

        yield from xsd_element.iter_decode(elem, validation, **kwargs)

    # References are checked only if no depth limit is in effect.
    if 'max_depth' not in kwargs:
        yield from self._validate_references(validation=validation, **kwargs)
def decode(self, source: Union[XMLSourceType, XMLResource],
           path: Optional[str] = None,
           schema_path: Optional[str] = None,
           validation: str = 'strict',
           *args: Any, **kwargs: Any) -> DecodeType[Any]:
    """
    Decodes XML data. Takes the same arguments of the method :func:`XMLSchema.iter_decode`.

    :return: `None` if nothing is decoded, the decoded object if there is only one, \
    otherwise a list with all the decoded objects. If *validation* argument is 'lax' \
    a 2-items tuple is returned, where the first item is the decoded data and the \
    second item is a list containing the errors.
    :raises: the first validation error found if *validation* argument is 'strict'.
    """
    lax_mode = validation == 'lax'
    strict_mode = validation == 'strict'

    decoded: List[Any] = []
    errors: List[XMLSchemaValidationError] = []
    results = self.iter_decode(source, path, schema_path, validation, *args, **kwargs)
    for item in results:
        if not isinstance(item, XMLSchemaValidationError):
            decoded.append(item)
        elif lax_mode:
            errors.append(item)
        elif strict_mode:
            raise item

    # Collapse the decoded chunks: nothing, a single object or the whole list.
    if not decoded:
        value = None
    elif len(decoded) == 1:
        value = decoded[0]
    else:
        value = decoded

    return (value, errors) if lax_mode else value
def to_objects(self, source: Union[XMLSourceType, XMLResource], with_bindings: bool = False,
               **kwargs: Any) -> DecodeType['dataobjects.DataElement']:
    """
    Decodes XML data to Python data objects.

    :param source: the XML data source, passed unchanged to :meth:`decode`.
    :param with_bindings: if `True` is provided the decoding is done using \
    :class:`DataBindingConverter`, otherwise :class:`DataElementConverter` \
    is used.
    :param kwargs: other optional keyword arguments for the method \
    :func:`iter_decode`, except the argument *converter*.
    """
    if with_bindings:
        converter = dataobjects.DataBindingConverter
    else:
        converter = dataobjects.DataElementConverter
    return self.decode(source, converter=converter, **kwargs)
def iter_encode(self, obj: Any, path: Optional[str] = None, validation: str = 'lax',
                namespaces: Optional[NamespacesType] = None, use_defaults: bool = True,
                converter: Optional[ConverterType] = None, unordered: bool = False,
                **kwargs: Any) -> Iterator[Union[ElementType, XMLSchemaValidationError]]:
    """
    Creates an iterator for encoding a data structure to an ElementTree's Element.

    :param obj: the data that has to be encoded to XML data.
    :param path: is an optional XPath expression for selecting the element of \
    the schema that matches the data that has to be encoded. If not provided \
    the XSD element is selected in this order: the only global element of the \
    schema, the only root element of the schema, the element found using the \
    key of *obj* when it is a dictionary with a single item.
    :param validation: the XSD validation mode. Can be 'strict', 'lax' or 'skip'.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param use_defaults: whether to use default values for filling missing data.
    :param converter: an :class:`XMLSchemaConverter` subclass or instance to use for \
    the encoding.
    :param unordered: a flag for explicitly activating unordered encoding mode for \
    content model data. This mode uses content models for a reordered-by-model \
    iteration of the child elements.
    :param kwargs: keyword arguments containing options for converter.
    :return: yields an Element instance/s or validation/encoding errors.
    :raises: `XMLSchemaValueError` if the schema has no element declarations, \
    `XMLSchemaEncodeError` if no XSD element can be selected for the data.
    """
    self.check_validator(validation)
    if not self.elements:
        raise XMLSchemaValueError("encoding needs at least one XSD element declaration!")

    # Always use a new dictionary, built from the provided mapping if any.
    namespaces = {} if namespaces is None else dict(namespaces)

    converter = self.get_converter(converter, namespaces=namespaces, **kwargs)

    def find_element(name: str) -> Any:
        # Search the name in the schema of its namespace. The namespace is
        # extracted starting from the first brace or word character.
        match = re.search(r'[{\w]', name)
        if match is None:
            return None
        namespace = get_namespace(name[match.start():], namespaces)
        return self.get_schema(namespace).find(name, namespaces)

    xsd_element = None
    if path is not None:
        xsd_element = find_element(path)
    elif len(self.elements) == 1:
        xsd_element = next(iter(self.elements.values()))
    else:
        root_elements = self.root_elements
        if len(root_elements) == 1:
            xsd_element = root_elements[0]
        elif isinstance(obj, (converter.dict, dict)) and len(obj) == 1:
            # The single key of the data is used as the name of the element.
            xsd_element = find_element(next(iter(obj)))

    if not isinstance(xsd_element, XsdElement):
        if path is not None:
            reason = "the path %r doesn't match any element of the schema!" % path
        else:
            # Fixed wording: this is the encoder, the message said "decoding".
            reason = "unable to select an element for encoding data, " \
                     "provide a valid 'path' argument."
        raise XMLSchemaEncodeError(self, obj, self.elements, reason, namespaces=namespaces)

    yield from xsd_element.iter_encode(obj, validation, use_defaults=use_defaults,
                                       converter=converter, unordered=unordered, **kwargs)
def encode(self, obj: Any, path: Optional[str] = None, validation: str = 'strict',
           *args: Any, **kwargs: Any) -> EncodeType[Any]:
    """
    Encodes to XML data. Takes the same arguments of the method :func:`XMLSchema.iter_encode`.

    :return: An ElementTree's Element or a list containing a sequence of ElementTree's \
    elements if the argument *path* matches multiple XML data chunks. If *validation* \
    argument is 'lax' a 2-items tuple is returned, where the first item is the encoded \
    object and the second item is a list containing the errors.
    :raises: the first validation error found if *validation* argument is 'strict'.
    """
    lax_mode = validation == 'lax'
    strict_mode = validation == 'strict'

    encoded: List[Any] = []
    errors: List[XMLSchemaValidationError] = []
    item: Union[ElementType, XMLSchemaValidationError]
    for item in self.iter_encode(obj, path, validation, *args, **kwargs):
        if not isinstance(item, XMLSchemaValidationError):
            encoded.append(item)
        elif lax_mode:
            errors.append(item)
        elif strict_mode:
            raise item

    # Collapse the encoded chunks: nothing, a single element or the whole list.
    if not encoded:
        value = None
    elif len(encoded) == 1:
        value = encoded[0]
    else:
        value = encoded

    return (value, errors) if lax_mode else value
class XMLSchema10(XMLSchemaBase):
    """
    XSD 1.0 schema class.

    The XML representation of the <schema> element for XSD 1.0:

    <schema
      attributeFormDefault = (qualified | unqualified) : unqualified
      blockDefault = (#all | List of (extension | restriction | substitution)) : ''
      elementFormDefault = (qualified | unqualified) : unqualified
      finalDefault = (#all | List of (extension | restriction | list | union)) : ''
      id = ID
      targetNamespace = anyURI
      version = token
      xml:lang = language
      {any attributes with non-schema namespace . . .}>
      Content: ((include | import | redefine | annotation)*, (((simpleType | complexType | group |
      attributeGroup) | element | attribute | notation), annotation*)*)
    </schema>
    """
    # Location of the XSD 1.0 meta-schema file, under the package's schemas directory.
    meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.0/XMLSchema.xsd')  # type: ignore

    # Map from namespace URI to the location of a schema file for that namespace.
    BASE_SCHEMAS = {
        XML_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XML/xml_minimal.xsd'),
        XSI_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSI/XMLSchema-instance_minimal.xsd'),
    }
class XMLSchema11(XMLSchemaBase):
    """
    XSD 1.1 schema class.

    Two XML representations of the <schema> element are reported: the first one
    is the XSD 1.1 form, the second one is the XSD 1.0 form, that has no
    *defaultAttributes* and *xpathDefaultNamespace* attributes and no *override*
    and *defaultOpenContent* children (this class parses also XSD 1.0 schemas).

    <schema
      attributeFormDefault = (qualified | unqualified) : unqualified
      blockDefault = (#all | List of (extension | restriction | substitution)) : ''
      defaultAttributes = QName
      xpathDefaultNamespace = (anyURI | (##defaultNamespace | ##targetNamespace| ##local)) : ##local
      elementFormDefault = (qualified | unqualified) : unqualified
      finalDefault = (#all | List of (extension | restriction | list | union)) : ''
      id = ID
      targetNamespace = anyURI
      version = token
      xml:lang = language
      {any attributes with non-schema namespace . . .}>
      Content: ((include | import | redefine | override | annotation)*,
      (defaultOpenContent, annotation*)?, ((simpleType | complexType |
      group | attributeGroup | element | attribute | notation), annotation*)*)
    </schema>

    <schema
      attributeFormDefault = (qualified | unqualified) : unqualified
      blockDefault = (#all | List of (extension | restriction | substitution)) : ''
      elementFormDefault = (qualified | unqualified) : unqualified
      finalDefault = (#all | List of (extension | restriction | list | union)) : ''
      id = ID
      targetNamespace = anyURI
      version = token
      xml:lang = language
      {any attributes with non-schema namespace . . .}>
      Content: ((include | import | redefine | annotation)*, (((simpleType | complexType | group |
      attributeGroup) | element | attribute | notation), annotation*)*)
    </schema>
    """
    # Location of the XSD 1.1 meta-schema file, under the package's schemas directory.
    meta_schema = os.path.join(SCHEMAS_DIR, 'XSD_1.1/XMLSchema.xsd')  # type: ignore

    # Version label of this class, as a string.
    XSD_VERSION = '1.1'

    # Map from namespace URI to the location of a schema file for that namespace.
    # Here it also has entries for the XSD and the versioning (vc) namespaces.
    BASE_SCHEMAS = {
        XML_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XML/xml_minimal.xsd'),
        XSI_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSI/XMLSchema-instance_minimal.xsd'),
        XSD_NAMESPACE: os.path.join(SCHEMAS_DIR, 'XSD_1.1/xsd11-extra.xsd'),
        VC_NAMESPACE: os.path.join(SCHEMAS_DIR, 'VC/XMLSchema-versioning.xsd'),
    }

    # XSD 1.1 variants of the component classes; presumably these are the
    # classes used by the schema for building its components -- TODO confirm
    # against the base class.
    xsd_complex_type_class = Xsd11ComplexType
    xsd_attribute_class = Xsd11Attribute
    xsd_any_attribute_class = Xsd11AnyAttribute
    xsd_group_class = Xsd11Group
    xsd_element_class = Xsd11Element
    xsd_any_class = Xsd11AnyElement
    xsd_atomic_restriction_class = Xsd11AtomicRestriction
    xsd_union_class = Xsd11Union
    xsd_key_class = Xsd11Key
    xsd_keyref_class = Xsd11Keyref
    xsd_unique_class = Xsd11Unique