#
# Copyright (c), 2016-2021, SISSA (International School for Advanced Studies).
# All rights reserved.
# This file is distributed under the terms of the MIT License.
# See the file 'LICENSE' in the root directory of the present
# distribution, or http://opensource.org/licenses/MIT.
#
# @author Davide Brunato <brunato@sissa.it>
#
from collections.abc import MutableMapping, MutableSequence
from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, Iterable, \
    List, Optional, Type, Tuple, Union

from ..exceptions import XMLSchemaTypeError
from ..names import XSI_NAMESPACE
from ..etree import etree_element, ElementData
from ..aliases import NamespacesType, ElementType, BaseXsdType
from ..namespaces import NamespaceMapper

if TYPE_CHECKING:
    from ..validators import XsdElement


class XMLSchemaConverter(NamespaceMapper):
    """
    Generic XML Schema based converter class. A converter is used to compose
    decoded XML data for an Element into a data structure and to build an Element
    from encoded data structure. There are two methods for interfacing the
    converter with the decoding/encoding process. The method *element_decode*
    accepts an ElementData tuple, containing the element parts, and returns
    a data structure. The method *element_encode* accepts a data structure and
    returns an ElementData tuple. By default character data parts are ignored.
    Prefixes and text key can be changed also using alphanumeric values but
    ambiguities with schema elements could affect XML data re-encoding.

    :param namespaces: map from namespace prefixes to URI.
    :param dict_class: dictionary class to use for decoded data. Default is `dict`.
    :param list_class: list class to use for decoded data. Default is `list`.
    :param etree_element_class: the class that has to be used to create new XML elements, \
    if not provided uses the ElementTree's Element class.
    :param text_key: is the key to apply to element's decoded text data.
    :param attr_prefix: controls the mapping of XML attributes, to the same name or \
    with a prefix. If `None` the converter ignores attributes.
    :param cdata_prefix: is used for including and prefixing the character data parts \
    of a mixed content, that are labeled with an integer instead of a string. \
    Character data parts are ignored if this argument is `None`.
    :param indent: number of spaces for XML indentation (default is 4).
    :param strip_namespaces: if set to `True` removes namespace declarations from data and \
    namespace information from names, during decoding or encoding. Defaults to `False`.
    :param preserve_root: if set to `True` the root element is preserved, wrapped into a \
    single-item dictionary. Applicable only to default converter, to \
    :class:`UnorderedConverter` and to :class:`ParkerConverter`.
    :param force_dict: if set to `True` complex elements with simple content are decoded \
    with a dictionary also if there are no decoded attributes. Applicable only to default \
    converter and to :class:`UnorderedConverter`. Defaults to `False`.
    :param force_list: if set to `True` child elements are decoded within a list in any case. \
    Applicable only to default converter and to :class:`UnorderedConverter`. Defaults to `False`.

    :ivar dict: dictionary class to use for decoded data.
    :ivar list: list class to use for decoded data.
    :ivar etree_element_class: Element class to use
    :ivar text_key: key for decoded Element text
    :ivar attr_prefix: prefix for attribute names
    :ivar cdata_prefix: prefix for character data parts
    :ivar indent: indentation to use for rebuilding XML trees
    :ivar preserve_root: preserve the root element on decoding
    :ivar force_dict: force dictionary for complex elements with simple content
    :ivar force_list: force list for child elements
    """
    ns_prefix: str
    dict: Type[Dict[str, Any]] = dict
    list: Type[List[Any]] = list

    etree_element_class: Type[ElementType]
    etree_element_class = etree_element

    __slots__ = ('text_key', 'ns_prefix', 'attr_prefix', 'cdata_prefix',
                 'indent', 'preserve_root', 'force_dict', 'force_list')

    def __init__(self, namespaces: Optional[NamespacesType] = None,
                 dict_class: Optional[Type[Dict[str, Any]]] = None,
                 list_class: Optional[Type[List[Any]]] = None,
                 etree_element_class: Optional[Type[ElementType]] = None,
                 text_key: Optional[str] = '$',
                 attr_prefix: Optional[str] = '@',
                 cdata_prefix: Optional[str] = None,
                 indent: int = 4,
                 strip_namespaces: bool = False,
                 preserve_root: bool = False,
                 force_dict: bool = False,
                 force_list: bool = False,
                 **kwargs: Any) -> None:
        """
        Initializes the converter. See the class docstring for the meaning of
        each argument. Extra keyword arguments are accepted and ignored here.
        """
        super().__init__(namespaces, strip_namespaces)

        # Class-level defaults are overridden per instance only when an
        # explicit class is provided.
        if dict_class is not None:
            self.dict = dict_class
        if list_class is not None:
            self.list = list_class
        if etree_element_class is not None:
            self.etree_element_class = etree_element_class

        self.text_key = text_key
        self.attr_prefix = attr_prefix
        self.cdata_prefix = cdata_prefix
        # Namespace declarations are stored like attributes, so they share
        # the attribute prefix when one is set.
        self.ns_prefix = 'xmlns' if attr_prefix is None else f'{attr_prefix}xmlns'

        self.indent = indent
        self.preserve_root = preserve_root
        self.force_dict = force_dict
        self.force_list = force_list

    def __setattr__(self, name: str, value: Any) -> None:
        """
        Validates the type of the converter options before storing them.

        :raises XMLSchemaTypeError: if the value has a wrong type for the option.
        """
        if name in {'attr_prefix', 'text_key', 'cdata_prefix'}:
            if value is not None and not isinstance(value, str):
                raise XMLSchemaTypeError(
                    f'{name} must be a str or None, not {type(value).__name__}'
                )

        elif name in {'strip_namespaces', 'preserve_root', 'force_dict', 'force_list'}:
            if not isinstance(value, bool):
                raise XMLSchemaTypeError(
                    f'{name} must be a bool, not {type(value).__name__}'
                )

        elif name == 'indent':
            # bool is a subclass of int, so it has to be rejected explicitly
            if isinstance(value, bool) or not isinstance(value, int):
                raise XMLSchemaTypeError(
                    f'{name} must be an int, not {type(value).__name__}'
                )

        elif name == 'dict':
            if not issubclass(value, MutableMapping):
                raise XMLSchemaTypeError(
                    f'{name!r} must be a MutableMapping subclass, not {value}'
                )

        elif name == 'list':
            if not issubclass(value, MutableSequence):
                raise XMLSchemaTypeError(
                    f'{name!r} must be a MutableSequence subclass, not {value}'
                )

        super().__setattr__(name, value)

    @property
    def lossy(self) -> bool:
        """The converter ignores some kind of XML data during decoding/encoding."""
        return self.cdata_prefix is None or self.text_key is None or self.attr_prefix is None

    @property
    def losslessly(self) -> bool:
        """
        The XML data is decoded without loss of quality, neither on data nor on data model
        shape. Only losslessly converters can be always used to encode to an XML data that
        is strictly conformant to the schema.
        """
        return False

    def copy(self, **kwargs: Any) -> 'XMLSchemaConverter':
        """
        Creates a new converter of the same class, using the options of this
        instance as defaults.

        :param kwargs: options that override the values of this instance. \
        The accepted names are the same of the constructor arguments.
        :return: a new converter instance.
        """
        return type(self)(
            namespaces=kwargs.get('namespaces', self._namespaces),
            dict_class=kwargs.get('dict_class', self.dict),
            list_class=kwargs.get('list_class', self.list),
            # Fall back to the current element class like the other options,
            # otherwise a customized class is lost by the copy.
            etree_element_class=kwargs.get('etree_element_class', self.etree_element_class),
            text_key=kwargs.get('text_key', self.text_key),
            attr_prefix=kwargs.get('attr_prefix', self.attr_prefix),
            cdata_prefix=kwargs.get('cdata_prefix', self.cdata_prefix),
            indent=kwargs.get('indent', self.indent),
            strip_namespaces=kwargs.get('strip_namespaces', self.strip_namespaces),
            preserve_root=kwargs.get('preserve_root', self.preserve_root),
            force_dict=kwargs.get('force_dict', self.force_dict),
            force_list=kwargs.get('force_list', self.force_list),
        )

    def map_attributes(self, attributes: Iterable[Tuple[str, Any]]) \
            -> Iterator[Tuple[str, Any]]:
        """
        Creates an iterator for converting decoded attributes to a data structure with
        appropriate prefixes. If the instance has a not-empty map of namespaces registers
        the mapped URIs and prefixes.

        :param attributes: A sequence or an iterator of couples with the name of \
        the attribute and the decoded value. Nothing is yielded if it's empty \
        or if the converter has `attr_prefix` set to `None`.
        """
        if self.attr_prefix is None or not attributes:
            return
        elif self.attr_prefix:
            for name, value in attributes:
                yield f'{self.attr_prefix}{self.map_qname(name)}', value
        else:
            for name, value in attributes:
                yield self.map_qname(name), value

    def map_content(self, content: Iterable[Tuple[str, Any, Any]]) \
            -> Iterator[Tuple[str, Any, Any]]:
        """
        A generator function for converting decoded content to a data structure.
        If the instance has a not-empty map of namespaces registers the mapped URIs
        and prefixes.

        :param content: A sequence or an iterator of tuples with the name of the \
        element, the decoded value and the `XsdElement` instance associated.
        """
        if not content:
            return

        for name, value, xsd_child in content:
            try:
                if name[0] == '{':
                    yield self.map_qname(name), value, xsd_child
                else:
                    yield name, value, xsd_child
            except TypeError:
                # A not subscriptable name (an integer index) labels a character
                # data part, that is skipped if no cdata_prefix is set.
                if self.cdata_prefix is not None:
                    yield f'{self.cdata_prefix}{name}', value, xsd_child

    def etree_element(self, tag: str,
                      text: Optional[str] = None,
                      children: Optional[List[ElementType]] = None,
                      attrib: Optional[Dict[str, str]] = None,
                      level: int = 0) -> ElementType:
        """
        Builds an ElementTree's Element using arguments and the element class and
        the indent spacing stored in the converter instance.

        :param tag: the Element tag string.
        :param text: the Element text.
        :param children: the list of Element children/subelements.
        :param attrib: a dictionary with Element attributes.
        :param level: the level related to the encoding process (0 means the root).
        :return: an instance of the Element class is set for the converter instance.
        """
        if type(self.etree_element_class) is type(etree_element):
            if attrib is None:
                elem = self.etree_element_class(tag)
            else:
                elem = self.etree_element_class(tag, self.dict(attrib))
        else:
            # FIXME: need a more refined check
            nsmap = {prefix if prefix else None: uri
                     for prefix, uri in self._namespaces.items() if uri}
            elem = self.etree_element_class(tag, nsmap=nsmap)  # type: ignore[arg-type]
            # attrib is optional: updating with None would raise a TypeError
            if attrib:
                elem.attrib.update(attrib)

        if children:
            elem.extend(children)
            # Without a text the first child is put on a new indented line
            elem.text = text or '\n' + ' ' * self.indent * (level + 1)
        else:
            elem.text = text
        elem.tail = '\n' + ' ' * self.indent * level

        return elem

    def element_decode(self, data: ElementData, xsd_element: 'XsdElement',
                       xsd_type: Optional[BaseXsdType] = None, level: int = 0) -> Any:
        """
        Converts a decoded element data to a data structure.

        :param data: ElementData instance decoded from an Element node.
        :param xsd_element: the `XsdElement` associated to decoded the data.
        :param xsd_type: optional XSD type for supporting dynamic type through \
        *xsi:type* or xs:alternative.
        :param level: the level related to the decoding process (0 means the root).
        :return: a data structure containing the decoded data.
        """
        xsd_type = xsd_type or xsd_element.type
        result_dict = self.dict()

        # For a global root element adds the namespace declarations that
        # are related to the schema or to the XSI namespace.
        if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self:
            schema_namespaces = set(xsd_element.namespaces.values())
            result_dict.update(
                (f'{self.ns_prefix}:{k}' if k else self.ns_prefix, v)
                for k, v in self._namespaces.items()
                if v in schema_namespaces or v == XSI_NAMESPACE
            )

        xsd_group = xsd_type.model_group
        if xsd_group is None:
            # No model group: only attributes and text are decoded
            if data.attributes or self.force_dict and not xsd_type.is_simple():
                result_dict.update(self.map_attributes(data.attributes))
                if data.text is not None and data.text != '' and self.text_key is not None:
                    result_dict[self.text_key] = data.text
                return result_dict
            else:
                return data.text if data.text != '' else None

        if data.attributes:
            result_dict.update(self.map_attributes(data.attributes))

        has_single_group = xsd_group.is_single()
        if data.content:
            for name, value, xsd_child in self.map_content(data.content):
                try:
                    result = result_dict[name]
                except KeyError:
                    # First occurrence of the name
                    if xsd_child is None or has_single_group and xsd_child.is_single():
                        result_dict[name] = self.list([value]) if self.force_list else value
                    else:
                        result_dict[name] = self.list([value])
                else:
                    # Repeated name: collect the values into a list
                    if not isinstance(result, MutableSequence) or not result:
                        result_dict[name] = self.list([result, value])
                    elif isinstance(result[0], MutableSequence) or \
                            not isinstance(value, MutableSequence):
                        result.append(value)
                    else:
                        result_dict[name] = self.list([result, value])

        elif data.text is not None and data.text != '' and self.text_key is not None:
            result_dict[self.text_key] = data.text

        if level == 0 and self.preserve_root:
            return self.dict(
                [(self.map_qname(data.tag), result_dict if result_dict else None)]
            )

        if not result_dict:
            return None
        elif len(result_dict) == 1 and self.text_key in result_dict:
            # Only text has been decoded: returns the bare value
            return result_dict[self.text_key]
        return result_dict

    def element_encode(self, obj: Any, xsd_element: 'XsdElement', level: int = 0) -> ElementData:
        """
        Extracts XML decoded data from a data structure for encoding into an ElementTree.

        :param obj: the decoded object.
        :param xsd_element: the `XsdElement` associated to the decoded data structure.
        :param level: the level related to the encoding process (0 means the root).
        :return: an ElementData instance.
        """
        if level != 0:
            tag = xsd_element.name
        else:
            if xsd_element.is_global():
                tag = xsd_element.qualified_name
            else:
                tag = xsd_element.name
            if self.preserve_root and isinstance(obj, MutableMapping):
                # Unwraps the root element if a matching item is found
                match_local_name = cast(bool, self.strip_namespaces or self.default_namespace)
                match = xsd_element.get_matching_item(obj, self.ns_prefix, match_local_name)
                if match is not None:
                    obj = match

        if not isinstance(obj, MutableMapping):
            if xsd_element.type.simple_type is not None:
                return ElementData(tag, obj, None, {})
            elif xsd_element.type.mixed and isinstance(obj, (str, bytes)):
                return ElementData(tag, None, [(1, obj)], {})
            else:
                return ElementData(tag, None, obj, {})

        text = None
        content: List[Tuple[Union[int, str], Any]] = []
        attributes = {}

        for name, value in obj.items():
            if name == self.text_key:
                text = value
            elif self.cdata_prefix is not None and \
                    name.startswith(self.cdata_prefix) and \
                    name[len(self.cdata_prefix):].isdigit():
                # Character data part, labeled with its integer index
                index = int(name[len(self.cdata_prefix):])
                content.append((index, value))
            elif name == self.ns_prefix:
                # Default namespace declaration
                self[''] = value
            elif name.startswith(f'{self.ns_prefix}:'):
                # Prefixed namespace declaration
                if not self.strip_namespaces:
                    self[name[len(self.ns_prefix) + 1:]] = value
            elif self.attr_prefix and \
                    name.startswith(self.attr_prefix) and \
                    name != self.attr_prefix:
                attr_name = name[len(self.attr_prefix):]
                ns_name = self.unmap_qname(attr_name, xsd_element.attributes)
                attributes[ns_name] = value
            elif not isinstance(value, MutableSequence) or not value:
                content.append((self.unmap_qname(name), value))
            elif isinstance(value[0], (MutableMapping, MutableSequence)):
                ns_name = self.unmap_qname(name)
                content.extend((ns_name, item) for item in value)
            else:
                # A not-empty sequence of plain values: it can be a single
                # value of an xs:list type or a sequence of repeated elements.
                xsd_group = xsd_element.type.model_group
                if xsd_group is None:
                    # fallback to xs:anyType encoder
                    xsd_group = xsd_element.any_type.model_group
                    assert xsd_group is not None

                ns_name = self.unmap_qname(name)
                for xsd_child in xsd_group.iter_elements():
                    matched_element = xsd_child.match(ns_name, resolve=True)
                    if matched_element is not None:
                        if matched_element.type and matched_element.type.is_list():
                            content.append((ns_name, value))
                        else:
                            content.extend((ns_name, item) for item in value)
                        break
                else:
                    # No matching child element: with an empty attribute prefix
                    # the name can refer to an attribute.
                    if self.attr_prefix == '' and ns_name not in attributes:
                        for key, xsd_attribute in xsd_element.attributes.items():
                            if key and xsd_attribute.is_matching(ns_name):
                                attributes[key] = value
                                break
                        else:
                            content.append((ns_name, value))
                    else:
                        content.append((ns_name, value))

        return ElementData(tag, text, content, attributes)