1#
2# Copyright (c), 2016-2021, SISSA (International School for Advanced Studies).
3# All rights reserved.
4# This file is distributed under the terms of the MIT License.
5# See the file 'LICENSE' in the root directory of the present
6# distribution, or http://opensource.org/licenses/MIT.
7#
8# @author Davide Brunato <brunato@sissa.it>
9#
10from collections.abc import MutableMapping, MutableSequence
11from typing import TYPE_CHECKING, cast, Any, Dict, Iterator, Iterable, \
12    List, Optional, Type, Tuple, Union
13
14from ..exceptions import XMLSchemaTypeError
15from ..names import XSI_NAMESPACE
16from ..etree import etree_element, ElementData
17from ..aliases import NamespacesType, ElementType, BaseXsdType
18from ..namespaces import NamespaceMapper
19
20if TYPE_CHECKING:
21    from ..validators import XsdElement
22
23
class XMLSchemaConverter(NamespaceMapper):
    """
    Generic XML Schema based converter class. A converter is used to compose
    decoded XML data for an Element into a data structure and to build an Element
    from an encoded data structure. There are two methods for interfacing the
    converter with the decoding/encoding process. The method *element_decode*
    accepts an ElementData tuple, containing the element parts, and returns
    a data structure. The method *element_encode* accepts a data structure and
    returns an ElementData tuple. By default character data parts are ignored.
    Prefixes and text key can also be changed using alphanumeric values, but
    ambiguities with schema elements could affect XML data re-encoding.

    :param namespaces: map from namespace prefixes to URI.
    :param dict_class: dictionary class to use for decoded data. Default is `dict`.
    :param list_class: list class to use for decoded data. Default is `list`.
    :param etree_element_class: the class that has to be used to create new XML elements, \
    if not provided uses the ElementTree's Element class.
    :param text_key: is the key to apply to element's decoded text data.
    :param attr_prefix: controls the mapping of XML attributes, to the same name or \
    with a prefix. If `None` the converter ignores attributes.
    :param cdata_prefix: is used for including and prefixing the character data parts \
    of a mixed content, that are labeled with an integer instead of a string. \
    Character data parts are ignored if this argument is `None`.
    :param indent: number of spaces for XML indentation (default is 4).
    :param strip_namespaces: if set to `True` removes namespace declarations from data and \
    namespace information from names, during decoding or encoding. Defaults to `False`.
    :param preserve_root: if set to `True` the root element is preserved, wrapped into a \
    single-item dictionary. Applicable only to default converter, to \
    :class:`UnorderedConverter` and to :class:`ParkerConverter`.
    :param force_dict: if set to `True` complex elements with simple content are decoded \
    with a dictionary also if there are no decoded attributes. Applicable only to default \
    converter and to :class:`UnorderedConverter`. Defaults to `False`.
    :param force_list: if set to `True` child elements are decoded within a list in any case. \
    Applicable only to default converter and to :class:`UnorderedConverter`. Defaults to `False`.

    :ivar dict: dictionary class to use for decoded data.
    :ivar list: list class to use for decoded data.
    :ivar etree_element_class: Element class to use
    :ivar text_key: key for decoded Element text
    :ivar attr_prefix: prefix for attribute names
    :ivar cdata_prefix: prefix for character data parts
    :ivar ns_prefix: key used for namespace declarations in the data structure, \
    that is 'xmlns' preceded by the attribute prefix when one is set
    :ivar indent: indentation to use for rebuilding XML trees
    :ivar preserve_root: preserve the root element on decoding
    :ivar force_dict: force dictionary for complex elements with simple content
    :ivar force_list: force list for child elements
    """
    ns_prefix: str

    # Container classes used for building the data structures. These are
    # class-level defaults that an instance replaces only when a different
    # class is passed to the constructor.
    dict: Type[Dict[str, Any]] = dict
    list: Type[List[Any]] = list

    # Class used for creating new XML elements; the default is the
    # `etree_element` name imported at module level.
    etree_element_class: Type[ElementType]
    etree_element_class = etree_element

    # Per-instance options, all assigned by the constructor.
    __slots__ = ('text_key', 'ns_prefix', 'attr_prefix', 'cdata_prefix',
                 'indent', 'preserve_root', 'force_dict', 'force_list')
79
80    def __init__(self, namespaces: Optional[NamespacesType] = None,
81                 dict_class: Optional[Type[Dict[str, Any]]] = None,
82                 list_class: Optional[Type[List[Any]]] = None,
83                 etree_element_class: Optional[Type[ElementType]] = None,
84                 text_key: Optional[str] = '$',
85                 attr_prefix: Optional[str] = '@',
86                 cdata_prefix: Optional[str] = None,
87                 indent: int = 4,
88                 strip_namespaces: bool = False,
89                 preserve_root: bool = False,
90                 force_dict: bool = False,
91                 force_list: bool = False,
92                 **kwargs: Any) -> None:
93
94        super(XMLSchemaConverter, self).__init__(namespaces, strip_namespaces)
95
96        if dict_class is not None:
97            self.dict = dict_class
98        if list_class is not None:
99            self.list = list_class
100        if etree_element_class is not None:
101            self.etree_element_class = etree_element_class
102
103        self.text_key = text_key
104        self.attr_prefix = attr_prefix
105        self.cdata_prefix = cdata_prefix
106        self.ns_prefix = 'xmlns' if attr_prefix is None else f'{attr_prefix}xmlns'
107
108        self.indent = indent
109        self.preserve_root = preserve_root
110        self.force_dict = force_dict
111        self.force_list = force_list
112
113    def __setattr__(self, name: str, value: Any) -> None:
114        if name in {'attr_prefix', 'text_key', 'cdata_prefix'}:
115            if value is not None and not isinstance(value, str):
116                msg = '{} must be a str or None, not {}'
117                raise XMLSchemaTypeError(msg.format(name, type(value).__name__))
118
119        elif name in {'strip_namespaces', 'preserve_root', 'force_dict', 'force_list'}:
120            if not isinstance(value, bool):
121                msg = '{} must be a bool, not {}'
122                raise XMLSchemaTypeError(msg.format(name, type(value).__name__))
123
124        elif name == 'indent':
125            if isinstance(value, bool) or not isinstance(value, int):
126                msg = '{} must be an int, not {}'
127                raise XMLSchemaTypeError(msg.format(name, type(value).__name__))
128
129        elif name == 'dict':
130            if not issubclass(value, MutableMapping):
131                msg = '{!r} must be a MutableMapping subclass, not {}'
132                raise XMLSchemaTypeError(msg.format(name, value))
133
134        elif name == 'list':
135            if not issubclass(value, MutableSequence):
136                msg = '{!r} must be a MutableSequence subclass, not {}'
137                raise XMLSchemaTypeError(msg.format(name, value))
138
139        super(XMLSchemaConverter, self).__setattr__(name, value)
140
141    @property
142    def lossy(self) -> bool:
143        """The converter ignores some kind of XML data during decoding/encoding."""
144        return self.cdata_prefix is None or self.text_key is None or self.attr_prefix is None
145
146    @property
147    def losslessly(self) -> bool:
148        """
149        The XML data is decoded without loss of quality, neither on data nor on data model
150        shape. Only losslessly converters can be always used to encode to an XML data that
151        is strictly conformant to the schema.
152        """
153        return False
154
155    def copy(self, **kwargs: Any) -> 'XMLSchemaConverter':
156        return type(self)(
157            namespaces=kwargs.get('namespaces', self._namespaces),
158            dict_class=kwargs.get('dict_class', self.dict),
159            list_class=kwargs.get('list_class', self.list),
160            etree_element_class=kwargs.get('etree_element_class'),
161            text_key=kwargs.get('text_key', self.text_key),
162            attr_prefix=kwargs.get('attr_prefix', self.attr_prefix),
163            cdata_prefix=kwargs.get('cdata_prefix', self.cdata_prefix),
164            indent=kwargs.get('indent', self.indent),
165            strip_namespaces=kwargs.get('strip_namespaces', self.strip_namespaces),
166            preserve_root=kwargs.get('preserve_root', self.preserve_root),
167            force_dict=kwargs.get('force_dict', self.force_dict),
168            force_list=kwargs.get('force_list', self.force_list),
169        )
170
171    def map_attributes(self, attributes: Iterable[Tuple[str, Any]]) \
172            -> Iterator[Tuple[str, Any]]:
173        """
174        Creates an iterator for converting decoded attributes to a data structure with
175        appropriate prefixes. If the instance has a not-empty map of namespaces registers
176        the mapped URIs and prefixes.
177
178        :param attributes: A sequence or an iterator of couples with the name of \
179        the attribute and the decoded value. Default is `None` (for `simpleType` \
180        elements, that don't have attributes).
181        """
182        if self.attr_prefix is None or not attributes:
183            return
184        elif self.attr_prefix:
185            for name, value in attributes:
186                yield '%s%s' % (self.attr_prefix, self.map_qname(name)), value
187        else:
188            for name, value in attributes:
189                yield self.map_qname(name), value
190
191    def map_content(self, content: Iterable[Tuple[str, Any, Any]]) \
192            -> Iterator[Tuple[str, Any, Any]]:
193        """
194        A generator function for converting decoded content to a data structure.
195        If the instance has a not-empty map of namespaces registers the mapped URIs
196        and prefixes.
197
198        :param content: A sequence or an iterator of tuples with the name of the \
199        element, the decoded value and the `XsdElement` instance associated.
200        """
201        if not content:
202            return
203
204        for name, value, xsd_child in content:
205            try:
206                if name[0] == '{':
207                    yield self.map_qname(name), value, xsd_child
208                else:
209                    yield name, value, xsd_child
210            except TypeError:
211                if self.cdata_prefix is not None:
212                    yield '%s%s' % (self.cdata_prefix, name), value, xsd_child
213
214    def etree_element(self, tag: str,
215                      text: Optional[str] = None,
216                      children: Optional[List[ElementType]] = None,
217                      attrib: Optional[Dict[str, str]] = None,
218                      level: int = 0) -> ElementType:
219        """
220        Builds an ElementTree's Element using arguments and the element class and
221        the indent spacing stored in the converter instance.
222
223        :param tag: the Element tag string.
224        :param text: the Element text.
225        :param children: the list of Element children/subelements.
226        :param attrib: a dictionary with Element attributes.
227        :param level: the level related to the encoding process (0 means the root).
228        :return: an instance of the Element class is set for the converter instance.
229        """
230        if type(self.etree_element_class) is type(etree_element):
231            if attrib is None:
232                elem = self.etree_element_class(tag)
233            else:
234                elem = self.etree_element_class(tag, self.dict(attrib))
235        else:
236            # FIXME: need a more refined check
237            nsmap = {prefix if prefix else None: uri
238                     for prefix, uri in self._namespaces.items() if uri}
239            elem = self.etree_element_class(tag, nsmap=nsmap)  # type: ignore[arg-type]
240            elem.attrib.update(attrib)  # type: ignore[arg-type]
241
242        if children:
243            elem.extend(children)
244            elem.text = text or '\n' + ' ' * self.indent * (level + 1)
245            elem.tail = '\n' + ' ' * self.indent * level
246        else:
247            elem.text = text
248            elem.tail = '\n' + ' ' * self.indent * level
249
250        return elem
251
252    def element_decode(self, data: ElementData, xsd_element: 'XsdElement',
253                       xsd_type: Optional[BaseXsdType] = None, level: int = 0) -> Any:
254        """
255        Converts a decoded element data to a data structure.
256
257        :param data: ElementData instance decoded from an Element node.
258        :param xsd_element: the `XsdElement` associated to decoded the data.
259        :param xsd_type: optional XSD type for supporting dynamic type through \
260        *xsi:type* or xs:alternative.
261        :param level: the level related to the decoding process (0 means the root).
262        :return: a data structure containing the decoded data.
263        """
264        xsd_type = xsd_type or xsd_element.type
265        result_dict = self.dict()
266        if level == 0 and xsd_element.is_global() and not self.strip_namespaces and self:
267            schema_namespaces = set(xsd_element.namespaces.values())
268            result_dict.update(
269                ('%s:%s' % (self.ns_prefix, k) if k else self.ns_prefix, v)
270                for k, v in self._namespaces.items()
271                if v in schema_namespaces or v == XSI_NAMESPACE
272            )
273
274        xsd_group = xsd_type.model_group
275        if xsd_group is None:
276            if data.attributes or self.force_dict and not xsd_type.is_simple():
277                result_dict.update(t for t in self.map_attributes(data.attributes))
278                if data.text is not None and data.text != '' and self.text_key is not None:
279                    result_dict[self.text_key] = data.text
280                return result_dict
281            else:
282                return data.text if data.text != '' else None
283        else:
284            if data.attributes:
285                result_dict.update(t for t in self.map_attributes(data.attributes))
286
287            has_single_group = xsd_group.is_single()
288            if data.content:
289                for name, value, xsd_child in self.map_content(data.content):
290                    try:
291                        result = result_dict[name]
292                    except KeyError:
293                        if xsd_child is None or has_single_group and xsd_child.is_single():
294                            result_dict[name] = self.list([value]) if self.force_list else value
295                        else:
296                            result_dict[name] = self.list([value])
297                    else:
298                        if not isinstance(result, MutableSequence) or not result:
299                            result_dict[name] = self.list([result, value])
300                        elif isinstance(result[0], MutableSequence) or \
301                                not isinstance(value, MutableSequence):
302                            result.append(value)
303                        else:
304                            result_dict[name] = self.list([result, value])
305
306            elif data.text is not None and data.text != '' and self.text_key is not None:
307                result_dict[self.text_key] = data.text
308
309            if level == 0 and self.preserve_root:
310                return self.dict(
311                    [(self.map_qname(data.tag), result_dict if result_dict else None)]
312                )
313
314            if not result_dict:
315                return None
316            elif len(result_dict) == 1 and self.text_key in result_dict:
317                return result_dict[self.text_key]
318            return result_dict
319
    def element_encode(self, obj: Any, xsd_element: 'XsdElement', level: int = 0) -> ElementData:
        """
        Extracts XML decoded data from a data structure for encoding into an ElementTree.
        If the object is a mapping, its items are dispatched by key to the element text,
        to character data parts, to namespace declarations (that are registered into
        the converter instance), to attributes or to child elements.

        :param obj: the decoded object.
        :param xsd_element: the `XsdElement` associated to the decoded data structure.
        :param level: the level related to the encoding process (0 means the root).
        :return: an ElementData instance.
        """
        if level != 0:
            tag = xsd_element.name
        else:
            # Only a global root element is tagged with its qualified name
            if xsd_element.is_global():
                tag = xsd_element.qualified_name
            else:
                tag = xsd_element.name
            if self.preserve_root and isinstance(obj, MutableMapping):
                # NOTE(review): presumably get_matching_item() returns the value
                # stored under the key of the root element - confirm against
                # XsdElement. The object is replaced only if a match is found.
                match_local_name = cast(bool, self.strip_namespaces or self.default_namespace)
                match = xsd_element.get_matching_item(obj, self.ns_prefix, match_local_name)
                if match is not None:
                    obj = match

        if not isinstance(obj, MutableMapping):
            # A value that is not a mapping has no attributes: it is used as
            # the element text for simple content, as a single character data
            # part (index 1) for a string in a mixed content, or otherwise
            # passed as it is in the content part.
            if xsd_element.type.simple_type is not None:
                return ElementData(tag, obj, None, {})
            elif xsd_element.type.mixed and isinstance(obj, (str, bytes)):
                return ElementData(tag, None, [(1, obj)], {})
            else:
                return ElementData(tag, None, obj, {})

        text = None
        content: List[Tuple[Union[int, str], Any]] = []
        attributes = {}

        # The order of the tests is significant: the text key, the character
        # data parts and the namespace declarations are checked before the
        # attribute prefix, because they can start with the same characters.
        for name, value in obj.items():
            if name == self.text_key:
                text = value
            # Character data part: the prefix followed by the integer index
            elif self.cdata_prefix is not None and \
                    name.startswith(self.cdata_prefix) and \
                    name[len(self.cdata_prefix):].isdigit():
                index = int(name[len(self.cdata_prefix):])
                content.append((index, value))
            # Default namespace declaration, registered with an empty prefix
            elif name == self.ns_prefix:
                self[''] = value
            # Prefixed namespace declaration, ignored if namespaces are stripped
            elif name.startswith('%s:' % self.ns_prefix):
                if not self.strip_namespaces:
                    self[name[len(self.ns_prefix) + 1:]] = value
            # Attribute: requires a not empty prefix followed by a name
            elif self.attr_prefix and \
                    name.startswith(self.attr_prefix) and \
                    name != self.attr_prefix:
                attr_name = name[len(self.attr_prefix):]
                ns_name = self.unmap_qname(attr_name, xsd_element.attributes)
                attributes[ns_name] = value
            # A value that is not a sequence, or an empty one, is a single child
            elif not isinstance(value, MutableSequence) or not value:
                content.append((self.unmap_qname(name), value))
            # A sequence of mappings or sequences has one child for each item
            elif isinstance(value[0], (MutableMapping, MutableSequence)):
                ns_name = self.unmap_qname(name)
                content.extend((ns_name, item) for item in value)
            else:
                # A sequence of other values is ambiguous: it can be the value
                # of a single child with a list type or the values of repeated
                # children, so the matching element of the model is checked.
                xsd_group = xsd_element.type.model_group
                if xsd_group is None:
                    # fallback to xs:anyType encoder
                    xsd_group = xsd_element.any_type.model_group
                    assert xsd_group is not None

                ns_name = self.unmap_qname(name)
                for xsd_child in xsd_group.iter_elements():
                    matched_element = xsd_child.match(ns_name, resolve=True)
                    if matched_element is not None:
                        if matched_element.type and matched_element.type.is_list():
                            content.append((ns_name, value))
                        else:
                            content.extend((ns_name, item) for item in value)
                        break
                else:
                    # No element of the model matches the name. With an empty
                    # attribute prefix the attributes are not distinguishable
                    # from children by the key, so the name is checked against
                    # the attributes of the element before falling back to a
                    # single child.
                    if self.attr_prefix == '' and ns_name not in attributes:
                        for key, xsd_attribute in xsd_element.attributes.items():
                            if key and xsd_attribute.is_matching(ns_name):
                                attributes[key] = value
                                break
                        else:
                            content.append((ns_name, value))
                    else:
                        content.append((ns_name, value))

        return ElementData(tag, text, content, attributes)
406