1"""
2Various XML utilities
3"""
4
import re
import string  # pylint: disable=deprecated-module
from xml.etree import ElementTree

import salt.utils.data
import salt.utils.stringutils
10
11
12def _conv_name(x):
13    """
14    If this XML tree has an xmlns attribute, then etree will add it
15    to the beginning of the tag, like: "{http://path}tag".
16    """
17    if "}" in x:
18        comps = x.split("}")
19        name = comps[1]
20        return name
21    return x
22
23
def _to_dict(xmltree):
    """
    Converts an XML ElementTree to a dictionary that only contains items.
    This is the default behavior in version 2017.7. This will default to prevent
    unexpected parsing issues on modules dependent on this.

    Attributes are ignored. An element without children is converted to
    ``{tag: text}``. Otherwise each child tag maps to its text (childless
    child) or to its own converted dictionary. When a tag is repeated, the
    entry becomes a list; note that repeated occurrences are always appended
    as converted dictionaries, so a repeated childless tag yields a list such
    as ``["first text", {"tag": "second text"}]``.

    :param xmltree: the element to convert
    :return: the dictionary representation of the element
    """
    # Use len() to detect children: truth-testing an Element is deprecated
    # and is slated to stop meaning "has at least one child".
    if len(xmltree) == 0:
        # Without children the loop below would produce nothing, so return a
        # single-entry dict representing the element itself.
        return {_conv_name(xmltree.tag): xmltree.text}

    xmldict = {}
    for item in xmltree:
        name = _conv_name(item.tag)

        if name not in xmldict:
            xmldict[name] = _to_dict(item) if len(item) > 0 else item.text
        else:
            # If a tag appears more than once in the same place, convert it to
            # a list. This may require that the caller watch for such a thing
            # to happen, and behave accordingly.
            if not isinstance(xmldict[name], list):
                xmldict[name] = [xmldict[name]]
            xmldict[name].append(_to_dict(item))
    return xmldict
53
54
def _to_full_dict(xmltree):
    """
    Returns the full XML dictionary including attributes.

    The attributes of the element are copied into the result first. A
    childless element without attributes is returned as its bare text. A
    childless element with attributes and a non-empty text gets the text
    stored under its own tag name. Children are converted recursively and
    stored under their tag name; a repeated tag is turned into a list.

    :param xmltree: the element to convert
    :return: the element text for a childless, attribute-less element,
        otherwise a dictionary of attributes and converted children
    """
    xmldict = dict(xmltree.attrib)

    # Use len() to detect children: truth-testing an Element is deprecated
    # and is slated to stop meaning "has at least one child".
    if len(xmltree) == 0:
        if not xmldict:
            # If we don't have attributes, we should return the value as a string
            # ex: <entry>test</entry>
            return xmltree.text
        if xmltree.text:
            # An element may carry both attributes and text, keep the text
            # under the tag name next to the attributes.
            # ex: <entry name="test">value</entry>
            xmldict[_conv_name(xmltree.tag)] = xmltree.text

    for item in xmltree:
        name = _conv_name(item.tag)

        if name not in xmldict:
            xmldict[name] = _to_full_dict(item)
        else:
            # If a tag appears more than once in the same place, convert it to
            # a list. This may require that the caller watch for such a thing
            # to happen, and behave accordingly.
            if not isinstance(xmldict[name], list):
                xmldict[name] = [xmldict[name]]

            xmldict[name].append(_to_full_dict(item))

    return xmldict
89
90
def to_dict(xmltree, attr=False):
    """
    Build a dictionary out of an XML tree.

    Args:
        xmltree: the XML element to convert. It is handed over as is to the
            selected converter.
        attr: when true, the conversion also captures the XML attributes.
            When false (the default), attributes are left out.

    """
    converter = _to_full_dict if attr else _to_dict
    return converter(xmltree)
104
105
def get_xml_node(node, xpath):
    """
    Look up an XML node from a very simple XPath spelling out the complete
    node ancestry, creating every node missing along the way.

    A path segment may filter on one attribute using [@attr='value']. When
    such a segment has to be created, the new element gets that attribute.

    Args:
        node: an Element object used as the search root
        xpath: simple XPath to look for. "./" is prepended if missing.
    """
    if not xpath.startswith("./"):
        xpath = "./" + xpath

    found = node.find(xpath)
    if found is not None:
        return found

    # Split the last segment off; the path always contains a "/" thanks to
    # the "./" prefix ensured above.
    parent_xpath, _, segment = xpath.rpartition("/")
    parent = node.find(parent_xpath)
    if parent is None:
        # The parent is missing too: create it (and its own ancestors) first
        parent = get_xml_node(node, parent_xpath)

    # The segment may carry a [] filter
    matcher = re.match(
        r"""(?P<tag>[^[]+)(?:\[@(?P<attr>\w+)=["'](?P<value>[^"']+)["']])?""",
        segment,
    )
    attr_name = matcher.group("attr")
    attr_value = matcher.group("value")
    if attr_name and attr_value:
        attrib = {attr_name: attr_value}
    else:
        attrib = {}
    return ElementTree.SubElement(parent, matcher.group("tag"), attrib)
138
139
def set_node_text(node, value):
    """
    Default ``set`` function of the :py:func:`change_xml` mapping items: stores the value
    as the text of the node.

    :param node: the node receiving the text
    :param value: the value to store, converted using ``str()``
    """
    text = str(value)
    node.text = text
149
150
def clean_node(parent_map, node, ignored=None):
    """
    Remove the node from its parent if it has no attribute but the ignored ones, no text and no child.
    Recursively called up to the document root to ensure no empty node is left.

    A node without an entry in ``parent_map`` (such as the root) is never removed.

    :param parent_map: dictionary mapping each node to its parent
    :param node: the node to clean
    :param ignored: a list of ignored attributes.
    :return: True if anything has been removed, False otherwise
    """
    has_text = bool(node.text and node.text.strip())
    has_attributes = bool(node.attrib.keys() - (ignored or []))
    has_children = len(node) > 0
    parent = parent_map.get(node)

    removed = False
    # The parent is checked with an explicit None test and len() rather than
    # its truth value: truth-testing an Element is deprecated. The length
    # check keeps the historical behavior of not touching a parent that has
    # no child left.
    if (
        not has_attributes
        and not has_children
        and not has_text
        and parent is not None
        and len(parent) > 0
    ):
        parent.remove(node)
        removed = True

    # Clean parent nodes if needed
    if parent is not None:
        parent_cleaned = clean_node(parent_map, parent, ignored)
        removed = removed or parent_cleaned
    return removed
177
178
def del_text(parent_map, node):
    """
    Function to use as ``del`` value in the :py:func:`change_xml` mapping items to remove the text.
    This is the default function.

    The node is removed from its parent, then :py:func:`clean_node` is called on that parent
    so that the ancestors left empty by the removal are cleaned up as well.

    :param parent_map: dictionary mapping each node to its parent
    :param node: the node to remove
    :return: always True since the node is removed
    """
    parent = parent_map[node]
    parent.remove(node)
    # Clean starting from the parent: the node itself is already detached.
    # The parent map has to be passed as first argument; passing the parent
    # element instead turns the parent lookup into an attribute lookup and
    # silently disables the cleanup.
    clean_node(parent_map, parent)
    return True
189
190
def del_attribute(attribute, ignored=None):
    """
    Build a function suitable as ``del`` value in the :py:func:`change_xml` mapping items,
    removing the given attribute from the matched node.

    The returned function takes the parent map and the node as parameters. It returns False
    without touching anything if the node does not have the attribute. Otherwise it drops the
    attribute, calls :py:func:`clean_node` on the node and returns True.

    :param attribute: the name of the attribute to remove
    :param ignored: the list of attributes to ignore during the cleanup

    :return: the function called by :py:func:`change_xml`.
    """

    def _do_delete(parent_map, node):
        if attribute in node.keys():
            node.attrib.pop(attribute)
            clean_node(parent_map, node, ignored)
            return True
        return False

    return _do_delete
212
213
def attribute(path, xpath, attr_name, ignored=None, convert=None):
    """
    Helper function creating a change_xml mapping entry for a text XML attribute.

    The entry reads the attribute with ``node.get()``, writes it as ``str(value)`` and deletes
    it using the function generated by :py:func:`del_attribute`.

    :param path: the path to the value in the data
    :param xpath: the xpath to the node holding the attribute
    :param attr_name: the attribute name
    :param ignored: the list of attributes to ignore when cleaning up the node
    :param convert: a function used to convert the value. The ``convert`` key is only added
        to the entry when this is set.
    :return: the mapping entry dictionary
    """

    def _get(node):
        return node.get(attr_name)

    def _set(node, value):
        return node.set(attr_name, str(value))

    entry = {
        "path": path,
        "xpath": xpath,
        "get": _get,
        "set": _set,
        # Reference the sibling helper directly rather than through the
        # salt.utils.xmlutil module path, which this file never imports.
        "del": del_attribute(attr_name, ignored),
    }
    if convert:
        entry["convert"] = convert
    return entry
234
235
def int_attribute(path, xpath, attr_name, ignored=None):
    """
    Helper function creating a change_xml mapping entry for a text XML integer attribute.

    The entry reads the attribute as an ``int`` (``None`` when the attribute is missing or
    empty), writes it as ``str(value)`` and deletes it using the function generated by
    :py:func:`del_attribute`.

    :param path: the path to the value in the data
    :param xpath: the xpath to the node holding the attribute
    :param attr_name: the attribute name
    :param ignored: the list of attributes to ignore when cleaning up the node
    :return: the mapping entry dictionary
    """

    def _get(node):
        raw = node.get(attr_name)
        return int(raw) if raw else None

    def _set(node, value):
        return node.set(attr_name, str(value))

    return {
        "path": path,
        "xpath": xpath,
        "get": _get,
        "set": _set,
        # Reference the sibling helper directly rather than through the
        # salt.utils.xmlutil module path, which this file never imports.
        "del": del_attribute(attr_name, ignored),
    }
252
253
def change_xml(doc, data, mapping):
    """
    Change an XML ElementTree document according to the provided data and mapping.

    :param doc: the ElementTree parsed XML document to modify
    :param data: the dictionary of values used to modify the XML.
    :param mapping: a list of items describing how to modify the XML document.
        Each item is a dictionary containing the following keys:

        .. glossary::
            path
                the path to the value to set or remove in the ``data`` parameter.
                See :py:func:`salt.utils.data.get_value <salt.utils.data.get_value>` for the format
                of the value.

            xpath
                Simplified XPath expression used to locate the change in the XML tree.
                See :py:func:`get_xml_node` documentation for details on the supported XPath syntax

            get
                function getting the value from the XML.
                Takes a single parameter for the XML node found by the XPath expression.
                Default returns the node text value.
                This may be used to return an attribute or to perform value transformation.

            set
                function setting the value in the XML.
                Takes two parameters for the XML node and the value to set.
                Default is to set the text value.

            del
                function deleting the value in the XML.
                Takes two parameters: the dictionary mapping each node to its parent and
                the node matched by the XPath.
                Returns True if anything was removed, False otherwise.
                Default is to remove the text value.
                More cleanup may be performed, see the :py:func:`clean_node` function for details.

            convert
                function modifying the user-provided value right before comparing it with the one from the XML.
                Takes the value as single parameter.
                Default is to apply no conversion.

            equals
                function comparing the value from the XML with the (converted) user-provided one.
                Takes the current value and the new value as parameters and returns a true
                value if they are to be considered equal.
                Default compares the ``str()`` representation of both values.

    :return: ``True`` if the XML has been modified, ``False`` otherwise.
    """
    need_update = False
    for param in mapping:
        # Get the value from the function parameter using the path-like description
        # Using an empty list as a default value will cause values not provided by the user
        # to be left untouched, as opposed to explicit None unsetting the value
        values = salt.utils.data.get_value(data, param["path"], [])
        xpath = param["xpath"]
        # Prepend the xpath with ./ to handle the root more easily
        if not xpath.startswith("./"):
            xpath = "./{}".format(xpath)

        placeholders = [
            s[1:-1]
            for s in param["path"].split(":")
            if s.startswith("{") and s.endswith("}")
        ]

        # XPath matching all the nodes whatever the placeholder values are:
        # the placeholders are substituted with a marker, then the filters
        # containing that marker are stripped.
        ctx = {placeholder: "$$$" for placeholder in placeholders}
        all_nodes_xpath = string.Template(xpath).substitute(ctx)
        all_nodes_xpath = re.sub(
            r"""(?:=['"]\$\$\$["'])|(?:\[\$\$\$\])""", "", all_nodes_xpath
        )

        # We need to increment ids from arrays since xpath starts at 1.
        # This only depends on the xpath and placeholders, so compute it once
        # per mapping item rather than for every value.
        converters = {
            p: (lambda n: n + 1) if "[${}]".format(p) in xpath else (lambda n: n)
            for p in placeholders
        }

        # Store the nodes that are not removed for later cleanup
        kept_nodes = set()

        for value_item in values:
            new_value = value_item["value"]

            # Only handle simple type values. Use multiple entries or a custom get for dict or lists
            if isinstance(new_value, (list, dict)):
                continue

            if new_value is not None:
                ctx = {
                    placeholder: converters[placeholder](
                        value_item.get(placeholder, "")
                    )
                    for placeholder in placeholders
                }
                node_xpath = string.Template(xpath).substitute(ctx)
                node = get_xml_node(doc, node_xpath)

                kept_nodes.add(node)

                get_fn = param.get("get", lambda n: n.text)
                set_fn = param.get("set", set_node_text)
                current_value = get_fn(node)

                # Do we need to apply some conversion to the user-provided value?
                convert_fn = param.get("convert")
                if convert_fn:
                    new_value = convert_fn(new_value)

                # Allow custom comparison. Can be useful for almost equal numeric values
                compare_fn = param.get("equals", lambda o, n: str(o) == str(n))
                if not compare_fn(current_value, new_value):
                    set_fn(node, new_value)
                    need_update = True
            else:
                # Explicit None: remove the value from all the matching nodes
                nodes = doc.findall(all_nodes_xpath)
                del_fn = param.get("del", del_text)
                parent_map = {c: p for p in doc.iter() for c in p}
                for node in nodes:
                    deleted = del_fn(parent_map, node)
                    need_update = need_update or deleted

        # Clean the left over XML elements if there were placeholders
        if placeholders and any(v.get("value") != [] for v in values):
            all_nodes = set(doc.findall(all_nodes_xpath))
            to_remove = all_nodes - kept_nodes
            del_fn = param.get("del", del_text)
            parent_map = {c: p for p in doc.iter() for c in p}
            for node in to_remove:
                deleted = del_fn(parent_map, node)
                need_update = need_update or deleted
    return need_update
382
383
def strip_spaces(node):
    """
    Trim the spaces, tabs and line breaks surrounding the text and the tail
    of the node and of all its descendants.
    This helps comparing XML trees.

    :param node: the XML node to remove blanks from. It is modified in place.
    :return: the node
    """
    blanks = " \t\n"

    tail = node.tail
    if tail is not None:
        node.tail = tail.strip(blanks)

    text = node.text
    if text is not None:
        node.text = text.strip(blanks)

    try:
        for child in node:
            strip_spaces(child)
    except RecursionError:
        raise Exception("Failed to recurse on the node")

    return node
404
405
def element_to_str(node):
    """
    Serialize an XML node into a string

    The node is serialized using ``ElementTree.tostring()`` and the result is
    passed through ``salt.utils.stringutils.to_str()``.

    :param node: the XML node to serialize
    :return: the value returned by ``salt.utils.stringutils.to_str()``
    """
    serialized = ElementTree.tostring(node)
    return salt.utils.stringutils.to_str(serialized)
411