"""
Various XML utilities
"""

import re
import string  # pylint: disable=deprecated-module
from xml.etree import ElementTree

import salt.utils.data
import salt.utils.stringutils


def _conv_name(x):
    """
    Strip the namespace prefix from an ElementTree tag name.

    If the XML tree has an xmlns attribute, etree adds it to the beginning
    of the tag, like: "{http://path}tag". Only the part following the closing
    brace is returned; names without a brace are returned unchanged.

    :param x: the tag name, possibly namespace-qualified
    :return: the tag name without its namespace
    """
    if "}" in x:
        return x.split("}")[1]
    return x


def _to_dict(xmltree):
    """
    Converts an XML ElementTree to a dictionary that only contains items.
    This is the default behavior in version 2017.7. This will default to prevent
    unexpected parsing issues on modules dependent on this.

    Attributes are ignored. A node without children is rendered as
    ``{tag: text}``.

    :param xmltree: the element to convert
    :return: the dictionary representing the element
    """
    # If this object has no children, the for..loop below will return nothing
    # for it, so just return a single dict representing it.
    # len() is used rather than truth-testing, which is deprecated for elements.
    if len(xmltree) == 0:
        return {_conv_name(xmltree.tag): xmltree.text}

    xmldict = {}
    for item in xmltree:
        name = _conv_name(item.tag)

        if name not in xmldict:
            if len(item) > 0:
                xmldict[name] = _to_dict(item)
            else:
                xmldict[name] = item.text
        else:
            # If a tag appears more than once in the same place, convert it to
            # a list. This may require that the caller watch for such a thing
            # to happen, and behave accordingly.
            # Note that repeated childless nodes are appended as ``{tag: text}``
            # while the first occurrence is stored as the bare text.
            if not isinstance(xmldict[name], list):
                xmldict[name] = [xmldict[name]]
            xmldict[name].append(_to_dict(item))
    return xmldict


def _to_full_dict(xmltree):
    """
    Returns the full XML dictionary including attributes.

    :param xmltree: the element to convert
    :return: the text of the element if it has neither attributes nor children,
        a dictionary of its attributes and children otherwise.
    """
    # Attributes come first; children with the same name are merged below.
    xmldict = dict(xmltree.attrib)

    if len(xmltree) == 0:
        if not xmldict:
            # If we don't have attributes, we should return the value as a string
            # ex: <entry>test</entry>
            return xmltree.text
        elif xmltree.text:
            # The node has both attributes and text: keep the text under the tag name.
            # XML also allows empty nodes with attributes (ex: <entry name="test"/>),
            # in which case only the attributes are returned.
            xmldict[_conv_name(xmltree.tag)] = xmltree.text

    for item in xmltree:
        name = _conv_name(item.tag)

        if name not in xmldict:
            xmldict[name] = _to_full_dict(item)
        else:
            # If a tag appears more than once in the same place, convert it to
            # a list. This may require that the caller watch for such a thing
            # to happen, and behave accordingly.
            if not isinstance(xmldict[name], list):
                xmldict[name] = [xmldict[name]]

            xmldict[name].append(_to_full_dict(item))

    return xmldict


def to_dict(xmltree, attr=False):
    """
    Convert an XML tree into a dict. The tree that is passed in must be an
    ElementTree object.
    Args:
        xmltree: An ElementTree object.
        attr: If true, attributes will be parsed. If false, they will be ignored.

    """
    if attr:
        return _to_full_dict(xmltree)
    return _to_dict(xmltree)


def get_xml_node(node, xpath):
    """
    Get an XML node using a path (super simple xpath showing complete node ancestry).
    This also creates the missing nodes.

    The supported XPath can contain elements filtering using [@attr='value'].

    Args:
        node: an Element object
        xpath: simple XPath to look for.
    """
    if not xpath.startswith("./"):
        xpath = "./{}".format(xpath)
    res = node.find(xpath)
    if res is None:
        # Split the path into the parent path and the last segment to create
        last_sep = xpath.rfind("/")
        parent_xpath = xpath[:last_sep]
        segment = xpath[last_sep + 1 :]

        parent = node.find(parent_xpath)
        if parent is None:
            # Missing ancestors are created recursively
            parent = get_xml_node(node, parent_xpath)

        # We may have [] filter in the segment
        matcher = re.match(
            r"""(?P<tag>[^[]+)(?:\[@(?P<attr>\w+)=["'](?P<value>[^"']+)["']])?""",
            segment,
        )
        # The filter attribute, if any, is set on the created node so that
        # the same XPath matches it afterwards.
        attrib = (
            {matcher.group("attr"): matcher.group("value")}
            if matcher.group("attr") and matcher.group("value")
            else {}
        )
        res = ElementTree.SubElement(parent, matcher.group("tag"), attrib)
    return res


def set_node_text(node, value):
    """
    Function to use in the ``set`` value in the :py:func:`change_xml` mapping items to set the text.
    This is the default.

    :param node: the node to set the text to
    :param value: the value to set
    """
    node.text = str(value)


def clean_node(parent_map, node, ignored=None):
    """
    Remove the node from its parent if it has no attribute but the ignored ones, no text and no child.
    Recursively called up to the document root to ensure no empty node is left.

    :param parent_map: dictionary mapping each node to its parent
    :param node: the node to clean
    :param ignored: a list of ignored attributes.
    :return: True if anything has been removed, False otherwise
    """
    has_text = node.text is not None and node.text.strip()
    parent = parent_map.get(node)
    removed = False
    if (
        len(node.attrib.keys() - (ignored or [])) == 0
        and len(node) == 0
        and not has_text
        # Explicit None check: truth-testing an element is deprecated and
        # depends on its number of children.
        and parent is not None
    ):
        parent.remove(node)
        removed = True
    # Clean parent nodes if needed
    if parent is not None:
        parent_cleaned = clean_node(parent_map, parent, ignored)
        removed = removed or parent_cleaned
    return removed


def del_text(parent_map, node):
    """
    Function to use as ``del`` value in the :py:func:`change_xml` mapping items to remove the text.
    This is the default function.
    Calls :py:func:`clean_node` before returning.

    :param parent_map: dictionary mapping each node to its parent
    :param node: the node to remove
    :return: always True since the node is removed from its parent
    """
    parent = parent_map[node]
    parent.remove(node)
    # The node is already detached: the cleanup has to start from its former
    # parent and needs the real node-to-parent mapping to walk up the tree.
    clean_node(parent_map, parent)
    return True


def del_attribute(attribute, ignored=None):
    """
    Helper returning a function to use as ``del`` value in the :py:func:`change_xml` mapping items to
    remove an attribute.

    The generated function calls :py:func:`clean_node` before returning.

    :param attribute: the name of the attribute to remove
    :param ignored: the list of attributes to ignore during the cleanup

    :return: the function called by :py:func:`change_xml`.
    """

    def _do_delete(parent_map, node):
        # Nothing to remove means nothing changed
        if attribute not in node.attrib:
            return False
        node.attrib.pop(attribute)
        clean_node(parent_map, node, ignored)
        return True

    return _do_delete


def attribute(path, xpath, attr_name, ignored=None, convert=None):
    """
    Helper function creating a change_xml mapping entry for a text XML attribute.

    :param path: the path to the value in the data
    :param xpath: the xpath to the node holding the attribute
    :param attr_name: the attribute name
    :param ignored: the list of attributes to ignore when cleaning up the node
    :param convert: a function used to convert the value
    :return: the mapping entry dictionary
    """
    entry = {
        "path": path,
        "xpath": xpath,
        "get": lambda n: n.get(attr_name),
        "set": lambda n, v: n.set(attr_name, str(v)),
        "del": del_attribute(attr_name, ignored),
    }
    if convert:
        entry["convert"] = convert
    return entry


def int_attribute(path, xpath, attr_name, ignored=None):
    """
    Helper function creating a change_xml mapping entry for a text XML integer attribute.

    :param path: the path to the value in the data
    :param xpath: the xpath to the node holding the attribute
    :param attr_name: the attribute name
    :param ignored: the list of attributes to ignore when cleaning up the node
    :return: the mapping entry dictionary
    """
    return {
        "path": path,
        "xpath": xpath,
        # A missing or empty attribute is reported as None rather than failing in int()
        "get": lambda n: int(n.get(attr_name)) if n.get(attr_name) else None,
        "set": lambda n, v: n.set(attr_name, str(v)),
        "del": del_attribute(attr_name, ignored),
    }


def _delete_nodes(doc, nodes, del_fn):
    """
    Apply the ``del`` function of a mapping entry to each of the nodes.

    :param doc: the document the nodes belong to, used to compute the parent of each node
    :param nodes: the nodes to delete
    :param del_fn: function called with the parent map and the node to delete
    :return: True if any call reported a removal, False otherwise
    """
    # ElementTree nodes do not know their parent: build the mapping on the current tree
    parent_map = {c: p for p in doc.iter() for c in p}
    changed = False
    for node in nodes:
        if del_fn(parent_map, node):
            changed = True
    return changed


def change_xml(doc, data, mapping):
    """
    Change an XML ElementTree document according to a mapping.

    :param doc: the ElementTree parsed XML document to modify
    :param data: the dictionary of values used to modify the XML.
    :param mapping: a list of items describing how to modify the XML document.
        Each item is a dictionary containing the following keys:

        .. glossary::
            path
                the path to the value to set or remove in the ``data`` parameter.
                See :py:func:`salt.utils.data.get_value <salt.utils.data.get_value>` for the format
                of the value.

            xpath
                Simplified XPath expression used to locate the change in the XML tree.
                See :py:func:`get_xml_node` documentation for details on the supported XPath syntax

            get
                function getting the value from the XML.
                Takes a single parameter for the XML node found by the XPath expression.
                Default returns the node text value.
                This may be used to return an attribute or to perform value transformation.

            set
                function setting the value in the XML.
                Takes two parameters for the XML node and the value to set.
                Default is to set the text value.

            del
                function deleting the value in the XML.
                Takes two parameters: the dictionary mapping each node to its parent and
                the node matched by the XPath.
                Returns True if anything was removed, False otherwise.
                Default is to remove the text value.
                More cleanup may be performed, see the :py:func:`clean_node` function for details.

            convert
                function modifying the user-provided value right before comparing it with the one from the XML.
                Takes the value as single parameter.
                Default is to apply no conversion.

            equals
                function comparing the value from the XML with the user-provided one.
                Takes two parameters for the current value and the new one.
                Default is to compare the string representations of both values.

    :return: ``True`` if the XML has been modified, ``False`` otherwise.
    """
    need_update = False
    for param in mapping:
        # Get the value from the function parameter using the path-like description
        # Using an empty list as a default value will cause values not provided by the user
        # to be left untouched, as opposed to explicit None unsetting the value
        values = salt.utils.data.get_value(data, param["path"], [])
        xpath = param["xpath"]
        # Prepend the xpath with ./ to handle the root more easily
        if not xpath.startswith("./"):
            xpath = "./{}".format(xpath)

        placeholders = [
            s[1:-1]
            for s in param["path"].split(":")
            if s.startswith("{") and s.endswith("}")
        ]

        # Compute an XPath matching the nodes for all the placeholder values:
        # the placeholders are replaced by a marker and the filters on it are dropped.
        ctx = {placeholder: "$$$" for placeholder in placeholders}
        all_nodes_xpath = string.Template(xpath).substitute(ctx)
        all_nodes_xpath = re.sub(
            r"""(?:=['"]\$\$\$["'])|(?:\[\$\$\$\])""", "", all_nodes_xpath
        )

        # The functions and converters only depend on the mapping entry
        get_fn = param.get("get", lambda n: n.text)
        set_fn = param.get("set", set_node_text)
        del_fn = param.get("del", del_text)
        convert_fn = param.get("convert")
        # Allow custom comparison. Can be useful for almost equal numeric values
        compare_fn = param.get("equals", lambda o, n: str(o) == str(n))

        # We need to increment ids from arrays since xpath starts at 1
        converters = {
            p: (lambda n: n + 1) if "[${}]".format(p) in xpath else (lambda n: n)
            for p in placeholders
        }

        # Store the nodes that are not removed for later cleanup
        kept_nodes = set()

        for value_item in values:
            new_value = value_item["value"]

            # Only handle simple type values. Use multiple entries or a custom get for dict or lists
            if isinstance(new_value, (list, dict)):
                continue

            if new_value is not None:
                ctx = {
                    placeholder: converters[placeholder](
                        value_item.get(placeholder, "")
                    )
                    for placeholder in placeholders
                }
                node_xpath = string.Template(xpath).substitute(ctx)
                node = get_xml_node(doc, node_xpath)

                kept_nodes.add(node)

                current_value = get_fn(node)

                # Do we need to apply some conversion to the user-provided value?
                if convert_fn:
                    new_value = convert_fn(new_value)

                if not compare_fn(current_value, new_value):
                    set_fn(node, new_value)
                    need_update = True
            else:
                # Explicit None: remove the value from all the matching nodes
                if _delete_nodes(doc, doc.findall(all_nodes_xpath), del_fn):
                    need_update = True

        # Clean the left over XML elements if there were placeholders
        if placeholders and any(v.get("value") != [] for v in values):
            to_remove = set(doc.findall(all_nodes_xpath)) - kept_nodes
            if _delete_nodes(doc, to_remove, del_fn):
                need_update = True
    return need_update


def strip_spaces(node):
    """
    Remove all spaces and line breaks before and after nodes.
    This helps comparing XML trees.

    :param node: the XML node to remove blanks from
    :return: the node
    """

    if node.tail is not None:
        node.tail = node.tail.strip(" \t\n")
    if node.text is not None:
        node.text = node.text.strip(" \t\n")
    try:
        for child in node:
            strip_spaces(child)
    except RecursionError as exc:
        # Keep the original error as the cause to ease debugging of too deep trees
        raise Exception("Failed to recurse on the node") from exc

    return node


def element_to_str(node):
    """
    Serialize an XML node into a string

    :param node: the XML node to serialize
    :return: the serialized node converted by :py:func:`salt.utils.stringutils.to_str`
    """
    return salt.utils.stringutils.to_str(ElementTree.tostring(node))