1#!/usr/bin/python 2 3""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """ 4 5from __future__ import print_function 6from copy import deepcopy 7from lxml import etree as ET 8import random, re, io 9 10 11########################### 12# The XmlMutatorMin class # 13########################### 14 15 16class XmlMutatorMin: 17 18 """ 19 Optionals parameters: 20 seed Seed used by the PRNG (default: "RANDOM") 21 verbose Verbosity (default: False) 22 """ 23 24 def __init__(self, seed="RANDOM", verbose=False): 25 26 """ Initialize seed, database and mutators """ 27 28 # Verbosity 29 self.verbose = verbose 30 31 # Initialize PRNG 32 self.seed = str(seed) 33 if self.seed == "RANDOM": 34 random.seed() 35 else: 36 if self.verbose: 37 print("Static seed '%s'" % self.seed) 38 random.seed(self.seed) 39 40 # Initialize input and output documents 41 self.input_tree = None 42 self.tree = None 43 44 # High-level mutators (no database needed) 45 hl_mutators_delete = [ 46 "del_node_and_children", 47 "del_node_but_children", 48 "del_attribute", 49 "del_content", 50 ] # Delete items 51 hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values 52 53 # Exposed mutators 54 self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete 55 56 def __parse_xml(self, xml): 57 58 """ Parse an XML string. Basic wrapper around lxml.parse() """ 59 60 try: 61 # Function parse() takes care of comments / DTD / processing instructions / ... 62 tree = ET.parse(io.BytesIO(xml)) 63 except ET.ParseError: 64 raise RuntimeError("XML isn't well-formed!") 65 except LookupError as e: 66 raise RuntimeError(e) 67 68 # Return a document wrapper 69 return tree 70 71 def __exec_among(self, module, functions, min_times, max_times): 72 73 """ Randomly execute $functions between $min and $max times """ 74 75 for i in xrange(random.randint(min_times, max_times)): 76 # Function names are mangled because they are "private" 77 getattr(module, "_XmlMutatorMin__" + random.choice(functions))() 78 79 def __serialize_xml(self, tree): 80 81 """ Serialize a XML document. Basic wrapper around lxml.tostring() """ 82 83 return ET.tostring( 84 tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding 85 ) 86 87 def __ver(self, version): 88 89 """ Helper for displaying lxml version numbers """ 90 91 return ".".join(map(str, version)) 92 93 def reset(self): 94 95 """ Reset the mutator """ 96 97 self.tree = deepcopy(self.input_tree) 98 99 def init_from_string(self, input_string): 100 101 """ Initialize the mutator from a XML string """ 102 103 # Get a pointer to the top-element 104 self.input_tree = self.__parse_xml(input_string) 105 106 # Get a working copy 107 self.tree = deepcopy(self.input_tree) 108 109 def save_to_string(self): 110 111 """ Return the current XML document as UTF-8 string """ 112 113 # Return a text version of the tree 114 return self.__serialize_xml(self.tree) 115 116 def __pick_element(self, exclude_root_node=False): 117 118 """ Pick a random element from the current document """ 119 120 # Get a list of all elements, but nodes like PI and comments 121 elems = list(self.tree.getroot().iter(tag=ET.Element)) 122 123 # Is the root node excluded? 124 if exclude_root_node: 125 start = 1 126 else: 127 start = 0 128 129 # Pick a random element 130 try: 131 elem_id = random.randint(start, len(elems) - 1) 132 elem = elems[elem_id] 133 except ValueError: 134 # Should only occurs if "exclude_root_node = True" 135 return (None, None) 136 137 return (elem_id, elem) 138 139 def __fuzz_attribute(self): 140 141 """ Fuzz (part of) an attribute value """ 142 143 # Select a node to modify 144 (rand_elem_id, rand_elem) = self.__pick_element() 145 146 # Get all the attributes 147 attribs = rand_elem.keys() 148 149 # Is there attributes? 150 if len(attribs) < 1: 151 if self.verbose: 152 print("No attribute: can't replace!") 153 return 154 155 # Pick a random attribute 156 rand_attrib_id = random.randint(0, len(attribs) - 1) 157 rand_attrib = attribs[rand_attrib_id] 158 159 # We have the attribute to modify 160 # Get its value 161 attrib_value = rand_elem.get(rand_attrib) 162 # print("- Value: " + attrib_value) 163 164 # Should we work on the whole value? 165 func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)" 166 p = re.compile(func_call) 167 l = p.findall(attrib_value) 168 if random.choice((True, False)) and l: 169 # Randomly pick one the function calls 170 (func, args) = random.choice(l) 171 # Split by "," and randomly pick one of the arguments 172 value = random.choice(args.split(",")) 173 # Remove superfluous characters 174 unclean_value = value 175 value = value.strip(" ").strip("'") 176 # print("Selected argument: [%s]" % value) 177 else: 178 value = attrib_value 179 180 # For each type, define some possible replacement values 181 choices_number = ( 182 "0", 183 "11111", 184 "-128", 185 "2", 186 "-1", 187 "1/3", 188 "42/0", 189 "1094861636 idiv 1.0", 190 "-1123329771506872 idiv 3.8", 191 "17=$numericRTF", 192 str(3 + random.randrange(0, 100)), 193 ) 194 195 choices_letter = ( 196 "P" * (25 * random.randrange(1, 100)), 197 "%s%s%s%s%s%s", 198 "foobar", 199 ) 200 201 choices_alnum = ( 202 "Abc123", 203 "020F0302020204030204", 204 "020F0302020204030204" * (random.randrange(5, 20)), 205 ) 206 207 # Fuzz the value 208 if random.choice((True, False)) and value == "": 209 210 # Empty 211 new_value = value 212 213 elif random.choice((True, False)) and value.isdigit(): 214 215 # Numbers 216 new_value = random.choice(choices_number) 217 218 elif random.choice((True, False)) and value.isalpha(): 219 220 # Letters 221 new_value = random.choice(choices_letter) 222 223 elif random.choice((True, False)) and value.isalnum(): 224 225 # Alphanumeric 226 new_value = random.choice(choices_alnum) 227 228 else: 229 230 # Default type 231 new_value = random.choice(choices_alnum + choices_letter + choices_number) 232 233 # If we worked on a substring, apply changes to the whole string 234 if value != attrib_value: 235 # No ' around empty values 236 if new_value != "" and value != "": 237 new_value = "'" + new_value + "'" 238 # Apply changes 239 new_value = attrib_value.replace(unclean_value, new_value) 240 241 # Log something 242 if self.verbose: 243 print( 244 "Fuzzing attribute #%i '%s' of tag #%i '%s'" 245 % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) 246 ) 247 248 # Modify the attribute 249 rand_elem.set(rand_attrib, new_value.decode("utf-8")) 250 251 def __del_node_and_children(self): 252 253 """High-level minimizing mutator 254 Delete a random node and its children (i.e. delete a random tree)""" 255 256 self.__del_node(True) 257 258 def __del_node_but_children(self): 259 260 """High-level minimizing mutator 261 Delete a random node but its children (i.e. link them to the parent of the deleted node)""" 262 263 self.__del_node(False) 264 265 def __del_node(self, delete_children): 266 267 """ Called by the __del_node_* mutators """ 268 269 # Select a node to modify (but the root one) 270 (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True) 271 272 # If the document includes only a top-level element 273 # Then we can't pick a element (given that "exclude_root_node = True") 274 275 # Is the document deep enough? 276 if rand_elem is None: 277 if self.verbose: 278 print("Can't delete a node: document not deep enough!") 279 return 280 281 # Log something 282 if self.verbose: 283 but_or_and = "and" if delete_children else "but" 284 print( 285 "Deleting tag #%i '%s' %s its children" 286 % (rand_elem_id, rand_elem.tag, but_or_and) 287 ) 288 289 if delete_children is False: 290 # Link children of the random (soon to be deleted) node to its parent 291 for child in rand_elem: 292 rand_elem.getparent().append(child) 293 294 # Remove the node 295 rand_elem.getparent().remove(rand_elem) 296 297 def __del_content(self): 298 299 """High-level minimizing mutator 300 Delete the attributes and children of a random node""" 301 302 # Select a node to modify 303 (rand_elem_id, rand_elem) = self.__pick_element() 304 305 # Log something 306 if self.verbose: 307 print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) 308 309 # Reset the node 310 rand_elem.clear() 311 312 def __del_attribute(self): 313 314 """High-level minimizing mutator 315 Delete a random attribute from a random node""" 316 317 # Select a node to modify 318 (rand_elem_id, rand_elem) = self.__pick_element() 319 320 # Get all the attributes 321 attribs = rand_elem.keys() 322 323 # Is there attributes? 324 if len(attribs) < 1: 325 if self.verbose: 326 print("No attribute: can't delete!") 327 return 328 329 # Pick a random attribute 330 rand_attrib_id = random.randint(0, len(attribs) - 1) 331 rand_attrib = attribs[rand_attrib_id] 332 333 # Log something 334 if self.verbose: 335 print( 336 "Deleting attribute #%i '%s' of tag #%i '%s'" 337 % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) 338 ) 339 340 # Delete the attribute 341 rand_elem.attrib.pop(rand_attrib) 342 343 def mutate(self, min=1, max=5): 344 345 """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ 346 347 # High-level mutation 348 self.__exec_among(self, self.hl_mutators_all, min, max) 349