#!/usr/bin/python """ Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """ from __future__ import print_function from copy import deepcopy from lxml import etree as ET import random, re, io ########################### # The XmlMutatorMin class # ########################### class XmlMutatorMin: """ Optionals parameters: seed Seed used by the PRNG (default: "RANDOM") verbose Verbosity (default: False) """ def __init__(self, seed="RANDOM", verbose=False): """ Initialize seed, database and mutators """ # Verbosity self.verbose = verbose # Initialize PRNG self.seed = str(seed) if self.seed == "RANDOM": random.seed() else: if self.verbose: print("Static seed '%s'" % self.seed) random.seed(self.seed) # Initialize input and output documents self.input_tree = None self.tree = None # High-level mutators (no database needed) hl_mutators_delete = [ "del_node_and_children", "del_node_but_children", "del_attribute", "del_content", ] # Delete items hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values # Exposed mutators self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete def __parse_xml(self, xml): """ Parse an XML string. Basic wrapper around lxml.parse() """ try: # Function parse() takes care of comments / DTD / processing instructions / ... tree = ET.parse(io.BytesIO(xml)) except ET.ParseError: raise RuntimeError("XML isn't well-formed!") except LookupError as e: raise RuntimeError(e) # Return a document wrapper return tree def __exec_among(self, module, functions, min_times, max_times): """ Randomly execute $functions between $min and $max times """ for i in xrange(random.randint(min_times, max_times)): # Function names are mangled because they are "private" getattr(module, "_XmlMutatorMin__" + random.choice(functions))() def __serialize_xml(self, tree): """ Serialize a XML document. Basic wrapper around lxml.tostring() """ return ET.tostring( tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding ) def __ver(self, version): """ Helper for displaying lxml version numbers """ return ".".join(map(str, version)) def reset(self): """ Reset the mutator """ self.tree = deepcopy(self.input_tree) def init_from_string(self, input_string): """ Initialize the mutator from a XML string """ # Get a pointer to the top-element self.input_tree = self.__parse_xml(input_string) # Get a working copy self.tree = deepcopy(self.input_tree) def save_to_string(self): """ Return the current XML document as UTF-8 string """ # Return a text version of the tree return self.__serialize_xml(self.tree) def __pick_element(self, exclude_root_node=False): """ Pick a random element from the current document """ # Get a list of all elements, but nodes like PI and comments elems = list(self.tree.getroot().iter(tag=ET.Element)) # Is the root node excluded? if exclude_root_node: start = 1 else: start = 0 # Pick a random element try: elem_id = random.randint(start, len(elems) - 1) elem = elems[elem_id] except ValueError: # Should only occurs if "exclude_root_node = True" return (None, None) return (elem_id, elem) def __fuzz_attribute(self): """ Fuzz (part of) an attribute value """ # Select a node to modify (rand_elem_id, rand_elem) = self.__pick_element() # Get all the attributes attribs = rand_elem.keys() # Is there attributes? if len(attribs) < 1: if self.verbose: print("No attribute: can't replace!") return # Pick a random attribute rand_attrib_id = random.randint(0, len(attribs) - 1) rand_attrib = attribs[rand_attrib_id] # We have the attribute to modify # Get its value attrib_value = rand_elem.get(rand_attrib) # print("- Value: " + attrib_value) # Should we work on the whole value? func_call = "(?P[a-zA-Z:\-]+)\((?P.*?)\)" p = re.compile(func_call) l = p.findall(attrib_value) if random.choice((True, False)) and l: # Randomly pick one the function calls (func, args) = random.choice(l) # Split by "," and randomly pick one of the arguments value = random.choice(args.split(",")) # Remove superfluous characters unclean_value = value value = value.strip(" ").strip("'") # print("Selected argument: [%s]" % value) else: value = attrib_value # For each type, define some possible replacement values choices_number = ( "0", "11111", "-128", "2", "-1", "1/3", "42/0", "1094861636 idiv 1.0", "-1123329771506872 idiv 3.8", "17=$numericRTF", str(3 + random.randrange(0, 100)), ) choices_letter = ( "P" * (25 * random.randrange(1, 100)), "%s%s%s%s%s%s", "foobar", ) choices_alnum = ( "Abc123", "020F0302020204030204", "020F0302020204030204" * (random.randrange(5, 20)), ) # Fuzz the value if random.choice((True, False)) and value == "": # Empty new_value = value elif random.choice((True, False)) and value.isdigit(): # Numbers new_value = random.choice(choices_number) elif random.choice((True, False)) and value.isalpha(): # Letters new_value = random.choice(choices_letter) elif random.choice((True, False)) and value.isalnum(): # Alphanumeric new_value = random.choice(choices_alnum) else: # Default type new_value = random.choice(choices_alnum + choices_letter + choices_number) # If we worked on a substring, apply changes to the whole string if value != attrib_value: # No ' around empty values if new_value != "" and value != "": new_value = "'" + new_value + "'" # Apply changes new_value = attrib_value.replace(unclean_value, new_value) # Log something if self.verbose: print( "Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) ) # Modify the attribute rand_elem.set(rand_attrib, new_value.decode("utf-8")) def __del_node_and_children(self): """High-level minimizing mutator Delete a random node and its children (i.e. delete a random tree)""" self.__del_node(True) def __del_node_but_children(self): """High-level minimizing mutator Delete a random node but its children (i.e. link them to the parent of the deleted node)""" self.__del_node(False) def __del_node(self, delete_children): """ Called by the __del_node_* mutators """ # Select a node to modify (but the root one) (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True) # If the document includes only a top-level element # Then we can't pick a element (given that "exclude_root_node = True") # Is the document deep enough? if rand_elem is None: if self.verbose: print("Can't delete a node: document not deep enough!") return # Log something if self.verbose: but_or_and = "and" if delete_children else "but" print( "Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and) ) if delete_children is False: # Link children of the random (soon to be deleted) node to its parent for child in rand_elem: rand_elem.getparent().append(child) # Remove the node rand_elem.getparent().remove(rand_elem) def __del_content(self): """High-level minimizing mutator Delete the attributes and children of a random node""" # Select a node to modify (rand_elem_id, rand_elem) = self.__pick_element() # Log something if self.verbose: print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag)) # Reset the node rand_elem.clear() def __del_attribute(self): """High-level minimizing mutator Delete a random attribute from a random node""" # Select a node to modify (rand_elem_id, rand_elem) = self.__pick_element() # Get all the attributes attribs = rand_elem.keys() # Is there attributes? if len(attribs) < 1: if self.verbose: print("No attribute: can't delete!") return # Pick a random attribute rand_attrib_id = random.randint(0, len(attribs) - 1) rand_attrib = attribs[rand_attrib_id] # Log something if self.verbose: print( "Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag) ) # Delete the attribute rand_elem.attrib.pop(rand_attrib) def mutate(self, min=1, max=5): """ Execute some high-level mutators between $min and $max times, then some medium-level ones """ # High-level mutation self.__exec_among(self, self.hl_mutators_all, min, max)