1#!/usr/bin/python
2
3""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
4
5from __future__ import print_function
6from copy import deepcopy
7from lxml import etree as ET
8import random, re, io
9
10
11###########################
12# The XmlMutatorMin class #
13###########################
14
15
class XmlMutatorMin:

    """
    Minimizing / fuzzing mutator for XML documents, built on lxml.

    Typical usage: init_from_string() -> mutate() -> save_to_string(),
    with reset() to go back to the pristine input document.

    Optional parameters:
        seed        Seed used by the PRNG (default: "RANDOM")
        verbose     Verbosity (default: False)
    """

    # Matches "name(args)" fragments inside an attribute value; the "args"
    # group is later split on "," to fuzz a single argument.
    _FUNC_CALL_RE = re.compile(r"(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)")

    def __init__(self, seed="RANDOM", verbose=False):

        """ Initialize the PRNG, the (empty) documents and the mutator list """

        # Verbosity
        self.verbose = verbose

        # Initialize PRNG (note: this seeds the module-level "random" PRNG)
        self.seed = str(seed)
        if self.seed == "RANDOM":
            random.seed()
        else:
            if self.verbose:
                print("Static seed '%s'" % self.seed)
            random.seed(self.seed)

        # Initialize input and output documents
        self.input_tree = None
        self.tree = None

        # High-level mutators (no database needed)
        hl_mutators_delete = [
            "del_node_and_children",
            "del_node_but_children",
            "del_attribute",
            "del_content",
        ]  # Delete items
        hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values

        # Exposed mutators (names are resolved to "__<name>" methods by __exec_among)
        self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete

    def __parse_xml(self, xml):

        """
        Parse an XML byte string. Basic wrapper around lxml.parse()

        Returns the parsed tree; raises RuntimeError if the document is not
        well-formed or if lxml raises a LookupError while parsing.
        """

        try:
            # Function parse() takes care of comments / DTD / processing instructions / ...
            tree = ET.parse(io.BytesIO(xml))
        except ET.ParseError:
            raise RuntimeError("XML isn't well-formed!")
        except LookupError as e:
            raise RuntimeError(e)

        # Return a document wrapper
        return tree

    def __exec_among(self, module, functions, min_times, max_times):

        """
        Randomly execute $functions between $min_times and $max_times times

        $module is the object holding the mutators and $functions a list of
        mutator names given without their "__" prefix.
        """

        # range() instead of xrange(): the latter doesn't exist on Python 3
        for _ in range(random.randint(min_times, max_times)):
            # Function names are mangled because they are "private"
            getattr(module, "_XmlMutatorMin__" + random.choice(functions))()

    def __serialize_xml(self, tree):

        """
        Serialize a XML document. Basic wrapper around lxml.tostring()

        The output includes an XML declaration and uses the encoding recorded
        in the document information of $tree.
        """

        return ET.tostring(
            tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
        )

    def __ver(self, version):

        """ Helper for displaying lxml version numbers, e.g. (4, 9, 1) -> "4.9.1" """

        return ".".join(map(str, version))

    def reset(self):

        """ Reset the mutator: drop all mutations and restart from the input document """

        self.tree = deepcopy(self.input_tree)

    def init_from_string(self, input_string):

        """
        Initialize the mutator from a XML string

        Raises RuntimeError (from the parsing wrapper) if parsing fails.
        """

        # Keep the pristine document around so that reset() can restore it
        self.input_tree = self.__parse_xml(input_string)

        # Get a working copy
        self.tree = deepcopy(self.input_tree)

    def save_to_string(self):

        """ Return the current XML document, serialized in the document's own encoding """

        # Return a text version of the tree
        return self.__serialize_xml(self.tree)

    def __pick_element(self, exclude_root_node=False):

        """
        Pick a random element from the current document

        Returns a tuple (index, element), or (None, None) when nothing can be
        picked (i.e. the root node is excluded and it has no descendants).
        """

        # Get a list of all elements, but nodes like PI and comments
        elems = list(self.tree.getroot().iter(tag=ET.Element))

        # The root node comes first: skipping index 0 excludes it
        start = 1 if exclude_root_node else 0

        # Nothing left to pick from
        if start >= len(elems):
            return (None, None)

        # Pick a random element
        elem_id = random.randint(start, len(elems) - 1)
        return (elem_id, elems[elem_id])

    def __pick_attribute(self, elem):

        """
        Pick a random attribute of $elem

        Returns a tuple (index, name), or (None, None) if $elem has no
        attribute. The PRNG is only used when there is something to pick.
        """

        attribs = elem.keys()
        if len(attribs) < 1:
            return (None, None)

        attrib_id = random.randint(0, len(attribs) - 1)
        return (attrib_id, attribs[attrib_id])

    def __fuzz_attribute(self):

        """
        Fuzz (part of) an attribute value

        Either the whole value or one argument of a "func(args)" fragment is
        replaced by a value picked among predefined replacement candidates.
        """

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Pick a random attribute
        (rand_attrib_id, rand_attrib) = self.__pick_attribute(rand_elem)
        if rand_attrib is None:
            if self.verbose:
                print("No attribute: can't replace!")
            return

        # We have the attribute to modify
        # Get its value
        attrib_value = rand_elem.get(rand_attrib)

        # Should we work on the whole value?
        calls = self._FUNC_CALL_RE.findall(attrib_value)
        unclean_value = None
        if random.choice((True, False)) and calls:
            # Randomly pick one the function calls
            (_func, args) = random.choice(calls)
            # Split by "," and randomly pick one of the arguments
            unclean_value = random.choice(args.split(","))
            # Remove superfluous characters
            value = unclean_value.strip(" ").strip("'")
        else:
            value = attrib_value

        # For each type, define some possible replacement values
        # (the order of the PRNG calls below is kept stable on purpose, so
        # that a static seed keeps producing the same mutations)
        choices_number = (
            "0",
            "11111",
            "-128",
            "2",
            "-1",
            "1/3",
            "42/0",
            "1094861636 idiv 1.0",
            "-1123329771506872 idiv 3.8",
            "17=$numericRTF",
            str(3 + random.randrange(0, 100)),
        )

        choices_letter = (
            "P" * (25 * random.randrange(1, 100)),
            "%s%s%s%s%s%s",
            "foobar",
        )

        choices_alnum = (
            "Abc123",
            "020F0302020204030204",
            "020F0302020204030204" * (random.randrange(5, 20)),
        )

        # Fuzz the value: each typed branch is only taken half of the time,
        # otherwise we fall through to the next one (and finally to the default)
        if random.choice((True, False)) and value == "":

            # Empty
            new_value = value

        elif random.choice((True, False)) and value.isdigit():

            # Numbers
            new_value = random.choice(choices_number)

        elif random.choice((True, False)) and value.isalpha():

            # Letters
            new_value = random.choice(choices_letter)

        elif random.choice((True, False)) and value.isalnum():

            # Alphanumeric
            new_value = random.choice(choices_alnum)

        else:

            # Default type
            new_value = random.choice(choices_alnum + choices_letter + choices_number)

        # If we worked on a substring, apply changes to the whole string
        if value != attrib_value:
            # No ' around empty values
            if new_value != "" and value != "":
                new_value = "'" + new_value + "'"
            # Apply changes (every occurrence of the raw argument is replaced)
            new_value = attrib_value.replace(unclean_value, new_value)

        # Log something
        if self.verbose:
            print(
                "Fuzzing attribute #%i '%s' of tag #%i '%s'"
                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
            )

        # Only byte strings need decoding: text strings have no decode() on
        # Python 3, and decoding a unicode object on Python 2 would first
        # encode it implicitly as ASCII
        if isinstance(new_value, bytes):
            new_value = new_value.decode("utf-8")

        # Modify the attribute
        rand_elem.set(rand_attrib, new_value)

    def __del_node_and_children(self):

        """High-level minimizing mutator
        Delete a random node and its children (i.e. delete a random tree)"""

        self.__del_node(True)

    def __del_node_but_children(self):

        """High-level minimizing mutator
        Delete a random node but its children (i.e. link them to the parent of the deleted node)"""

        self.__del_node(False)

    def __del_node(self, delete_children):

        """
        Called by the __del_node_* mutators

        Remove a random non-root node. If $delete_children is false, its
        children are first appended to the parent of the removed node.
        """

        # Select a node to modify (but the root one)
        (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True)

        # If the document includes only a top-level element
        # Then we can't pick a element (given that "exclude_root_node = True")

        # Is the document deep enough?
        if rand_elem is None:
            if self.verbose:
                print("Can't delete a node: document not deep enough!")
            return

        # Log something
        if self.verbose:
            but_or_and = "and" if delete_children else "but"
            print(
                "Deleting tag #%i '%s' %s its children"
                % (rand_elem_id, rand_elem.tag, but_or_and)
            )

        parent = rand_elem.getparent()

        if not delete_children:
            # Link children of the random (soon to be deleted) node to its parent
            # Iterate over a snapshot: appending a child moves it out of rand_elem
            for child in list(rand_elem):
                parent.append(child)

        # Remove the node
        parent.remove(rand_elem)

    def __del_content(self):

        """High-level minimizing mutator
        Reset a random node (see lxml's clear(): attributes, children and text are dropped)"""

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Log something
        if self.verbose:
            print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))

        # Reset the node
        rand_elem.clear()

    def __del_attribute(self):

        """High-level minimizing mutator
        Delete a random attribute from a random node"""

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Pick a random attribute
        (rand_attrib_id, rand_attrib) = self.__pick_attribute(rand_elem)
        if rand_attrib is None:
            if self.verbose:
                print("No attribute: can't delete!")
            return

        # Log something
        if self.verbose:
            print(
                "Deleting attribute #%i '%s' of tag #%i '%s'"
                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
            )

        # Delete the attribute
        rand_elem.attrib.pop(rand_attrib)

    def mutate(self, min=1, max=5):

        """ Execute some randomly chosen high-level mutators between $min and $max times """

        # High-level mutation
        self.__exec_among(self, self.hl_mutators_all, min, max)
349