1#!/usr/bin/env python
2
3# Copyright (C) 2009-2011, Parrot Foundation.
4
5from fusil.application    import Application
6from fusil.process.watch  import WatchProcess
7from fusil.process.create import CreateProcess
8from fusil.process.stdout import WatchStdout
9from fusil.project_agent  import ProjectAgent
10from fusil.process.tools  import locateProgram
11from fusil.write_code     import WriteCode
12from optparse             import OptionGroup
13import re
14import string
15import random
16
17'''
18
19=head1 NAME
20
21parrot_fuzzer.py - opcode fuzzer
22
23=head1 DESCRIPTION
24
25This is a fuzzer for Parrot, written in Python using the Fusil library. It
26attempts to break Parrot by generating calls to random PIR opcodes.
27
28=head1 DEPENDENCIES
29
30This script requires Python 2.5+ to run. The Fusil
31L<http://fusil.hachoir.org/trac> and python-ptrace
32L<http://python-ptrace.hachoir.org/trac> libraries are also required.
33
34=head1 USAGE
35
36Short version: C<sudo python tools/dev/parrot_fuzzer.py>
37
38C<parrot_fuzzer.py> is run like any other Fusil-based fuzzer. Fusil likes to be
39run as the root user so that the child process in which Parrot runs can be put
40in a more restricted environment, limiting potential damage.
41
42Fusil assumes the existence of a C<fusil> user and group. Parrot runs as this
43user/group as part of its restricted environment. Passing C<--unsafe> allows
44it to run as the current user. Although it is not likely that this will cause
45any damage to your system, it is possible.
46
47C<parrot_fuzzer.py> needs access to Parrot's source code in order to figure out
48which PMCs and ops are available. It assumes that it's running in the root
49directory of Parrot's source code. You can specify a different directory using
50the C<--parrot-root> switch.
51
52=head1 OPTIONS
53
54=over 4
55
56=item C<--parrot-root=/path/to/parrot>
57
58Represents the path to the Parrot root directory. By default, this is the
59current directory.
60
61=item C<--runcore=--some-runcore>
62
63Specifies which runcore to use when running Parrot. The default is the I<slow>
64core. This option corresponds directly to Parrot's C<--runcore> option. Other
65runcores include I<fast>.
66
67Run C<parrot --help> for more details.
68
69=item C<--ignore-blacklist>
70
Some PMCs and opcodes are known to cause false positives or results of limited
value. These are blacklisted by default. Using C<--ignore-blacklist> causes
the fuzzer to use all available PMCs and opcodes, even those known to behave
badly during testing.
75
76=item C<--instructions=10>
77
78Represents the number of instructions during the test run. Note that a larger
79number such as 20 does not necessarily result in more failures. Defaults to 3.
80
81=back
82
83=head1 LICENSE
84
85This program is distributed under the same license as Parrot itself.
86
87=cut
88
89'''
90
class ParrotFuzzer(Application):
    """Fusil application that fuzzes Parrot with randomly generated PIR.

    It contributes the fuzzer-specific command-line options and, for each
    project, creates the Parrot process, the PIR generator and the watchers
    attached to the process.
    """

    # Base name of the dir where temp files and successful results will be stored
    NAME = "parrot_fuzz"

    def createFuzzerOptions(self, parser):
        """Build the "Parrot fuzzer" option group.

        parser -- the optparse parser the new group belongs to

        Returns the populated OptionGroup.
        """
        options = OptionGroup(parser, "Parrot fuzzer")
        # The value is joined with "/parrot" in setupProject and is also used
        # to locate Parrot's sources, so it is the root directory rather than
        # the path of the executable.
        options.add_option("--parrot-root",
                help="Path to the Parrot root directory (default: .)",
                type="str",
                default=".")
        options.add_option("--runcore",
                help="Run Parrot with the specified runcore (default: --slow-core)",
                type="str",
                default="--slow-core")
        # An int option gets an int default instead of relying on optparse
        # converting the string "3".
        options.add_option("--instructions",
                help="Generate this many instructions per test run (default: 3)",
                type="int",
                default=3)
        options.add_option("--ignore-blacklist",
                help="Use opcodes and PMCs known to cause bad or questionable results (default: use blacklists)",
                action="store_true",
                default=False)
        return options

    def setupProject(self):
        """Create the agents of the fuzzing project.

        Reads the Parrot root and runcore from the parsed options, locates
        the parrot executable below the root, and creates the Parrot process,
        the PIR generator and the process/stdout watchers.
        """
        parrot_root = self.options.parrot_root
        runcore = self.options.runcore
        parrot = locateProgram(parrot_root + "/parrot")
        # "<fuzzy.pir>" is only a placeholder: ParrotProcess.on_pir_source
        # writes the name of the generated file into the command line.
        process = ParrotProcess(self.project, [parrot, runcore, "<fuzzy.pir>"])
        # The generator is not referenced again in this method, so it is not
        # bound to a local name (same as the two watchers below).
        PirGenerator(self.project, self.options)
        WatchProcess(process)
        WatchStdout(process)
125
class PirGenerator(ProjectAgent, WriteCode):
    """Agent that writes one randomly generated PIR program per session.

    The program consists of a fixed preamble that installs a catch-all
    exception handler, the set-up code for the random arguments, the random
    opfunc calls themselves and a fixed postamble holding the handler label.
    """

    def __init__(self, project, options):
        """Remember the relevant options and load the opfunc and PMC lists.

        project -- project handed on to ProjectAgent
        options -- parsed options; parrot_root, instructions and
                   ignore_blacklist are read from it
        """
        root = options.parrot_root
        instructions = options.instructions
        skip_blacklist = options.ignore_blacklist

        self.parrot_root = root
        self.instruction_count = instructions
        self.ignore_blacklist = skip_blacklist

        self.opfunc_gen = OpfuncGenerator()
        self.arg_gen = ArgGenerator(root, skip_blacklist)
        self.opfunc_gen.populateOpfuncList(root, skip_blacklist)

        ProjectAgent.__init__(self, project, "pir_source")
        WriteCode.__init__(self)

    def generatePir(self, filename):
        """Generate a random PIR program and write it to *filename*."""
        self.pir_body = ''
        self.pir_preamble = """
.sub main
    $P0 = new ['ExceptionHandler']
    set_addr $P0, catchall
    push_eh $P0   #pokemon: gotta catch 'em all
"""
        self.pir_postamble = """
catchall:
    # Don't do anything with exceptions: we're hoping for a segfault or similar.
.end
"""
        # Number of instructions to emit. Strangely, a low number like 3
        # seems to generate slightly more faults than a high number like 20.
        wanted = self.instruction_count
        self.pir_body += "    # generating " + str(wanted) + " instructions\n"

        kinds = ['s', 'p', 'i', 'n', 'sc', 'ic', 'nc']
        self.createFile(filename)
        generator = self.arg_gen

        # Choose the opfuncs to call
        calls = [OpfuncCall(*self.opfunc_gen.getOpfunc())
                 for _ in range(wanted)]

        # Tell the argument generator how many args of each kind are needed
        for kind in kinds:
            needed = sum(call.getArgCount(kind) for call in calls)
            generator.setArgCount(kind, needed)

        # Generate the args; whatever set-up code they need goes into the
        # preamble, in this fixed order
        builders = (
            generator.generateStringArgs,
            generator.generatePMCArgs,
            generator.generateIntArgs,
            generator.generateNumArgs,
            generator.generateStringConstArgs,
            generator.generateIntConstArgs,
            generator.generateNumConstArgs,
        )
        for build in builders:
            self.pir_preamble += build()

        # Fill every argument slot of every call, then render the call
        for call in calls:
            for slot in range(call.getTotalArgCount()):
                kind = call.getArgType(slot)
                call.setArgVal(slot, generator.getArgVal(kind))
            self.pir_body += call.getOpfuncCall()

        # Write the three sections out and close the file
        for section in (self.pir_preamble, self.pir_body, self.pir_postamble):
            self.write(0, section)
        self.close()

    def on_session_start(self):
        """Create this session's fuzzy.pir and announce it as 'pir_source'."""
        path = self.session().createFilename('fuzzy.pir')
        self.generatePir(path)
        self.send('pir_source', path)
205
# Representation of a call to an opfunc, including values of arguments
# Note that arguments are literal, e.g. '$P0', '"foo"', etc
class OpfuncCall:
    """One call to an opfunc together with its literal argument values.

    name -- short opfunc name, e.g. 'add'
    sig  -- argument types joined by '_', e.g. 'i_i_ic'; '' for no arguments
    """

    def __init__(self, name, sig):
        self.name = name
        if sig == '':
            # ''.split('_') would yield [''], i.e. one bogus argument
            self.long_name = name
            self.arg_types = []
        else:
            self.long_name = name + '_' + sig
            self.arg_types = sig.split('_')
        self.total_arg_count = len(self.arg_types)
        # Every argument starts out as an empty literal until setArgVal is called
        self.arg_vals = [''] * self.total_arg_count

    def getLongName(self):
        """Return the opfunc name including its signature suffix."""
        return self.long_name

    def getArgCount(self, arg):
        """Return how many arguments of type *arg* this call takes."""
        return self.arg_types.count(arg)

    def getTotalArgCount(self):
        """Return the total number of arguments of this call."""
        return self.total_arg_count

    def getArgType(self, n):
        """Return the type of argument number *n* (0-based)."""
        return self.arg_types[n]

    def setArgVal(self, n, arg_val):
        """Set the literal value of argument number *n* (0-based)."""
        self.arg_vals[n] = arg_val

    def getOpfuncCall(self):
        """Return the PIR text of the call, preceded by a comment line
        holding the long name."""
        opfunc_call = '\n    # ' + self.long_name + '\n    ' + self.name
        if self.arg_vals:
            opfunc_call += ' ' + ', '.join(self.arg_vals)
        return opfunc_call + '\n'
249
class ArgGenerator:
    """Generates random argument values for opfunc calls.

    For register types ('s', 'p', 'i', 'n') the generate* methods return PIR
    code that initialises one register per requested argument and record the
    register names. For constant types ('sc', 'ic', 'nc') they record literal
    values and return an empty string. getArgVal() then hands out one of the
    recorded values at random.
    """

    # Value categories a random number is drawn from
    _ALL_MAGNITUDES = ('neg_many', 'neg_one', 'zero', 'pos_one', 'pos_many')
    # Negative numbers and zero mess up control flow-related ops, so integer
    # constants are restricted to positive values
    _POSITIVE_MAGNITUDES = ('pos_one', 'pos_many')

    # Replacement text for characters that cannot appear as-is in a generated
    # double-quoted string; line breaks are dropped altogether
    _STRING_ESCAPES = {'"': '\\"', '\\': '\\\\', '\n': '', '\r': ''}

    def __init__(self, parrot_root, ignore_blacklist):
        """Create the generator and load the list of PMC types.

        parrot_root      -- Parrot root directory, passed to the PMC type list
        ignore_blacklist -- if true, blacklisted PMC types are used as well
        """
        # Per-instance state; as class attributes these dicts were shared by
        # every ArgGenerator instance.
        self.arg_counts = {}
        self.args = {}
        self.pmc_gen = PMCTypeGenerator()
        self.pmc_gen.populatePMCList(parrot_root, ignore_blacklist)

    def setArgCount(self, arg_type, count):
        """Record that *count* arguments of *arg_type* are needed."""
        self.arg_counts[arg_type] = count

    def getArgVal(self, arg_type):
        """Return a random one of the generated values of *arg_type*."""
        return random.choice(self.args[arg_type])

    def generateStringArgs(self):
        """Generate the $S registers; return the PIR code initialising them."""
        return self._fillRegisters('s', 'S',
                                   lambda: '"' + self.getString() + '"')

    def generatePMCArgs(self):
        """Generate the $P registers; return the PIR code initialising them."""
        return self._fillRegisters(
            'p', 'P', lambda: "new ['" + self.pmc_gen.getPMCType() + "']")

    def generateIntArgs(self):
        """Generate the $I registers; return the PIR code initialising them."""
        return self._fillRegisters(
            'i', 'I', lambda: str(self._randomInt(self._ALL_MAGNITUDES)))

    def generateNumArgs(self):
        """Generate the $N registers; return the PIR code initialising them."""
        return self._fillRegisters('n', 'N', lambda: str(self._randomNum()))

    def generateStringConstArgs(self):
        """Generate the string constants; no set-up code is needed."""
        return self._fillConstants('sc', lambda: '"' + self.getString() + '"')

    def generateIntConstArgs(self):
        """Generate the (positive) integer constants; no set-up code is needed."""
        return self._fillConstants(
            'ic', lambda: str(self._randomInt(self._POSITIVE_MAGNITUDES)))

    def generateNumConstArgs(self):
        """Generate the number constants; no set-up code is needed."""
        return self._fillConstants('nc', lambda: str(self._randomNum()))

    def getString(self):
        """Return a random string of up to 10 source characters, escaped for
        use inside a double-quoted literal (line breaks are removed)."""
        chars = string.printable + string.punctuation + string.whitespace
        pieces = []
        for _ in range(random.randint(0, 10)):
            char = random.choice(chars)
            pieces.append(self._STRING_ESCAPES.get(char, char))
        return ''.join(pieces)

    def _fillRegisters(self, arg_type, sigil, make_value):
        """Create one register per requested *arg_type* argument.

        Records the register names in self.args[arg_type] and returns the PIR
        lines assigning make_value() to each of them. A type whose count was
        never set is treated as needing no arguments.
        """
        registers = []
        lines = []
        for n in range(self.arg_counts.get(arg_type, 0)):
            register = '$' + sigil + str(n)
            lines.append("    " + register + " = " + make_value() + "\n")
            registers.append(register)
        self.args[arg_type] = registers
        return ''.join(lines)

    def _fillConstants(self, arg_type, make_value):
        """Record one make_value() literal per requested *arg_type* argument.

        Constants need no supporting PIR code, so '' is returned.
        """
        self.args[arg_type] = [make_value()
                               for _ in range(self.arg_counts.get(arg_type, 0))]
        return ""

    def _randomInt(self, magnitudes):
        """Return a random integer from a randomly chosen category."""
        magnitude = random.choice(magnitudes)
        if magnitude == 'neg_many':
            return random.randint(-999999, -2)
        if magnitude == 'neg_one':
            return -1
        if magnitude == 'zero':
            return 0
        if magnitude == 'pos_one':
            return 1
        if magnitude == 'pos_many':
            return random.randint(2, 999999)
        raise ValueError("unknown magnitude: " + str(magnitude))

    def _randomNum(self):
        """Return a random float from a randomly chosen category."""
        magnitude = random.choice(self._ALL_MAGNITUDES)
        if magnitude == 'neg_many':
            return (random.random() * -999999) - 1
        if magnitude == 'neg_one':
            return -1.0
        if magnitude == 'zero':
            return 0.0
        if magnitude == 'pos_one':
            return 1.0
        return (random.random() * 999999) + 1
384
class PMCTypeGenerator:
    """Supplies random PMC type names read from Parrot's lib/Parrot/PMC.pm."""

    # PMC types that are skipped unless the blacklist is ignored
    pmc_blacklist = [
            'Packfile',
            'PackfileAnnotation',
            'PackfileAnnotationKeys',
            'PackfileAnnotations',
            'PackfileConstantTable',
            'PackfileDirectory',
            'PackfileFixupEntry',
            'PackfileFixupTable',
            'PackfileRawSegment',
            'PackfileSegment',
            ]

    # A type entry looks like "<TAB>Name => 42,"; the name is captured
    _PMC_ENTRY = re.compile(r'\t([a-zA-Z]+) => [0-9]+,')

    def __init__(self):
        # Per-instance list; as a class attribute it was shared by all
        # instances and grew with every populatePMCList() call.
        self.pmc_list = []

    def populatePMCList(self, parrot_root, ignore_blacklist):
        """(Re)build the list of PMC types from <parrot_root>/lib/Parrot/PMC.pm.

        parrot_root      -- Parrot root directory
        ignore_blacklist -- if true, blacklisted types are included as well

        Raises IOError (OSError on Python 3) if the file cannot be opened.
        """
        pmc_pm = parrot_root + "/lib/Parrot/PMC.pm"
        names = []
        pmc_f = open(pmc_pm, 'r')
        try:
            for line in pmc_f:
                entry = self._PMC_ENTRY.search(line)
                if entry is None:
                    continue
                # Taking the captured group does not depend on the line
                # ending with a newline
                name = entry.group(1)
                if ignore_blacklist or name not in self.pmc_blacklist:
                    names.append(name)
        finally:
            pmc_f.close()
        self.pmc_list = names

    def getPMCType(self):
        """Return a random PMC type name from the populated list."""
        return random.choice(self.pmc_list)
412
413
class OpfuncGenerator:
    """Supplies random opfuncs read from Parrot's src/ops/core_ops.c.

    Each entry of opfunc_list is a two-element list [short_name, sig], where
    sig is the '_'-joined list of argument types ('' for no arguments).
    """

    # Opfuncs that are skipped unless the blacklist is ignored
    opfunc_blacklist = [
            'check_events', # Only for testing
            'check_events__', # Not for direct use
            'clears', # Clearing all [SPIN] registers isn't useful
            'clearp',
            'cleari',
            'clearn',
            'cpu_ret',
            'debug',
            'debug_break',
            'debug_init',
            'debug_load',
            'debug_print',
            'die',
            'exit',
            'gc_debug',
            'if',
            'pic_callr__',
            'pic_get_params__',
            'pic_infix__',
            'pic_inline_sub__',
            'pic_set_returns__',
            'pin',
            'pop_eh',
            'prederef__',
            'profile',
            'push_eh',
            'returncc',
            'rethrow',
            'runinterp',
            'setn_ind',
            'sets_ind',
            'seti_ind',
            'setp_ind',
            'sleep',
            'tailcall',
            'trace',
            'trap',
            'unless',
            'unpin',
            'yield',
            ]

    # First double-quoted token of a line, after optional leading whitespace
    _QUOTED_NAME = re.compile(r'\s*"([^"]*)"')
    #XXX: Don't know how to handle these args
    _UNSUPPORTED_ARGS = re.compile(r'(pc|k|ki|kc|kic)')

    def __init__(self):
        # Per-instance list; as a class attribute it was shared by all
        # instances and grew with every populateOpfuncList() call.
        self.opfunc_list = []

    def populateOpfuncList(self, parrot_root, ignore_blacklist):
        """(Re)build the opfunc list from <parrot_root>/src/ops/core_ops.c.

        parrot_root      -- Parrot root directory
        ignore_blacklist -- if true, blacklisted opfuncs are included as well

        Raises IOError (OSError on Python 3) if the file cannot be opened.
        """
        ops_c = parrot_root + "/src/ops/core_ops.c"
        ops_f = open(ops_c, 'r')
        try:
            lines = ops_f.readlines()
        finally:
            ops_f.close()

        # This is a moderately fragile hack that relies on the specific
        # format of some generated code, expect breakage: the two lines
        # following an op type marker hold the short and the long name.
        opfuncs = []
        total = len(lines)
        index = 0
        while index < total:
            line = lines[index]
            index += 1
            if 'PARROT_INLINE_OP' not in line and 'PARROT_FUNCTION_OP' not in line:
                continue
            if index + 1 >= total:
                # Marker at the very end of a truncated file: no names follow
                break
            short_name = self._quotedName(lines[index])
            long_name = self._quotedName(lines[index + 1])
            index += 2
            if short_name is None or long_name is None:
                continue

            if long_name == short_name:
                sig = ''
            elif long_name.startswith(short_name + '_'):
                # Strip only the leading "<short_name>_"; the signature
                # itself is left untouched
                sig = long_name[len(short_name) + 1:]
            else:
                # Not of the form <short_name>_<sig>: cannot derive the args
                continue

            if self._UNSUPPORTED_ARGS.search(sig):
                continue
            if ignore_blacklist or short_name not in self.opfunc_blacklist:
                opfuncs.append([short_name, sig])
        self.opfunc_list = opfuncs

    def getOpfunc(self):
        """Return a random [short_name, sig] entry of the populated list."""
        return random.choice(self.opfunc_list)

    def _quotedName(self, line):
        """Return the first double-quoted token of *line*, or None."""
        found = self._QUOTED_NAME.match(line)
        if found is None:
            return None
        return found.group(1)
492
class ParrotProcess(CreateProcess):
    """Parrot process that is created once a generated PIR file is announced."""

    def on_pir_source(self, filename):
        """Store *filename* at index 1 of the command line and create the process.

        NOTE(review): setupProject builds the argument list as
        [parrot, runcore, "<fuzzy.pir>"]. If cmdline.arguments keeps the
        program at index 0, index 1 is the runcore switch rather than the
        "<fuzzy.pir>" placeholder -- confirm which slot is intended.
        """
        arguments = self.cmdline.arguments
        arguments[1] = filename
        self.createProcess()
497
# Run the fuzzer when this file is executed as a script
if __name__ == "__main__":
    fuzzer = ParrotFuzzer()
    fuzzer.main()
500