1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) Facebook, Inc.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return the absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """Integer tags for the kind of input data associated with a target."""
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """Integer tags for the frame type associated with a target."""
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """Pairs an input type with a frame type (FrameType.ZSTD by default)."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.frame_type = frame_type
        self.input_type = input_type
43
44
# Constants
# Absolute path of the directory that contains this script.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Corpora live in the 'corpora' directory next to this script.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to its TargetInfo (input type and frame type).
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
}
# Names of every known target.
TARGETS = list(TARGET_INFO.keys())
# Target names plus the 'all' pseudo-target; used in the parsers' help text.
ALL_TARGETS = TARGETS + ['all']
# NOTE(review): the parsers below hard-code 4 as the default seed size rather
# than reading this constant.
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
# Each value comes from the environment, with the second argument as fallback.
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to the resolved CFLAGS when it is not set.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
# Default locations of the helper binaries, relative to this script.
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
# Extra flags that build() appends only when MSAN is enabled.
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
92
93
def create(r):
    """Return the absolute path of ``r``, creating the directory if needed."""
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
99
100
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
106
107
@contextlib.contextmanager
def tmpdir():
    """Yield a new temporary directory; remove it (ignoring errors) on exit."""
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
115
116
def parse_targets(in_targets):
    """
    Expand a list of target names into a list of unique, valid targets.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Raises RuntimeError for any other name that is not in TARGETS.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
129
130
def targets_parser(args, description):
    """
    Parse a command line that consists of zero or more TARGET names.

    The first element of ``args`` is popped and used as the program name.
    Unrecognized arguments are stored on the result as ``extra`` and TARGET
    is replaced by the output of parse_targets().
    """
    prog = args.pop(0)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))

    parser = argparse.ArgumentParser(prog=prog, description=description)
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
144
145
def parse_env_flags(args, flags):
    """
    Look for sanitizer flags set by environment variables.

    Scans ``flags`` (a single string of compiler/linker flags) for
    ``-fsanitize=...`` and ``-fno-sanitize=...`` options and updates
    ``args.asan``, ``args.msan`` and ``args.ubsan`` accordingly. A sanitizer
    that is not mentioned keeps the value already stored on ``args``.
    ``args.sanitize`` is set to whether any of the three is enabled.

    Raises RuntimeError if the same sanitizer is both enabled and disabled.
    Returns the updated ``args``.
    """
    def sanitizer_names(option):
        # Sanitizer names may contain hyphens (e.g. pointer-overflow), so the
        # whole comma separated list is captured and then split. Stopping at
        # the first hyphen would drop every name after a hyphenated one.
        names = set()
        pattern = r'-f{}=([a-z,-]+)'.format(option)
        for group in re.findall(pattern, flags):
            names.update(name for name in group.split(',') if name)
        return names

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = sanitizer_names('sanitize')
    nosan = sanitizer_names('no-sanitize')

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
173
174
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and prints the output of the
    former. Returns ``(compiler, version)`` where ``compiler`` is 'clang' or
    'gcc' and ``version`` is a 3-tuple of ints, or ``(None, None)`` when the
    C compiler is not recognized.

    Raises RuntimeError if ``cxx`` does not belong to the same compiler family
    as ``cc``, or if no X.Y.Z version can be found in the output of ``cc``.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    # The banner may contain non-ASCII bytes (e.g. a localized copyright
    # sign); drop them instead of failing, since this is only a diagnostic.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('ascii', 'ignore')))

    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    else:
        return None, None

    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)',
                              cc_version_bytes)
    if version_match is None:
        raise RuntimeError(
            'Unable to find the version of {} in its --version output'.
            format(cc))
    version = tuple(int(part) for part in version_match.groups())
    return compiler, version
196
197
def overflow_ubsan_flags(cc, cxx):
    """
    Return the -fno-sanitize flags for overflow checks, chosen from the
    compiler and version reported by compiler_version().
    """
    no_signed_overflow = ['-fno-sanitize=signed-integer-overflow']
    no_pointer_overflow = ['-fno-sanitize=pointer-overflow']

    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        if version < (8, 0, 0):
            return no_signed_overflow
        return no_pointer_overflow
    if compiler == 'clang' and version >= (5, 0, 0):
        return no_pointer_overflow
    return []
205
206
def build_parser(args):
    """
    Parse the command line of the 'build' command.

    The first element of ``args`` is popped and used as the program name.
    After parsing, the sanitizer settings are updated from the sanitizer
    options found in the cpp/c/cxx/ld flags (see parse_env_flags()).

    Returns the parsed namespace.
    Raises RuntimeError when the selected options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(
                  LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    # Use the module constant so the default and its help text cannot drift.
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings override the switches.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
383
384
def build(args):
    """
    Implements the 'build' command.

    Parses the options, assembles the preprocessor, compiler and linker flags,
    then runs 'make clean' followed by 'make' with those flags and the
    requested targets.

    Returns 1 if parsing the options raises an exception (the exception is
    printed) and 0 otherwise. A failing make invocation raises
    subprocess.CalledProcessError because check_call() is used.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    # The parser puts these two options in a mutually exclusive group.
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        # The fuzzing engine is ignored when -fsanitize=fuzzer is used.
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    # The recover flags are only passed when some sanitizer is enabled.
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        # Each extra value is appended as one unsplit string.
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        # Unless the pointer overflow check was requested, add the
        # compiler-dependent -fno-sanitize flags.
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    # With the default engine, libregression.a is built before the targets.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    # Each flag set becomes a single VAR=value argument on the make command.
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
494
495
def libfuzzer_parser(args):
    """
    Parse the command line of the 'libfuzzer' command.

    The first element of ``args`` is popped and used as the program name.
    Unrecognized arguments are stored on the result as ``extra``.
    Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    # The three directory overrides share one help-text shape.
    directory_options = (
        ('corpora', 'TARGET'),
        ('artifact', 'TARGET-crash'),
        ('seed', 'TARGET-seed'),
    )
    for kind, placeholder in directory_options:
        parser.add_argument(
            '--{}'.format(kind),
            type=str,
            help='Override the default {} dir (default: {})'.format(
                kind, abs_join(CORPORA_DIR, placeholder)))

    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    name = parsed.TARGET
    if name and name not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(name))

    return parsed
534
535
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the libFuzzer binary for ``target``.

    Directories that are not given default to entries of CORPORA_DIR named
    after the target. The corpora and artifact directories are created when
    missing; the seed directory is only passed if it already exists.
    ``extra_args`` is appended to the command line. Raises
    subprocess.CalledProcessError if the binary exits with a non-zero status.
    """
    corpora_dir = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    crash_dir = artifact
    if crash_dir is None:
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    seed_dir = seed
    if seed_dir is None:
        seed_dir = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    passthrough = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    inputs = [create(corpora_dir)]
    crash_dir = create(crash_dir)
    seed_dir = check(seed_dir)

    inputs.append(crash_dir)
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(crash_dir)]
    cmd = cmd + inputs + passthrough
    print(' '.join(cmd))
    subprocess.check_call(cmd)
560
561
def libfuzzer_cmd(args):
    """
    Implements the 'libfuzzer' command.

    Returns 1 if parsing the options raises (the exception is printed),
    otherwise runs the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as err:
        print(err)
        return 1
    libfuzzer(
        parsed.TARGET,
        corpora=parsed.corpora,
        artifact=parsed.artifact,
        seed=parsed.seed,
        extra_args=parsed.extra)
    return 0
570
571
def afl_parser(args):
    """
    Parse the command line of the 'afl' command.

    The first element of ``args`` is popped and used as the program name.
    Unrecognized arguments are stored on the result as ``extra``. Missing
    corpora/output directories default to entries of CORPORA_DIR named after
    the target. Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    afl_help = 'AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--corpora', type=str, help=corpora_help)
    parser.add_argument('--output', type=str, help=output_help)
    parser.add_argument(
        '--afl-fuzz', type=str, default=AFL_FUZZ, help=afl_help)
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    name = parsed.TARGET
    if name and name not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(name))

    # Fill in the per-target default directories.
    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, name)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(name))

    return parsed
613
614
def afl(args):
    """
    Implements the 'afl' command.

    Returns 1 if parsing the options raises (the exception is printed).
    Otherwise runs afl-fuzz on the target and returns 0; the exit status of
    afl-fuzz itself is not checked.
    """
    try:
        parsed = afl_parser(args)
    except Exception as err:
        print(err)
        return 1
    binary = abs_join(FUZZ_DIR, parsed.TARGET)

    in_dir = create(parsed.corpora)
    out_dir = create(parsed.output)

    cmd = [parsed.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(parsed.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
631
632
def regression(args):
    """
    Implements the 'regression' command.

    Returns 1 if parsing the options raises (the exception is printed).
    Otherwise runs every selected target binary on its corpora directory and
    returns 0; a failing binary raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1
    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
652
653
def gen_parser(args):
    """
    Parse the command line of the 'gen' command.

    The first element of ``args`` is popped and used as the program name.
    Unrecognized arguments are stored on the result as ``extra``. A missing
    seed directory defaults to an entry of CORPORA_DIR named after the target.
    Raises RuntimeError if TARGET is not a known target or if the decodecorpus
    binary is not a file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    decodecorpus_help = (
        "decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    zstd_help = "zstd binary (default: $ZSTD='{}')".format(ZSTD)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument(
        '--number', '-n', type=int, default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log', type=int, default=18,
        help='Maximum sample size to generate')
    parser.add_argument('--seed', type=str, help=seed_help)
    parser.add_argument(
        '--decodecorpus', type=str, default=DECODECORPUS,
        help=decodecorpus_help)
    parser.add_argument('--zstd', type=str, default=ZSTD, help=zstd_help)
    parser.add_argument(
        '--fuzz-rng-seed-size', type=int, default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    name = parsed.TARGET
    if name and name not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(name))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(name))

    # The decodecorpus binary must already have been built.
    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
716
717
def gen(args):
    """
    Implements the 'gen' command.

    Runs decodecorpus to generate samples into temporary directories, picks
    the samples that match the input type of the target (raw, compressed, or
    dictionaries trained from the raw samples with zstd) and copies them into
    the seed directory.

    Returns 1 if parsing the options raises (the exception is printed) and 0
    otherwise. A failing subprocess raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples to train from.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            # The block size log is capped at 17 for block targets.
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory. The contents are copied
        # verbatim: no RNG seed bytes are prepended here.
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
787
788
def minimize(args):
    """
    Implements the 'minimize' command.

    For every selected target, runs libfuzzer() with -merge=1 so the corpus
    is merged into the target's seed corpus directory, then copies crash
    files that are not yet present there. Returns 1 if parsing the options
    raises (the exception is printed).
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        # Merge the corpus + anything else into the seed corpus
        corpus_dir = abs_join(CORPORA_DIR, name)
        merged_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        merge_args = [corpus_dir, "-merge=1"] + parsed.extra
        libfuzzer(name, corpora=merged_dir, extra_args=merge_args)
        known = set(os.listdir(merged_dir))
        # Add every crash whose file name is not in the seed corpus yet
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(name))
        for entry in os.listdir(crash_dir):
            if entry in known:
                continue
            shutil.copy(abs_join(crash_dir, entry), merged_dir)
            known.add(entry)
813
814
def zip_cmd(args):
    """
    Implements the 'zip' command.

    For every selected target, runs 'zip' inside the target's seed corpus
    directory and writes the archive next to that directory. Returns 1 if
    parsing the options raises (the exception is printed).
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        parsed = targets_parser(args, description)
    except Exception as err:
        print(err)
        return 1

    for name in parsed.TARGET:
        # Archive the contents of the seed corpus directory
        corpus_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        archive = "{}.zip".format(corpus_dir)
        cmd = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=corpus_dir)
832
833
def list_cmd(args):
    """Print every known target name, one per line (``args`` is unused)."""
    listing = "\n".join(TARGETS)
    print(listing)
836
837
def short_help(args):
    """Print the one-line usage message, using ``args[0]`` as program name."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
841
842
def help(args):
    """Print the usage message followed by the options and commands."""
    short_help(args)
    lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in lines:
        print(line)
858
859
def main():
    """
    Dispatch to the command named by the first command line argument.

    Prints the help and returns 1 when no command or a help flag is given,
    and prints an error and returns 1 for an unknown command. Otherwise the
    command name is removed from sys.argv, appended to the program name, and
    the return value of the command is returned.
    """
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ('-h', '--help', '-H'):
        help(argv)
        return 1

    command = argv.pop(1)
    argv[0] = "{} {}".format(argv[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(argv)

    short_help(argv)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
889
890
# When run as a script, use the return value of main() as the exit status.
if __name__ == "__main__":
    sys.exit(main())
893