1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) 2016-2020, Facebook, Inc.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# You may select, at your option, one of the above-listed licenses.
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
17import shlex
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """
    Joins the path components and returns the result as an absolute,
    normalized path.
    """
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
class InputType(object):
    """
    Integer tags for the kind of input data a fuzz target is associated with.
    """
    # Plain (uncompressed) data.
    RAW_DATA = 1
    # Compressed data.
    COMPRESSED_DATA = 2
    # Dictionary data.
    DICTIONARY_DATA = 3
32
33
class FrameType(object):
    """
    Integer tags for the framing a fuzz target is associated with.
    """
    # Regular zstd frames (the default for TargetInfo).
    ZSTD = 1
    # Block-level data; `gen` passes --gen-blocks for these targets.
    BLOCK = 2
37
38
class TargetInfo(object):
    """
    Describes a fuzz target: which InputType it is associated with and which
    FrameType it uses (FrameType.ZSTD unless stated otherwise).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # One of the InputType constants.
        self.input_type = input_type
        # One of the FrameType constants.
        self.frame_type = frame_type
43
44
# Constants
# Directory containing this script.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory under which the per-target corpora directories live.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to its input/frame description.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
}
# Every known target name, plus the 'all' pseudo-target accepted on the
# command line.
TARGETS = list(TARGET_INFO)
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.getenv('DECODECORPUS',
                         abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
91
92
def create(r):
    """
    Returns the absolute path of directory r, creating it (and any missing
    parents) first if it is not already a directory.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
98
99
def check(r):
    """
    Returns the absolute path of r if it is an existing directory, otherwise
    None. Never creates anything.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
105
106
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory, which is
    removed again when the block exits (also on error).
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        # Removal is best effort: failures to delete are ignored.
        shutil.rmtree(scratch, ignore_errors=True)
114
115
def parse_targets(in_targets):
    """
    Expands and validates a list of target names.

    Empty names are skipped, 'all' expands to every entry of TARGETS, and
    duplicates are collapsed. Raises RuntimeError on an unknown name. The
    order of the returned list is unspecified.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
128
129
def targets_parser(args, description):
    """
    Parses a command line that consists of zero or more TARGET names.

    The first element of args is consumed as the program name. Unrecognized
    arguments are kept in `.extra` of the returned namespace, and `.TARGET`
    is the validated, expanded target list (see parse_targets).
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
143
144
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans the flag string for -fsanitize=... and -fno-sanitize=... options and
    updates args.asan, args.msan and args.ubsan accordingly: an explicit
    -fsanitize enables, an explicit -fno-sanitize disables, and otherwise the
    value already on args is kept. args.sanitize is set to whether any of the
    three ends up enabled. Raises RuntimeError when the same sanitizer is both
    enabled and disabled. Returns args.
    """
    # Sanitizer names may contain hyphens (e.g. pointer-overflow), so the
    # character class must include '-'; otherwise a comma-separated list is
    # cut short at the first hyphenated name and later entries are missed.
    names = r'([a-z-]+(?:,[a-z-]+)*)'
    san_lists = re.findall(r'-fsanitize=' + names, flags)
    nosan_lists = re.findall(r'-fno-sanitize=' + names, flags)

    def split_names(lists):
        return set(n for group in lists for n in group.split(',') if n)

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = split_names(san_lists)
    nosan = split_names(nosan_lists)

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
172
173
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version`. Returns (compiler, version) where
    compiler is 'clang', 'gcc' or None, and version is a (major, minor, patch)
    tuple of ints taken from the first X.Y.Z in the cc output (None when the
    compiler is not recognized). Raises RuntimeError if cc and cxx are not the
    same compiler family or no version number can be found.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError('CC is clang but CXX is not')
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError('CC is gcc but CXX is not')
        compiler = 'gcc'
    if compiler is not None:
        # The quantifier must sit inside each group: '([0-9])+' would capture
        # only the last digit of a component (10.0.1 -> 0.0.1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to determine the version of {}'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
194
195
def overflow_ubsan_flags(cc, cxx):
    """
    Returns the list of -fno-sanitize flags that switch off the UBSAN
    overflow check selected for the detected compiler: signed-integer-overflow
    on gcc, pointer-overflow on clang >= 5.0.0, nothing otherwise.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'clang':
        if version >= (5, 0, 0):
            return ['-fno-sanitize=pointer-overflow']
        return []
    if compiler == 'gcc':
        return ['-fno-sanitize=signed-integer-overflow']
    return []
203
204
def build_parser(args):
    """
    Parses the command line of the `build` command.

    The first element of args is consumed as the program name. Most options
    default to the corresponding environment variable captured at import time.
    After parsing, sanitizer settings are also derived from any
    -fsanitize=/-fno-sanitize= options found in the CPPFLAGS, CFLAGS, CXXFLAGS
    and LDFLAGS values (see parse_env_flags).

    Returns the parsed namespace. Raises RuntimeError when the sanitizer
    options are inconsistent with each other.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through the flag strings count as well.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
381
382
def build(args):
    """
    Implements the `build` command: parses the options, assembles the
    compiler/linker/make flags, prints them, then runs `make clean` followed
    by `make` with those flags for the requested targets.

    Returns 1 if option parsing raised an exception (the error is printed),
    0 after a successful build. A failing make invocation raises
    subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # Starting points taken from the command line / environment.
    cc = args.cc
    cxx = args.cxx
    targets = args.TARGET
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Collected here, appended to both cflags and cxxflags further down.
    common_flags = []

    cppflags.append('-DDEBUGLEVEL={}'.format(args.debug))
    cppflags.append(
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access))
    cppflags.append('-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size))

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        common_flags.append('-fsanitize=fuzzer')
        # The clang fuzzer replaces any external fuzzing engine.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    if args.sanitize:
        if args.sanitize_recover:
            common_flags.append('-fsanitize-recover=all')
        else:
            common_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        common_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            common_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        common_flags.append('-fsanitize=address')

    if args.ubsan:
        common_flags.append('-fsanitize=undefined')
        if not args.ubsan_pointer_overflow:
            common_flags.extend(overflow_ubsan_flags(cc, cxx))

    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    if args.lib_fuzzing_engine == 'libregression.a':
        # The regression driver is built first, as a make target of its own.
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags.extend(common_flags)
    cxxflags.extend(common_flags)

    # Prepare the flags for Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
492
493
def libfuzzer_parser(args):
    """
    Parses the command line of the `libfuzzer` command.

    The first element of args is consumed as the program name. Unrecognized
    arguments are stored in `.extra` of the returned namespace so they can be
    forwarded to the fuzzer binary. Raises RuntimeError if TARGET is not a
    known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been built with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    # Exactly one target: this command runs it rather than building it.
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target to run {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args
532
533
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Runs the fuzzer binary for target.

    corpora, artifact and seed default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora and
    artifact directories are created when missing; the seed directory is only
    passed along if it already exists. extra_args are appended to the command
    line. Raises subprocess.CalledProcessError if the binary exits non-zero.
    """
    corpora_dir = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    if artifact is None:
        artifact_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    else:
        artifact_dir = artifact
    if seed is None:
        seed_dir = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    else:
        seed_dir = seed
    extra = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpora_dir = create(corpora_dir)
    artifact_dir = create(artifact_dir)
    seed_dir = check(seed_dir)

    # Directories handed to the binary: corpus first, then the artifacts,
    # then the optional seeds.
    inputs = [corpora_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd.extend(inputs)
    cmd.extend(extra)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
558
559
def libfuzzer_cmd(args):
    """
    Implements the `libfuzzer` command. Returns 1 (after printing the error)
    if option parsing raised an exception, otherwise runs the fuzzer and
    returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        parsed.TARGET,
        parsed.corpora,
        parsed.artifact,
        parsed.seed,
        parsed.extra)
    return 0
568
569
def afl_parser(args):
    """
    Parses the command line of the `afl` command.

    The first element of args is consumed as the program name. Unrecognized
    arguments are stored in `.extra` of the returned namespace. When not given,
    `.corpora` and `.output` are filled in with CORPORA_DIR/<TARGET> and
    CORPORA_DIR/<TARGET>-afl. Raises RuntimeError if TARGET is not a known
    target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    # Exactly one target: this command runs it rather than building it.
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target to run {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args
611
612
def afl(args):
    """
    Implements the `afl` command: runs afl-fuzz on the target binary with the
    corpora directory as input and the output directory as output, creating
    both directories when missing.

    Returns 1 (after printing the error) if option parsing raised an
    exception, otherwise 0.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    binary = abs_join(FUZZ_DIR, args.TARGET)

    in_dir = create(args.corpora)
    out_dir = create(args.output)

    cmd = [args.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(args.extra)
    # '@@' is passed through literally as the final argument.
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    # The exit status of afl-fuzz is not checked.
    subprocess.call(cmd)
    return 0
629
630
def regression(args):
    """
    Implements the `regression` command: runs each requested target binary on
    its corpora directory (created when missing).

    Returns 1 (after printing the error) if option parsing raised an
    exception, otherwise 0. A target that exits non-zero raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
650
651
def gen_parser(args):
    """
    Parses the command line of the `gen` command.

    The first element of args is consumed as the program name. Unrecognized
    arguments are stored in `.extra` of the returned namespace. When not
    given, `.seed` is filled in with CORPORA_DIR/<TARGET>-seed. Raises
    RuntimeError if TARGET is not a known target or the decodecorpus binary
    does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    # Default comes from the shared constant rather than a repeated literal.
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used to generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target to generate a seed corpus for {{{}}}'.format(
            ', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args
714
715
def gen(args):
    """
    Implements the `gen` command: runs decodecorpus to generate samples in
    temporary directories, picks the sample set matching the target's input
    type (training zstd dictionaries for dictionary targets), and copies the
    chosen samples into the seed directory.

    Returns 1 (after printing the error) if option parsing raised an
    exception, otherwise 0. A failing decodecorpus or zstd invocation raises
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples (presumably so that
        # dictionary training has enough input).
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            # Use the adjusted count, not the raw command line value.
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9;
            # the upper bound is exclusive and at least one is produced.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE(review): the command description mentions prepending RNG seed
        # bytes, but nothing is prepended here and --fuzz-rng-seed-size is
        # not used -- confirm which is intended.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
785
786
def minimize(args):
    """
    Implements the `minimize` command: for each target, merges its corpus into
    CORPORA_DIR/<target>_seed_corpus with libfuzzer's -merge=1, then copies
    over any file from the crash directory whose name is not already present.

    Returns 1 (after printing the error) if option parsing raised an
    exception; otherwise returns nothing.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        merge_args = [corpus, "-merge=1"]
        merge_args.extend(args.extra)
        libfuzzer(target, corpora=seed_corpus, extra_args=merge_args)

        # Names already in the seed corpus; crashes with the same name are
        # left alone.
        present = set(os.listdir(seed_corpus))
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crash_dir):
            if crash in present:
                continue
            shutil.copy(abs_join(crash_dir, crash), seed_corpus)
            present.add(crash)
811
812
def zip_cmd(args):
    """
    Implements the `zip` command: for each target, runs the external `zip`
    tool inside CORPORA_DIR/<target>_seed_corpus, writing the archive next to
    that directory as <target>_seed_corpus.zip.

    Returns 1 (after printing the error) if option parsing raised an
    exception; otherwise returns nothing.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        corpus_dir = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        archive = corpus_dir + ".zip"
        cmd = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(cmd))
        # Run from inside the corpus directory so "." refers to it.
        subprocess.check_call(cmd, cwd=corpus_dir)
830
831
def list_cmd(args):
    """
    Implements the `list` command: prints every target name on its own line.
    args is accepted for uniformity with the other commands and is unused.
    """
    listing = "\n".join(TARGETS)
    print(listing)
834
835
def short_help(args):
    """
    Prints the one-line usage message followed by a blank line, using args[0]
    as the program name.
    """
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
839
840
def help(args):
    """
    Prints the full help text: the usage line followed by the list of options
    and commands.
    """
    short_help(args)
    lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in lines:
        print(line)
856
857
def main():
    """
    Entry point: dispatches sys.argv to the handler of the requested command
    and returns that handler's result for use as the exit status.

    Prints the help text and returns 1 when no command or a help flag is
    given; prints an error and returns 1 for an unknown command.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    # Fold the command into the program name so each sub-parser reports
    # "<prog> <command>" in its own usage message.
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
887
888
if __name__ == "__main__":
    # Use the command's return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
891