1#!/usr/bin/env python
2
3# ################################################################
4# Copyright (c) 2016-present, Facebook, Inc.
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
10# ##########################################################################
11
12import argparse
13import contextlib
14import os
15import re
16import shlex
17import shutil
18import subprocess
19import sys
20import tempfile
21
22
def abs_join(a, *p):
    """
    Join ``a`` with the components in ``p`` and return the result as an
    absolute, normalized path.
    """
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
25
26
class InputType(object):
    """
    Kinds of input a fuzz target consumes.

    ``gen`` uses this to decide which generated samples (decompressed,
    compressed, or trained dictionaries) are copied into the seed directory.
    """
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
31
32
class FrameType(object):
    """
    Frame formats a fuzz target works on.

    ``gen`` asks decodecorpus for blocks (``--gen-blocks``) when a target's
    frame type is BLOCK.
    """
    ZSTD, BLOCK = 1, 2
36
37
class TargetInfo(object):
    """
    Describes a fuzz target: the InputType it consumes and its FrameType.
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        # The frame type defaults to FrameType.ZSTD when not given.
        self.frame_type = frame_type
        self.input_type = input_type
42
43
# Constants
# Directory containing this script; fuzz target binaries are resolved
# relative to it.
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
# Root directory holding the per-target corpora directories.
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps each fuzz target name to the kind of input (and frame type) it uses.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
}
# Names of all known fuzz targets.
TARGETS = list(TARGET_INFO.keys())
# Target names accepted on the command line; 'all' selects every target.
ALL_TARGETS = TARGETS + ['all']
# Default FUZZ_RNG_SEED_SIZE value (same as the --fuzz-rng-seed-size default).
FUZZ_RNG_SEED_SIZE = 4
63
# Standard environment variables
# Each setting is read from the environment, with the fallback given here.
CC = os.getenv('CC', 'cc')
CXX = os.getenv('CXX', 'c++')
CPPFLAGS = os.getenv('CPPFLAGS', '')
CFLAGS = os.getenv('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.getenv('CXXFLAGS', CFLAGS)
LDFLAGS = os.getenv('LDFLAGS', '')
MFLAGS = os.getenv('MFLAGS', '-j')
72
# Fuzzing environment variables
# Tool locations come from the environment; the decodecorpus and zstd
# binaries default to paths relative to this script's directory.
LIB_FUZZING_ENGINE = os.getenv('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.getenv('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.getenv('DECODECORPUS',
                         abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.getenv('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))
79
# Sanitizer environment variables
# Extra flags that `build` appends only when MSAN is enabled; empty if unset.
MSAN_EXTRA_CPPFLAGS = os.getenv('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.getenv('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.getenv('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.getenv('MSAN_EXTRA_LDFLAGS', '')
85
86
def create(r):
    """
    Return the absolute path of directory ``r``, creating it (and any
    missing parents) when it does not exist yet.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
92
93
def check(r):
    """
    Return the absolute path of ``r`` if it is an existing directory,
    otherwise None.
    """
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
99
100
@contextlib.contextmanager
def tmpdir():
    """
    Context manager yielding the path of a fresh temporary directory.

    The directory and its contents are removed on exit, even if the body
    raises; errors during removal are ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
108
109
def parse_targets(in_targets):
    """
    Expand and validate a list of target names.

    Empty entries are skipped and 'all' selects every entry of TARGETS.
    Returns the de-duplicated targets as a list (in set order).
    Raises RuntimeError for a name that is not a known target.
    """
    selected = set()
    for name in in_targets:
        if not name:
            continue
        if name == 'all':
            selected = selected.union(TARGETS)
            continue
        if name not in TARGETS:
            raise RuntimeError('{} is not a valid target'.format(name))
        selected.add(name)
    return list(selected)
122
123
def targets_parser(args, description):
    """
    Parse a command line that consists of zero or more TARGET names.

    ``args[0]`` is popped from ``args`` and used as the program name.
    Unrecognized arguments are kept in ``extra`` on the returned namespace,
    and ``TARGET`` is replaced by the result of parse_targets().
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    cli.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, passthrough = cli.parse_known_args(args)
    parsed.extra = passthrough
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
137
138
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans ``flags`` (one string of compiler/linker flags) for
    ``-fsanitize=...`` and ``-fno-sanitize=...`` and updates ``args.asan``,
    ``args.msan`` and ``args.ubsan`` accordingly; a sanitizer mentioned in
    neither keeps its current value.  ``args.sanitize`` is set to whether
    any of the three ends up enabled.

    Returns ``args``.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    # Sanitizer names may contain hyphens (e.g. signed-integer-overflow).
    # They must be part of the name pattern, otherwise the comma separated
    # list is cut at the first hyphen and later entries are never seen.
    san_flags = ','.join(
        re.findall(r'-fsanitize=((?:[a-z-]+,?)+)', flags))
    nosan_flags = ','.join(
        re.findall(r'-fno-sanitize=((?:[a-z-]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    # Drop the empty names produced by trailing commas or by no match at all.
    san = set(name for name in san_flags.split(',') if name)
    nosan = set(name for name in nosan_flags.split(',') if name)

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
166
167
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and inspects their output.
    Returns a ``(compiler, version)`` tuple: ``compiler`` is 'clang', 'gcc'
    or None when neither name appears in the output of ``cc``; ``version``
    is a ``(major, minor, patch)`` tuple of ints taken from the output of
    ``cc``, or None when the compiler is not recognized.
    Raises RuntimeError if a recognized compiler prints no X.Y.Z version.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        # The quantifier must sit inside each group: `([0-9])+` keeps only
        # the last digit of a component, turning 10.0.1 into (0, 0, 1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
188
189
def overflow_ubsan_flags(cc, cxx):
    """
    Return the ``-fno-sanitize=`` flags that disable an overflow check for
    the detected compiler.

    gcc gets the signed integer overflow check disabled, clang 5.0.0 or newer
    the pointer overflow check; any other compiler gets no flags.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        flags = ['-fno-sanitize=signed-integer-overflow']
    elif compiler == 'clang' and version >= (5, 0, 0):
        flags = ['-fno-sanitize=pointer-overflow']
    else:
        flags = []
    return flags
197
198
def build_parser(args):
    """
    Parse and validate the command line of the ``build`` command.

    ``args[0]`` is removed from ``args`` and used as the program name; the
    remaining entries are parsed.  Sanitizers requested through
    ``-fsanitize=``/``-fno-sanitize=`` inside the CPPFLAGS, CFLAGS, CXXFLAGS
    and LDFLAGS values are folded into the ``asan``/``msan``/``ubsan``
    options by parse_env_flags(), which also sets ``sanitize``.

    Returns the populated argparse namespace.
    Raises RuntimeError when the selected options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer are alternatives.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers may also be requested through the flag strings themselves.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
375
376
def build(args):
    """
    Entry point of the ``build`` command: clean the tree and build targets.

    Translates the parsed options into CPPFLAGS/CFLAGS/CXXFLAGS/LDFLAGS and
    make variables, prints them, then runs ``make clean`` followed by
    ``make`` for the requested targets.

    Returns 1 if option parsing raises an exception (the error is printed)
    and 0 once both make invocations have succeeded.  A failing make
    propagates subprocess.CalledProcessError.
    """
    try:
        opts = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # The compilation flags we are setting
    make_targets = opts.TARGET
    cc = opts.cc
    cxx = opts.cxx
    cppflags = shlex.split(opts.cppflags)
    cflags = shlex.split(opts.cflags)
    ldflags = shlex.split(opts.ldflags)
    cxxflags = shlex.split(opts.cxxflags)
    mflags = shlex.split(opts.mflags)
    # Flags shared by the C and C++ compilers
    shared_flags = []

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(opts.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(opts.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(opts.fuzz_rng_seed_size),
    ])

    # Coverage / fuzzer instrumentation (mutually exclusive)
    assert not (opts.fuzzer and opts.coverage)
    if opts.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if opts.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # No separate fuzzing engine is passed to make in this mode.
        opts.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(opts.lib_fuzzing_engine))

    # The recover setting is only passed when some sanitizer is active.
    if opts.sanitize:
        if opts.sanitize_recover:
            shared_flags.append('-fsanitize-recover=all')
        else:
            shared_flags.append('-fno-sanitize-recover=all')

    if opts.msan:
        shared_flags.append('-fsanitize=memory')
        if opts.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(opts.msan_extra_cppflags)
        cflags.append(opts.msan_extra_cflags)
        cxxflags.append(opts.msan_extra_cxxflags)
        ldflags.append(opts.msan_extra_ldflags)

    if opts.asan:
        shared_flags.append('-fsanitize=address')

    if opts.ubsan:
        shared_flags.append('-fsanitize=undefined')
        if not opts.ubsan_pointer_overflow:
            shared_flags.extend(overflow_ubsan_flags(cc, cxx))

    if opts.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if opts.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    # The regression driver is built first when it is the selected engine.
    if opts.lib_fuzzing_engine == 'libregression.a':
        make_targets = ['libregression.a'] + make_targets

    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # Variable assignments handed to make, in the order they are printed
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)

    build_cmd = ['make'] + make_vars + mflags + make_targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
486
487
def libfuzzer_parser(args):
    """
    Parse the command line of the ``libfuzzer`` command.

    ``args[0]`` is popped from ``args`` and used as the program name.
    Unrecognized arguments are kept in ``extra`` on the returned namespace.
    Raises RuntimeError if TARGET is not a known fuzz target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # Default locations shown in the help text
    default_corpora = abs_join(CORPORA_DIR, 'TARGET')
    default_artifact = abs_join(CORPORA_DIR, 'TARGET-crash')
    default_seed = abs_join(CORPORA_DIR, 'TARGET-seed')

    cli.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            default_corpora))
    cli.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            default_artifact))
    cli.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            default_seed))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, passthrough = cli.parse_known_args(args)
    parsed.extra = passthrough

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    return parsed
526
527
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the libFuzzer binary for ``target``.

    ``corpora``, ``artifact`` and ``seed`` default to ``<target>``,
    ``<target>-crash`` and ``<target>-seed`` under CORPORA_DIR.  The corpora
    and artifact directories are created if missing; the seed directory is
    only passed along when it already exists.  ``extra_args`` are appended to
    the command line.  A non-zero exit status raises
    subprocess.CalledProcessError.
    """
    corpora_dir = abs_join(CORPORA_DIR, target) if corpora is None else corpora
    if artifact is None:
        artifact_dir = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    else:
        artifact_dir = artifact
    if seed is None:
        seed_dir = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    else:
        seed_dir = seed
    passthrough = [] if extra_args is None else extra_args

    binary = abs_join(FUZZ_DIR, target)

    corpora_path = create(corpora_dir)
    artifact_path = create(artifact_dir)
    seed_path = check(seed_dir)

    # Directories handed to the fuzzer: corpus first, then crashes, then seeds.
    inputs = [corpora_path, artifact_path]
    if seed_path is not None:
        inputs.append(seed_path)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_path)]
    cmd.extend(inputs)
    cmd.extend(passthrough)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
552
553
def libfuzzer_cmd(args):
    """
    Entry point of the ``libfuzzer`` command.

    Returns 1 if option parsing raises an exception (the error is printed),
    otherwise runs the fuzzer and returns 0.
    """
    try:
        opts = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        opts.TARGET,
        opts.corpora,
        opts.artifact,
        opts.seed,
        opts.extra)
    return 0
562
563
def afl_parser(args):
    """
    Parse the command line of the ``afl`` command.

    ``args[0]`` is popped from ``args`` and used as the program name.
    Unrecognized arguments are kept in ``extra`` on the returned namespace,
    and ``corpora``/``output`` are filled with their per-target defaults
    under CORPORA_DIR when not given.
    Raises RuntimeError if TARGET is not a known fuzz target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # Default locations shown in the help text
    default_corpora = abs_join(CORPORA_DIR, 'TARGET')
    default_output = abs_join(CORPORA_DIR, 'TARGET-afl')

    cli.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            default_corpora))
    cli.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            default_output))
    cli.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, passthrough = cli.parse_known_args(args)
    parsed.extra = passthrough

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    # Fall back to the per-target directories when no override was given.
    parsed.corpora = parsed.corpora or abs_join(CORPORA_DIR, target)
    parsed.output = parsed.output or abs_join(
        CORPORA_DIR, '{}-afl'.format(target))

    return parsed
605
606
def afl(args):
    """
    Entry point of the ``afl`` command: run afl-fuzz on a target.

    Returns 1 if option parsing raises an exception (the error is printed).
    Otherwise creates the input and output directories if needed, runs
    afl-fuzz and returns 0; the exit status of afl-fuzz is not checked.
    """
    try:
        opts = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, opts.TARGET)
    in_dir = create(opts.corpora)
    out_dir = create(opts.output)

    # Extra arguments go to afl-fuzz itself, before the target binary.
    cmd = [opts.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(opts.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
623
624
def regression(args):
    """
    Entry point of the ``regression`` command.

    Runs each selected target binary with its corpora directory (created if
    missing) as the only argument.  Returns 1 if option parsing raises an
    exception (the error is printed) and 0 when every target succeeded; a
    failing target propagates subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        opts = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in opts.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        binary = abs_join(FUZZ_DIR, name)
        cmd = [binary, corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
644
645
def gen_parser(args):
    """
    Parse the command line of the ``gen`` command.

    ``args[0]`` is popped from ``args`` and used as the program name.
    Unrecognized arguments are kept in ``extra`` on the returned namespace,
    and ``seed`` is filled with the per-target default under CORPORA_DIR when
    not given.
    Raises RuntimeError if TARGET is not a known fuzz target or if the
    decodecorpus binary is not an existing file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    cli = argparse.ArgumentParser(prog=args.pop(0), description=description)

    # Default seed location shown in the help text
    default_seed = abs_join(CORPORA_DIR, 'TARGET-seed')

    cli.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    cli.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    cli.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            default_seed))
    cli.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    cli.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    cli.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    cli.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))

    parsed, passthrough = cli.parse_known_args(args)
    parsed.extra = passthrough

    target = parsed.TARGET
    if target and target not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(target))

    # Fall back to the per-target seed directory when no override was given.
    parsed.seed = parsed.seed or abs_join(
        CORPORA_DIR, '{}-seed'.format(target))

    # The generator binary has to be built beforehand.
    if os.path.isfile(parsed.decodecorpus):
        return parsed
    raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                       format(parsed.decodecorpus, abs_join(FUZZ_DIR, '..')))
708
709
def gen(args):
    """
    Entry point of the ``gen`` command: generate a seed corpus for a target.

    Runs decodecorpus into temporary directories, selects the samples that
    match the target's input type (decompressed data, compressed data, or
    dictionaries trained with ``zstd --train``) and copies them into the seed
    directory, which is created if missing.

    Returns 1 if option parsing raises an exception (the error is printed)
    and 0 on success.  A failing subprocess propagates
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets always generate at least 1000 samples
        # (presumably so that `zstd --train` has enough input).
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at 2**9.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory.
        # NOTE(review): the command description mentions prepending
        # FUZZ_RNG_SEED_SIZE random bytes, but the samples are copied
        # unchanged -- confirm which of the two is intended.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0
779
780
def minimize(args):
    """
    Entry point of the ``minimize`` command.

    For each selected target, runs libFuzzer with ``-merge=1`` to merge the
    target's corpus into ``<target>_seed_corpus`` under CORPORA_DIR, then
    copies every file from ``<target>-crash`` that is not already present
    there.

    Returns 1 if option parsing raises an exception (the error is printed)
    and 0 on success, like the other command handlers.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
805
806
def zip_cmd(args):
    """
    Entry point of the ``zip`` command.

    For each selected target, runs ``zip`` inside ``<target>_seed_corpus``
    under CORPORA_DIR to produce ``<target>_seed_corpus.zip`` next to it.

    Returns 1 if option parsing raises an exception (the error is printed)
    and 0 on success, like the other command handlers.  A failing ``zip``
    propagates subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
824
825
def list_cmd(args):
    """
    Entry point of the ``list`` command: print the known fuzz target names,
    one per line.  ``args`` is unused.

    Returns 0, like the other command handlers.
    """
    print("\n".join(TARGETS))
    return 0
828
829
def short_help(args):
    """
    Print the one-line usage message, using ``args[0]`` as the program name.
    """
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
833
834
def help(args):
    """
    Print the full help text: the usage line followed by the list of
    options and commands.
    """
    short_help(args)
    help_lines = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in help_lines:
        print(line)
850
851
def main():
    """
    Dispatch ``sys.argv`` to the selected command handler.

    Prints the help text and returns 1 when no command or a help flag is
    given, and prints an error and returns 1 for an unknown command.
    Otherwise the command name is removed from ``sys.argv``, folded into the
    program name, and the handler's return value is returned.
    """
    argv = sys.argv
    if len(argv) < 2 or argv[1] in ('-h', '--help', '-H'):
        help(argv)
        return 1

    command = argv.pop(1)
    argv[0] = "{} {}".format(argv[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(argv)

    short_help(argv)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
881
882
if __name__ == "__main__":
    # Use the selected command's return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
885