#!/usr/bin/env python

# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """Join the path components and return the result as an absolute path."""
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """Kind of input data a fuzz target consumes."""
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """Kind of data decodecorpus is asked to generate for a target."""
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """Per-target description: its input type and its frame type."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'decompress_dstSize_tooSmall': TargetInfo(InputType.RAW_DATA),
    'fse_read_ncount': TargetInfo(InputType.RAW_DATA),
    'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
    'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """Return the absolute path of directory r, creating it if needed."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """Return the absolute path of r if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a temporary directory removed on exit."""
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """
    Expand a list of target names into a de-duplicated list of targets.
    Empty names are skipped and 'all' expands to every known target.
    Raises RuntimeError for an unknown target name.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return list(targets)


def targets_parser(args, description):
    """
    Parse a command line that takes zero or more TARGETs.
    args[0] is consumed as the program name. Unrecognized arguments are
    stored in the returned namespace as `extra`.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.
    Updates args.asan, args.msan and args.ubsan from -fsanitize= and
    -fno-sanitize= options found in flags, then sets args.sanitize.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    # Sanitizer names may contain hyphens (e.g. fuzzer-no-link), so the
    # character class must include '-' or the rest of the list is dropped.
    san_matches = re.findall(r'-fsanitize=((?:[a-z-]+,?)+)', flags)
    nosan_matches = re.findall(r'-fno-sanitize=((?:[a-z-]+,?)+)', flags)

    def split_names(matches):
        # Drop the empty names produced by trailing commas or no matches.
        return set(name
                   for group in matches
                   for name in group.split(',')
                   if name)

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = split_names(san_matches)
    nosan = split_names(nosan_matches)

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.
    Returns (compiler, version) where compiler is 'clang', 'gcc' or None and
    version is a 3-tuple of ints (None when the compiler is unrecognized).
    Raises RuntimeError if cc and cxx disagree, or if no version is found.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    # The banner is only echoed for the user, so tolerate non-ASCII bytes.
    print("{} --version:\n{}".format(
        cc, cc_version_bytes.decode('utf-8', 'replace')))
    if b'clang' in cc_version_bytes:
        if b'clang' not in cxx_version_bytes:
            raise RuntimeError(
                '{} is clang but {} is not'.format(cc, cxx))
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes or b'GCC' in cc_version_bytes:
        if not (b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes):
            raise RuntimeError(
                '{} is gcc but {} is not'.format(cc, cxx))
        compiler = 'gcc'
    if compiler is not None:
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to find the version in `{} --version`'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """Return the UBSAN overflow checks to disable for this compiler."""
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc' and version < (8, 0, 0):
        return ['-fno-sanitize=signed-integer-overflow']
    if compiler == 'gcc' or (compiler == 'clang' and version >= (5, 0, 0)):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """
    Parse the arguments of the `build` command.
    args[0] is consumed as the program name. Sanitizer settings are merged
    with any -fsanitize flags found in the compiler/linker flags.
    Raises RuntimeError on an inconsistent combination of options.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(
                  LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """
    Run the `build` command: `make clean` followed by `make` of the targets
    with the flags derived from the command line.
    Returns 0 on success and 1 if the arguments could not be parsed.
    A failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup).
        # Split them like every other flag string so that empty values do
        # not add empty entries to the flag lists.
        cppflags += shlex.split(args.msan_extra_cppflags)
        cflags += shlex.split(args.msan_extra_cflags)
        cxxflags += shlex.split(args.msan_extra_cxxflags)
        ldflags += shlex.split(args.msan_extra_ldflags)

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """
    Parse the arguments of the `libfuzzer` command.
    args[0] is consumed as the program name. Unrecognized arguments are
    stored as `extra`. Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the libFuzzer binary for target.
    The corpora and artifact directories are created if missing; the seed
    directory is only passed along when it already exists. Directories
    default to CORPORA_DIR/<target>, <target>-crash and <target>-seed.
    Raises subprocess.CalledProcessError if the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """Run the `libfuzzer` command. Returns 0, or 1 on a parse error."""
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """
    Parse the arguments of the `afl` command.
    args[0] is consumed as the program name. Unrecognized arguments are
    stored as `extra`, and the corpora/output directories are defaulted.
    Raises RuntimeError for an unknown TARGET.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """Run the `afl` command. Returns 0, or 1 on a parse error."""
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    # The exit status of afl-fuzz is not checked.
    subprocess.call(cmd)
    return 0


def regression(args):
    """
    Run the `regression` command: run each target binary on its corpora
    directory. Returns 0, or 1 on a parse error.
    Raises subprocess.CalledProcessError if a target exits non-zero.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """
    Parse the arguments of the `gen` command.
    args[0] is consumed as the program name. Unrecognized arguments are
    stored as `extra`, and the seed directory is defaulted.
    Raises RuntimeError for an unknown TARGET or a missing decodecorpus.
    """
    # NOTE(review): the description and --fuzz-rng-seed-size talk about
    # prepending seed bytes, but gen() copies samples verbatim and never
    # reads fuzz_rng_seed_size. Confirm which one is intended.
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """
    Run the `gen` command: generate samples with decodecorpus (and train
    dictionaries with zstd for dictionary targets), then copy them into the
    seed directory. Returns 0, or 1 on a parse error.
    Raises subprocess.CalledProcessError if a tool exits non-zero.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets get at least 1000 samples to train on.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over verbatim (no RNG seed is prepended here).
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0


def minimize(args):
    """
    Run the `minimize` command: merge each target's corpus into its
    seed corpus with libFuzzer, then copy in any crashes not already there.
    Returns 0, or 1 on a parse error.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """
    Run the `zip` command: zip each target's seed corpus directory into
    <seed_corpus>.zip. Returns 0, or 1 on a parse error.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """Run the `list` command: print every target name, one per line."""
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """Print the one-line usage message; args[0] is the program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """Print the usage message followed by the list of commands."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """Dispatch sys.argv to the selected command and return its status."""
    # Work on a copy so that sys.argv itself is left untouched.
    args = list(sys.argv)
    if len(args) < 2:
        help(args)
        return 1
    if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
        help(args)
        return 1
    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)
    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is not None:
        return handler(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1


if __name__ == "__main__":
    sys.exit(main())