#!/usr/bin/env python

# ################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ##########################################################################

import argparse
import contextlib
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile


def abs_join(a, *p):
    """Join the path components and return the result as an absolute path."""
    return os.path.abspath(os.path.join(a, *p))


class InputType(object):
    """Kind of input data a fuzz target consumes."""
    RAW_DATA = 1
    COMPRESSED_DATA = 2
    DICTIONARY_DATA = 3


class FrameType(object):
    """Kind of frame a fuzz target operates on."""
    ZSTD = 1
    BLOCK = 2


class TargetInfo(object):
    """Input type and frame type associated with one fuzz target."""

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type


# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')


def create(r):
    """Return the absolute path of r, creating the directory if missing."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d


def check(r):
    """Return the absolute path of r if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d


@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a temporary directory removed on exit."""
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)


def parse_targets(in_targets):
    """
    Expand and validate a list of target names.

    Empty names are skipped and 'all' expands to every entry of TARGETS.
    Returns a de-duplicated list (order is unspecified).
    Raises RuntimeError for a name that is not a known target.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets = targets.union(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return list(targets)


def targets_parser(args, description):
    """
    Parse a command line made of zero or more TARGETs plus extra arguments.

    args[0] is consumed as the program name. Unrecognized arguments are stored
    in the returned namespace as `extra`, and `TARGET` is replaced by the
    validated target list from parse_targets().
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args


def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans `flags` for -fsanitize=... and -fno-sanitize=... and updates
    args.asan, args.msan and args.ubsan accordingly (keeping the current value
    when the sanitizer is not mentioned), then sets args.sanitize to whether
    any of them is enabled.
    Raises RuntimeError if a sanitizer is both enabled and disabled.
    """
    san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
    nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    san = set(san_flags.split(','))
    nosan = set(nosan_flags.split(','))

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args


def parse_version(version_bytes):
    """
    Extract the first MAJOR.MINOR.PATCH triple from `--version` output.

    Returns a tuple of three ints, or None if no such triple is present.
    """
    # The quantifier must be inside each group so that multi-digit components
    # (e.g. 10.0.1) are captured whole rather than as their last digit.
    version_match = re.search(br'([0-9]+)\.([0-9]+)\.([0-9]+)', version_bytes)
    if version_match is None:
        return None
    return tuple(int(version_match.group(i)) for i in range(1, 4))


def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs `cc --version` and `cxx --version`. Returns (compiler, version) where
    compiler is 'clang', 'gcc' or None, and version is a 3-tuple of ints or
    None when the compiler is unknown or no version could be parsed.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        version = parse_version(cc_version_bytes)
    return compiler, version


def overflow_ubsan_flags(cc, cxx):
    """Return the -fno-sanitize flag (if any) used to relax UBSAN overflow."""
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        return ['-fno-sanitize=signed-integer-overflow']
    # version is None when it could not be parsed; treat that as "too old".
    if compiler == 'clang' and version is not None and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []


def build_parser(args):
    """
    Parse the command line of the `build` command.

    args[0] is consumed as the program name. Sanitizer settings found in the
    compiler/linker flags are merged in through parse_env_flags().
    Raises RuntimeError when the selected options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(
                  LIB_FUZZING_ENGINE)))

    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args


def build(args):
    """
    Implement the `build` command: run `make clean` then `make` with the
    flags derived from the command line.

    Returns 0 on success and 1 if the arguments could not be parsed.
    A failing make invocation raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup)
        cppflags += [args.msan_extra_cppflags]
        cflags += [args.msan_extra_cflags]
        cxxflags += [args.msan_extra_cxxflags]
        ldflags += [args.msan_extra_ldflags]

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0


def libfuzzer_parser(args):
    """
    Parse the command line of the `libfuzzer` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args


def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """
    Run the fuzzer binary FUZZ_DIR/target.

    The corpora and artifact directories are created when missing; the seed
    directory is only passed along if it already exists. Directories default
    to CORPORA_DIR/<target>, <target>-crash and <target>-seed.
    Raises subprocess.CalledProcessError if the binary exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)


def libfuzzer_cmd(args):
    """Implement the `libfuzzer` command; returns 0, or 1 on bad arguments."""
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0


def afl_parser(args):
    """
    Parse the command line of the `afl` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Missing --corpora/--output fall back to directories under
    CORPORA_DIR. Raises RuntimeError if TARGET is not a known target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args


def afl(args):
    """
    Implement the `afl` command: run afl-fuzz on FUZZ_DIR/TARGET.

    Returns 0 once afl-fuzz has exited (its exit status is not checked), or 1
    if the arguments could not be parsed.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0


def regression(args):
    """
    Implement the `regression` command: run each target binary on its corpus
    directory under CORPORA_DIR (created when missing).

    Returns 0 on success and 1 on bad arguments; a failing target raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0


def gen_parser(args):
    """
    Parse the command line of the `gen` command.

    args[0] is consumed as the program name; unrecognized arguments are kept
    in `extra`. Raises RuntimeError if TARGET is not a known target or if the
    decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args


def gen(args):
    """
    Implement the `gen` command: generate samples with decodecorpus (and, for
    dictionary targets, train dictionaries with zstd) and copy them into the
    seed directory.

    Returns 0 on success and 1 on bad arguments; a failing subprocess raises
    subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary training gets at least 1000 samples to work from.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir,
                                   '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples over to the seed directory.
        # NOTE(review): the samples are copied verbatim; no RNG seed bytes are
        # prepended here even though --fuzz-rng-seed-size is accepted and the
        # command description mentions it. Confirm which one is intended.
        CHUNK_SIZE = 131072
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, CHUNK_SIZE)
    return 0


def minimize(args):
    """
    Implement the `minimize` command: merge each target's corpus into
    CORPORA_DIR/<target>_seed_corpus with libfuzzer's -merge=1, then copy in
    any crash files that are not already present.

    Returns 0 on success and 1 on bad arguments.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0


def zip_cmd(args):
    """
    Implement the `zip` command: zip CORPORA_DIR/<target>_seed_corpus into
    <target>_seed_corpus.zip for each target.

    Returns 0 on success and 1 on bad arguments; a failing zip raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0


def list_cmd(args):
    """Implement the `list` command: print one target name per line."""
    print("\n".join(TARGETS))
    return 0


def short_help(args):
    """Print the one-line usage message; args[0] is the program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))


def help(args):
    """Print the usage message followed by the list of commands."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")


def main():
    """
    Dispatch sys.argv to the selected command and return its exit status.

    Returns 1 when no command is given, when help is requested, or when the
    command is unknown.
    """
    args = sys.argv
    if len(args) < 2:
        help(args)
        return 1
    if args[1] == '-h' or args[1] == '--help' or args[1] == '-H':
        help(args)
        return 1
    command = args.pop(1)
    # Fold the command into the program name so sub-parsers show it in usage.
    args[0] = "{} {}".format(args[0], command)
    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is not None:
        return handler(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1


if __name__ == "__main__":
    sys.exit(main())