#!/usr/bin/env sh
# afl-cmin: corpus minimization for afl++.
# This wrapper only prepares the environment and hands the command line to
# the awk program embedded in the here-document below.
export AFL_QUIET=1
export ASAN_OPTIONS=detect_leaks=0
# Put the directory of this script first in PATH so that a sibling
# afl-showmap is picked up by "command -v".
THISPATH=$(dirname "$0")
export PATH="${THISPATH}:$PATH"
awk -f - -- ${@+"$@"} <<'EOF'
#!/usr/bin/awk -f

# awk script to minimize a test corpus of input files
#
# based on afl-cmin bash script written by Michal Zalewski
# rewritten by Heiko Eißfeldt (hexcoder-)
# tested with:
#   gnu awk (x86 Linux)
#   bsd awk (x86 *BSD)
#   mawk (arm32 raspbian)
#
# uses getopt.awk package from Arnold Robbins
#
# external tools used by this script:
# test
# grep
# rm
# rmdir
# mkdir
# ln
# cp
# pwd
# command
# cd
# find
# stat
# sort
# ls
# wc
# and afl-showmap from this project :-)

# getopt.awk --- Do C library getopt(3) function in awk

# External variables:
#    Optind -- index in ARGV of first nonoption argument
#    Optarg -- string value of argument to current option
#    Opterr -- if nonzero, print our own diagnostic
#    Optopt -- current option letter

# Returns:
#    -1     at end of options
#    "?"    for unrecognized option
#    <c>    a character representing the current option

# Private Data:
#    _opti  -- index in multiflag option, e.g., -abc

function getopt(argc, argv, options,    thisopt, i)
{
  if (length(options) == 0)    # no options given
    return -1

  if (argv[Optind] == "--") {  # all done
    Optind++
    _opti = 0
    return -1
  } else if (argv[Optind] !~ /^-[^:\t ]/) {
    _opti = 0
    return -1
  }
  if (_opti == 0)
    _opti = 2
  thisopt = substr(argv[Optind], _opti, 1)
  Optopt = thisopt
  i = index(options, thisopt)
  if (i == 0) {
    if (Opterr)
      printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
    if (_opti >= length(argv[Optind])) {
      Optind++
      _opti = 0
    } else
      _opti++
    return "?"
  }
  if (substr(options, i + 1, 1) == ":") {
    # get option argument
    if (length(substr(argv[Optind], _opti + 1)) > 0)
      Optarg = substr(argv[Optind], _opti + 1)
    else
      Optarg = argv[++Optind]
    _opti = 0
  } else
    Optarg = ""
  if (_opti == 0 || _opti >= length(argv[Optind])) {
    Optind++
    _opti = 0
  } else
    _opti++
  return thisopt
}

# usage --- print the help text to stdout and terminate with exit status 1
function usage() {
   print \
"afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
"\n" \
"Required parameters:\n" \
" -i dir - input directory with starting corpus\n" \
" -o dir - output directory for minimized files\n" \
"\n" \
"Execution control settings:\n" \
" -f file - location read by the fuzzed program (stdin)\n" \
" -m megs - memory limit for child process ("mem_limit" MB)\n" \
" -t msec - run time limit for child process (none)\n" \
" -O - use binary-only instrumentation (FRIDA mode)\n" \
" -Q - use binary-only instrumentation (QEMU mode)\n" \
" -U - use unicorn-based instrumentation (unicorn mode)\n" \
"\n" \
"Minimization settings:\n" \
" -C - keep crashing inputs, reject everything else\n" \
" -e - solve for edge coverage only, ignore hit counts\n" \
"\n" \
"For additional tips, please consult README.md\n" \
"\n" \
"Environment variables used:\n" \
"AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp\n" \
"AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \
"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \
"AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \
"AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \
"AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" \
"AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \
"AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \
      "printed to stdout\n" \
"AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary\n"
   exit 1
}

# shell_quote --- return s as a single-quoted word for /bin/sh
#
# Every embedded single quote is emitted as '\'' (close quote, escaped
# quote, reopen quote), so blanks, globs, $ and backticks inside s reach
# the invoked command verbatim.  split() is used instead of gsub() because
# backslash handling in gsub() replacement text differs between awks.
function shell_quote(s,    parts, n, k, quoted)
{
  n = split(s, parts, "'")
  quoted = parts[1]
  for (k = 2; k <= n; k++)
    quoted = quoted "'\\''" parts[k]
  return "'" quoted "'"
}

# fail --- print "[-] Error: <msg>" to stderr and terminate with status 1
function fail(msg)
{
  print "[-] Error: " msg > "/dev/stderr"
  exit 1
}

# remove_traces --- delete the trace directory (global trace_dir) unless
# the user asked to keep it via AFL_KEEP_TRACES
function remove_traces()
{
  if (!ENVIRON["AFL_KEEP_TRACES"])
    system("rm -rf " shell_quote(trace_dir) " 2>/dev/null")
}

# exists_and_is_executable --- true if binarypath is a regular file that
# the current user may execute
function exists_and_is_executable(binarypath,    q)
{
  q = shell_quote(binarypath)
  return 0 == system("test -f " q " && test -x " q)
}

BEGIN {
  print "corpus minimization tool for afl++ (awk version)\n"

  # defaults
  extra_par = ""
  AFL_CMIN_CRASHES_ONLY = ""

  # process options
  Opterr = 1    # default is to diagnose
  Optind = 1    # skip ARGV[0]
  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCOQU?")) != -1) {
    if (_go_c == "i") {
      if (!Optarg) usage()
      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      in_dir = Optarg
    } else if (_go_c == "o") {
      if (!Optarg) usage()
      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      out_dir = Optarg
    } else if (_go_c == "f") {
      if (!Optarg) usage()
      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      stdin_file = Optarg
    } else if (_go_c == "m") {
      if (!Optarg) usage()
      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      mem_limit = Optarg
      mem_limit_given = 1
    } else if (_go_c == "t") {
      if (!Optarg) usage()
      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      timeout = Optarg
    } else if (_go_c == "C") {
      AFL_CMIN_CRASHES_ONLY = "AFL_CMIN_CRASHES_ONLY=1 "
    } else if (_go_c == "e") {
      extra_par = extra_par " -e"
    } else if (_go_c == "O") {
      if (frida_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -O"
      frida_mode = 1
    } else if (_go_c == "Q") {
      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -Q"
      qemu_mode = 1
    } else if (_go_c == "U") {
      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
      extra_par = extra_par " -U"
      unicorn_mode = 1
    } else if (_go_c == "?") {
      exit 1
    } else {
      # "h" and anything else that has no handler above
      usage()
    }
  } # while options

  if (!mem_limit) mem_limit = "none"
  if (!timeout) timeout = "none"

  # get program args; everything after the target binary is shell-quoted
  # once here so that arguments containing blanks or shell metacharacters
  # reach the target unchanged
  i = 0
  prog_args_string = ""
  for (; Optind < ARGC; Optind++) {
    prog_args[i++] = ARGV[Optind]
    if (i > 1)
      prog_args_string = prog_args_string " " shell_quote(ARGV[Optind])
  }

  # sanity checks
  if (!prog_args[0] || !in_dir || !out_dir) usage()

  target_bin = prog_args[0]

  # Do a sanity check to discourage the use of /tmp, since we can't really
  # handle this safely from an awk script.

  if (!ENVIRON["AFL_ALLOW_TMP"]) {
    dirlist[0] = in_dir
    dirlist[1] = target_bin
    dirlist[2] = out_dir
    dirlist[3] = stdin_file
    "pwd" | getline dirlist[4] # current directory
    close("pwd")
    for (dirind in dirlist) {
      dir = dirlist[dirind]

      # match /tmp and /var/tmp themselves or anything below them, but
      # not siblings that merely share the prefix (e.g. /tmpfoo)
      if (dir ~ /^(\/var)?\/tmp(\/|$)/) {
        print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
        exit 1
      }
    }
    delete dirlist
  }

  # If @@ is specified, but there's no -f, let's come up with a temporary input
  # file name.

  trace_dir = out_dir "/.traces"

  if (!stdin_file) {
    found_atat = 0
    for (prog_args_ind in prog_args) {
      if (index(prog_args[prog_args_ind], "@@") != 0) {
        found_atat = 1
        break
      }
    }
    if (found_atat) {
      stdin_file = trace_dir "/.cur_input"
    }
  }

  # Check for obvious errors.
  # The limits may be plain strings (e.g. taken from "-m100" via substr()),
  # so force a numeric comparison with "+ 0"; otherwise awk would compare
  # them lexically.

  if (mem_limit != "none" && (mem_limit + 0) < 5)
    fail("dangerously low memory limit.")

  if (timeout != "none" && (timeout + 0) < 10)
    fail("dangerously low timeout.")

  if (target_bin && !exists_and_is_executable(target_bin)) {
    # not a usable path as given: try to resolve it through PATH
    resolve_cmd = "command -v " shell_quote(target_bin) " 2>/dev/null"
    resolve_cmd | getline tnew
    close(resolve_cmd)
    if (!tnew || !exists_and_is_executable(tnew))
      fail("binary '"target_bin"' not found or not executable.")
    target_bin = tnew
  }

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !frida_mode && !unicorn_mode) {
    if (0 != system("grep -q __AFL_SHM_ID " shell_quote(target_bin)))
      fail("binary '"target_bin"' doesn't appear to be instrumented.")
  }

  if (0 != system("test -d " shell_quote(in_dir)))
    fail("directory '"in_dir"' not found.")

  # remove leftovers of a previous run; the id[:_]* glob has to stay
  # outside the quotes so that the shell expands it
  system("rm -rf " shell_quote(trace_dir) " 2>/dev/null")
  system("rm " shell_quote(out_dir) "/id[:_]* 2>/dev/null")

  count_cmd = "ls " shell_quote(out_dir) "/* 2>/dev/null | wc -l"
  count_cmd | getline noofentries
  close(count_cmd)
  if (0 == system("test -d " shell_quote(out_dir)) && (noofentries + 0) > 0)
    fail("directory '"out_dir"' exists and is not empty - delete it first.")

  if (0 != system("mkdir -p -m 0700 " shell_quote(trace_dir)))
    fail("Cannot create directory "trace_dir)

  if (stdin_file) {
    # truncate input file
    printf "" > stdin_file
    close(stdin_file)
  }

  # First we look in PATH
  if (0 == system("command -v afl-showmap >/dev/null 2>&1")) {
    "command -v afl-showmap 2>/dev/null" | getline showmap
    close("command -v afl-showmap 2>/dev/null")
  } else if (0 == system("test -x ./afl-showmap")) {
    # then we look in the current directory
    showmap = "./afl-showmap"
  } else if (ENVIRON["AFL_PATH"]) {
    showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
  }

  if (!showmap || 0 != system("test -x " shell_quote(showmap)))
    fail("can't find 'afl-showmap' - please set AFL_PATH.")

  # get list of input filenames sorted by size
  i = 0
  # yuck, gnu stat is option incompatible to bsd stat
  # we use a heuristic to differentiate between
  # GNU stat and other stats
  "stat --version 2>/dev/null" | getline statversion
  close("stat --version 2>/dev/null")
  if (statversion ~ /GNU coreutils/) {
    stat_format = "-c '%s %n'" # GNU
  } else {
    stat_format = "-f '%z %N'" # *BSD, MacOS
  }
  list_cmd = "(cd " shell_quote(in_dir) " && find . \\( ! -name \".*\" -a -type d \\) -o -type f -exec stat "stat_format" \\{\\} + | sort -k1n -k2r)"
  # "> 0" matters: getline returns -1 on error, which a bare
  # while (cmd | getline) would treat as true forever
  while ((list_cmd | getline entry) > 0) {
    # strip the leading "<size> " and an optional "./"
    sub(/^[0-9]+ (\.\/)?/, "", entry)
    infilesSmallToBigFull[i] = entry
    # reduce to the base name
    sub(/.*\//, "", entry)
    infilesSmallToBig[i] = entry
    infilesSmallToBigMap[infilesSmallToBig[i]] = infilesSmallToBigFull[i]
    infilesSmallToBigFullMap[infilesSmallToBigFull[i]] = infilesSmallToBig[i]
    i++
  }
  close(list_cmd)
  in_count = i

  if (in_count == 0) {
    remove_traces()
    fail("no input files found in directory '"in_dir"'.")
  }

  first_file = infilesSmallToBigFull[0]

  # Check for the more efficient way to copy files: hard links work only
  # if input and output live on the same file system.
  link_probe = shell_quote(in_dir "/.afl-cmin.test")
  system(">" link_probe)
  if (0 == system("ln " link_probe " " shell_quote(trace_dir "/.link_test"))) {
    cp_tool = "ln"
  } else {
    cp_tool = "cp"
  }
  system("rm -f " link_probe)

  # command line pieces shared by all afl-showmap invocations
  showmap_head = AFL_CMIN_CRASHES_ONLY shell_quote(showmap) " -m " shell_quote(mem_limit) " -t " shell_quote(timeout)
  showmap_tail = " -- " shell_quote(target_bin) " " prog_args_string

  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
    # Make sure that we can actually get anything out of afl-showmap before we
    # waste too much time.

    print "[*] Testing the target binary..."

    runtest = trace_dir"/.run_test"

    if (!stdin_file) {
      system("AFL_CMIN_ALLOW_ANY=1 " showmap_head " -o " shell_quote(runtest) " -Z "extra_par showmap_tail " <" shell_quote(in_dir "/" first_file))
    } else {
      system("cp " shell_quote(in_dir "/" first_file) " " shell_quote(stdin_file))
      system("AFL_CMIN_ALLOW_ANY=1 " showmap_head " -o " shell_quote(runtest) " -Z "extra_par" -A " shell_quote(stdin_file) showmap_tail " </dev/null")
    }

    first_count = 0

    while ((getline trace_line < runtest) > 0) {
      ++first_count
    }
    close(runtest)

    if (first_count) {
      print "[+] OK, "first_count" tuples recorded."
    } else {
      remove_traces()
      fail("no instrumentation output detected (perhaps crash or timeout).")
    }
  }

  # Let's roll!

  #############################
  # STEP 1: Collecting traces #
  #############################

  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."

  cur = 0
  print " Processing "in_count" files (forkserver mode)..."
  if (!stdin_file) {
    retval = system(showmap_head " -o " shell_quote(trace_dir) " -Z "extra_par" -i " shell_quote(in_dir) showmap_tail)
  } else {
    retval = system(showmap_head " -o " shell_quote(trace_dir) " -Z "extra_par" -i " shell_quote(in_dir) " -A " shell_quote(stdin_file) showmap_tail " </dev/null")
  }

  if (retval && !AFL_CMIN_CRASHES_ONLY) {
    print "[!] Exit code "retval" != 0 received from afl-showmap, terminating..."

    if (!ENVIRON["AFL_KEEP_TRACES"]) {
      system("rm -rf " shell_quote(trace_dir) " 2>/dev/null")
      system("rmdir " shell_quote(out_dir))
    }
    exit retval
  }

  #######################################################
  # STEP 2: register smallest input file for each tuple #
  # STEP 3: copy that file (at most once)               #
  #######################################################

  print "[*] Processing traces for input files in '"in_dir"'."

  cur = 0
  out_count = 0
  tuple_count = 0

  # Files are visited from smallest to biggest, so the first file seen
  # for a tuple is the best (smallest) one for it.
  while (cur < in_count) {
    fn = infilesSmallToBig[cur]
    ++cur
    printf "\r Processing file "cur"/"in_count
    # create path for the trace file from afl-showmap
    tracefile_path = trace_dir"/"fn
    # gather all keys, and count them
    while ((getline line < tracefile_path) > 0) {
      key = line
      if (!(key in key_count)) {
        ++tuple_count
      }
      ++key_count[key]
      if (!(key in best_file)) {
        # this is the best file for this key
        best_file[key] = fn
      }
    }
    close(tracefile_path)
  }
  print ""

  # sort keys by how many files hit them (rare first)
  sortedKeys = trace_dir"/.all_uniq"
  sortKeysCmd = "sort -k1n > " shell_quote(sortedKeys)
  for (key in key_count) {
    printf "%7d %s\n",key_count[key],key | sortKeysCmd
  }
  close(sortKeysCmd)

  # iterate over keys from rare to frequent and
  # copy best file
  while ((getline sorted_line < sortedKeys) > 0) {

    # the key is the last blank-separated field of "<count> <key>"
    nrFields = split(sorted_line, field, / +/)
    key = field[nrFields]

    ++tcnt
    printf "\r Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..."
    if (key in keyAlreadyKnown) {
      continue
    }

    fn = best_file[key]
    # gather all tuples from the best file for this key
    tracedfn = trace_dir"/"fn
    while ((getline covered_key < tracedfn) > 0) {
      keyAlreadyKnown[covered_key] = ""
    }
    close(tracedfn)

    # copy file unless already done
    if (!(fn in file_already_copied)) {
      realfile = infilesSmallToBigMap[fn]
      system(cp_tool " " shell_quote(in_dir "/" realfile) " " shell_quote(out_dir "/" fn))
      file_already_copied[fn] = ""
      ++out_count
    }
  }
  close(sortedKeys)
  print ""
  print "[+] Found "tuple_count" unique tuples across "in_count" files."

  if (out_count == 1) {
    print "[!] WARNING: All test cases had the same traces, check syntax!"
  }
  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."

  remove_traces()

  exit 0
}
EOF