1#!/usr/bin/env sh
2export AFL_QUIET=1
3export ASAN_OPTIONS=detect_leaks=0
4THISPATH=`dirname ${0}`
5export PATH="${THISPATH}:$PATH"
6awk -f - -- ${@+"$@"} <<'EOF'
7#!/usr/bin/awk -f
8
9# awk script to minimize a test corpus of input files
10#
11# based on afl-cmin bash script written by Michal Zalewski
12# rewritten by Heiko Eißfeldt (hexcoder-)
13# tested with:
14#   gnu awk (x86 Linux)
15#   bsd awk (x86 *BSD)
16#   mawk (arm32 raspbian)
17#
18# uses getopt.awk package from Arnold Robbins
19#
20# external tools used by this script:
21# test
22# grep
23# rm
24# mkdir
25# ln
26# cp
27# pwd
28# type
29# cd
30# find
31# stat
32# sort
33# cut
34# and afl-showmap from this project :-)
35
36# getopt.awk --- Do C library getopt(3) function in awk
37
38# External variables:
39#    Optind -- index in ARGV of first nonoption argument
40#    Optarg -- string value of argument to current option
41#    Opterr -- if nonzero, print our own diagnostic
42#    Optopt -- current option letter
43
44# Returns:
45#    -1     at end of options
46#    "?"    for unrecognized option
47#    <c>    a character representing the current option
48
49# Private Data:
50#    _opti  -- index in multiflag option, e.g., -abc
51
52function getopt(argc, argv, options,    thisopt, i)
53{
54    if (length(options) == 0)    # no options given
55        return -1
56
57    if (argv[Optind] == "--") {  # all done
58        Optind++
59        _opti = 0
60        return -1
61    } else if (argv[Optind] !~ /^-[^:\t ]/) {
62        _opti = 0
63        return -1
64    }
65    if (_opti == 0)
66        _opti = 2
67    thisopt = substr(argv[Optind], _opti, 1)
68    Optopt = thisopt
69    i = index(options, thisopt)
70    if (i == 0) {
71        if (Opterr)
72            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
73        if (_opti >= length(argv[Optind])) {
74            Optind++
75            _opti = 0
76        } else
77            _opti++
78        return "?"
79    }
80    if (substr(options, i + 1, 1) == ":") {
81        # get option argument
82        if (length(substr(argv[Optind], _opti + 1)) > 0)
83            Optarg = substr(argv[Optind], _opti + 1)
84        else
85            Optarg = argv[++Optind]
86        _opti = 0
87    } else
88        Optarg = ""
89    if (_opti == 0 || _opti >= length(argv[Optind])) {
90        Optind++
91        _opti = 0
92    } else
93        _opti++
94    return thisopt
95}
96
97function usage() {
98   print \
99"afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
100"\n" \
101"Required parameters:\n" \
102"  -i dir        - input directory with starting corpus\n" \
103"  -o dir        - output directory for minimized files\n" \
104"\n" \
105"Execution control settings:\n" \
106"  -f file       - location read by the fuzzed program (stdin)\n" \
107"  -m megs       - memory limit for child process ("mem_limit" MB)\n" \
108"  -t msec       - run time limit for child process (none)\n" \
109"  -O            - use binary-only instrumentation (FRIDA mode)\n" \
110"  -Q            - use binary-only instrumentation (QEMU mode)\n" \
111"  -U            - use unicorn-based instrumentation (unicorn mode)\n" \
112"\n" \
113"Minimization settings:\n" \
114"  -C            - keep crashing inputs, reject everything else\n" \
115"  -e            - solve for edge coverage only, ignore hit counts\n" \
116"\n" \
117"For additional tips, please consult README.md\n" \
118"\n" \
119"Environment variables used:\n" \
120"AFL_ALLOW_TMP: allow unsafe use of input/output directories under {/var}/tmp\n" \
121"AFL_CRASH_EXITCODE: optional child exit code to be interpreted as crash\n" \
122"AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \
123"AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \
124"AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \
125"AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" \
126"AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \
127"AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \
128      "printed to stdout\n" \
129"AFL_SKIP_BIN_CHECK: skip afl instrumentation checks for target binary\n"
130   exit 1
131}
132
133function exists_and_is_executable(binarypath) {
134  return 0 == system("test -f "binarypath" -a -x "binarypath)
135}
136
137BEGIN {
138  print "corpus minimization tool for afl++ (awk version)\n"
139
140  # defaults
141  extra_par = ""
142  AFL_CMIN_CRASHES_ONLY = ""
143
144  # process options
145  Opterr = 1    # default is to diagnose
146  Optind = 1    # skip ARGV[0]
147  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCOQU?")) != -1) {
148    if (_go_c == "i") {
149      if (!Optarg) usage()
150      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
151      in_dir = Optarg
152      continue
153    } else
154    if (_go_c == "o") {
155      if (!Optarg) usage()
156      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
157      out_dir = Optarg
158      continue
159    } else
160    if (_go_c == "f") {
161      if (!Optarg) usage()
162      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
163      stdin_file = Optarg
164      continue
165    } else
166    if (_go_c == "m") {
167      if (!Optarg) usage()
168      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
169      mem_limit = Optarg
170      mem_limit_given = 1
171      continue
172    } else
173    if (_go_c == "t") {
174      if (!Optarg) usage()
175      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
176      timeout = Optarg
177      continue
178    } else
179    if (_go_c == "C") {
180      AFL_CMIN_CRASHES_ONLY = "AFL_CMIN_CRASHES_ONLY=1 "
181      continue
182    } else
183    if (_go_c == "e") {
184      extra_par = extra_par " -e"
185      continue
186    } else
187    if (_go_c == "O") {
188      if (frida_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
189      extra_par = extra_par " -O"
190      frida_mode = 1
191      continue
192    } else
193    if (_go_c == "Q") {
194      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
195      extra_par = extra_par " -Q"
196      qemu_mode = 1
197      continue
198    } else
199    if (_go_c == "U") {
200      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
201      extra_par = extra_par " -U"
202      unicorn_mode = 1
203      continue
204    } else
205    if (_go_c == "?") {
206      exit 1
207    } else
208      usage()
209  } # while options
210
211  if (!mem_limit) mem_limit = "none"
212  if (!timeout) timeout = "none"
213
214  # get program args
215  i = 0
216  prog_args_string = ""
217  for (; Optind < ARGC; Optind++) {
218    prog_args[i++] = ARGV[Optind]
219    if (i > 1)
220      prog_args_string = prog_args_string" "ARGV[Optind]
221  }
222
223  # sanity checks
224  if (!prog_args[0] || !in_dir || !out_dir) usage()
225
226  target_bin = prog_args[0]
227
228  # Do a sanity check to discourage the use of /tmp, since we can't really
229  # handle this safely from an awk script.
230
231  if (!ENVIRON["AFL_ALLOW_TMP"]) {
232    dirlist[0] = in_dir
233    dirlist[1] = target_bin
234    dirlist[2] = out_dir
235    dirlist[3] = stdin_file
236    "pwd" | getline dirlist[4] # current directory
237    for (dirind in dirlist) {
238      dir = dirlist[dirind]
239
240      if (dir ~ /^(\/var)?\/tmp/) {
241        print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
242        exit 1
243      }
244    }
245    delete dirlist
246  }
247
248  # If @@ is specified, but there's no -f, let's come up with a temporary input
249  # file name.
250
251  trace_dir = out_dir "/.traces"
252
253  if (!stdin_file) {
254    found_atat = 0
255    for (prog_args_ind in prog_args) {
256      if (match(prog_args[prog_args_ind], "@@") != 0) {
257        found_atat = 1
258        break
259      }
260    }
261    if (found_atat) {
262      stdin_file = trace_dir "/.cur_input"
263    }
264  }
265
266  # Check for obvious errors.
267
268  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
269    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
270    exit 1
271  }
272
273  if (timeout && timeout != "none" && timeout < 10) {
274    print "[-] Error: dangerously low timeout." > "/dev/stderr"
275    exit 1
276  }
277
278  if (target_bin && !exists_and_is_executable(target_bin)) {
279
280    "command -v "target_bin" 2>/dev/null" | getline tnew
281    if (!tnew || !exists_and_is_executable(tnew)) {
282      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
283      exit 1
284    }
285    target_bin = tnew
286  }
287
288  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !frida_mode && !unicorn_mode) {
289    if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
290      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
291      exit 1
292    }
293  }
294
295  if (0 != system( "test -d "in_dir )) {
296    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
297    exit 1
298  }
299
300  #if (0 == system( "test -d "in_dir"/default" )) {
301  #  in_dir = in_dir "/default"
302  #}
303  #
304  #if (0 == system( "test -d "in_dir"/queue" )) {
305  #  in_dir = in_dir "/queue"
306  #}
307
308  system("rm -rf "trace_dir" 2>/dev/null");
309  system("rm "out_dir"/id[:_]* 2>/dev/null")
310
311  "ls "out_dir"/* 2>/dev/null | wc -l" | getline noofentries
312  if (0 == system( "test -d "out_dir" -a "noofentries" -gt 0" )) {
313    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
314    exit 1
315  }
316
317  # Check for the more efficient way to copy files...
318  if (0 != system("mkdir -p -m 0700 "trace_dir)) {
319    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
320    exit 1
321  }
322
323  if (stdin_file) {
324    # truncate input file
325    printf "" > stdin_file
326    close( stdin_file )
327  }
328
329  # First we look in PATH
330  if (0 == system("command -v afl-showmap >/dev/null 2>&1")) {
331    "command -v afl-showmap 2>/dev/null" | getline showmap
332  } else {
333    # then we look in the current directory
334    if (0 == system("test -x ./afl-showmap")) {
335      showmap = "./afl-showmap"
336    } else {
337      if (ENVIRON["AFL_PATH"]) {
338        showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
339      }
340    }
341  }
342
343  if (!showmap || 0 != system("test -x "showmap )) {
344    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
345    exit 1
346  }
347
348  # get list of input filenames sorted by size
349  i = 0
350  # yuck, gnu stat is option incompatible to bsd stat
351  # we use a heuristic to differentiate between
352  # GNU stat and other stats
353  "stat --version 2>/dev/null" | getline statversion
354  if (statversion ~ /GNU coreutils/) {
355    stat_format = "-c '%s %n'" # GNU
356  } else {
357    stat_format = "-f '%z %N'" # *BSD, MacOS
358  }
359  cmdline = "(cd "in_dir" && find . \\( ! -name \".*\" -a -type d \\) -o -type f -exec stat "stat_format" \\{\\} + | sort -k1n -k2r)"
360  #cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
361  #cmdline = "(cd "in_dir" && stat "stat_format" *) | sort -k1n -k2r"
362  #cmdline = "(cd "in_dir" && ls | xargs stat "stat_format" ) | sort -k1n -k2r"
363  while (cmdline | getline) {
364    sub(/^[0-9]+ (\.\/)?/,"",$0)
365    infilesSmallToBigFull[i] = $0
366    sub(/.*\//, "", $0)
367    infilesSmallToBig[i] = $0
368    infilesSmallToBigMap[infilesSmallToBig[i]] = infilesSmallToBigFull[i]
369    infilesSmallToBigFullMap[infilesSmallToBigFull[i]] = infilesSmallToBig[i]
370    i++
371  }
372  in_count = i
373
374  first_file = infilesSmallToBigFull[0]
375
376  #if (0 == system("test -d ""\""in_dir"/"first_file"\"")) {
377  #  print "[-] Error: The input directory is empty or contains subdirectories - please fix." > "/dev/stderr"
378  #  exit 1
379  #}
380
381  system(">\""in_dir"/.afl-cmin.test\"")
382  if (0 == system("ln \""in_dir"/.afl-cmin.test\" "trace_dir"/.link_test")) {
383    cp_tool = "ln"
384  } else {
385    cp_tool = "cp"
386  }
387  system("rm -f \""in_dir"/.afl-cmin.test\"")
388
389  if (!ENVIRON["AFL_SKIP_BIN_CHECK"]) {
390    # Make sure that we can actually get anything out of afl-showmap before we
391    # waste too much time.
392
393    print "[*] Testing the target binary..."
394
395    if (!stdin_file) {
396      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
397    } else {
398      system("cp \""in_dir"/"first_file"\" "stdin_file)
399      system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
400    }
401
402    first_count = 0
403
404    runtest = trace_dir"/.run_test"
405    while ((getline < runtest) > 0) {
406      ++first_count
407    }
408
409    if (first_count) {
410      print "[+] OK, "first_count" tuples recorded."
411    } else {
412      print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
413      if (!ENVIRON["AFL_KEEP_TRACES"]) {
414        system("rm -rf "trace_dir" 2>/dev/null")
415      }
416      exit 1
417    }
418  }
419
420  # Let's roll!
421
422  #############################
423  # STEP 1: Collecting traces #
424  #############################
425
426  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."
427
428  cur = 0;
429  if (!stdin_file) {
430    print "    Processing "in_count" files (forkserver mode)..."
431#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string
432    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
433  } else {
434    print "    Processing "in_count" files (forkserver mode)..."
435#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null"
436    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
437  }
438
439  if (retval && !AFL_CMIN_CRASHES_ONLY) {
440    print "[!] Exit code "retval" != 0 received from afl-showmap, terminating..."
441
442    if (!ENVIRON["AFL_KEEP_TRACES"]) {
443      system("rm -rf "trace_dir" 2>/dev/null")
444      system("rmdir "out_dir)
445    }
446    exit retval
447  }
448
449  #######################################################
450  # STEP 2: register smallest input file for each tuple #
451  # STEP 3: copy that file (at most once)               #
452  #######################################################
453
454  print "[*] Processing traces for input files in '"in_dir"'."
455
456  cur = 0
457  out_count = 0
458  tuple_count = 0
459
460  # from rare to frequent new tuples
461  # get the best (smallest) file for it
462  # and copy it
463  while (cur < in_count) {
464    fn = infilesSmallToBig[cur]
465    ++cur
466    printf "\r    Processing file "cur"/"in_count
467    # create path for the trace file from afl-showmap
468    tracefile_path = trace_dir"/"fn
469    # gather all keys, and count them
470    while ((getline line < tracefile_path) > 0) {
471        key = line
472        if (!(key in key_count)) {
473          ++tuple_count
474        }
475        ++key_count[key]
476        if (! (key in best_file)) {
477            # this is the best file for this key
478            best_file[key] = fn
479#printf "BEST_FILE[%d]=\"%s\"\n",key,fn | "sort -t'[' -k2 > "trace_dir"/.candidate_script"
480        }
481#printf "%d %s\n",key,fn > trace_dir"/.candidate_list"
482    }
483    close(tracefile_path)
484  }
485  print ""
486
487  # sort keys
488  sortedKeys = trace_dir"/.all_uniq"
489  sortKeysCmd = "sort -k1n > "sortedKeys
490  for (key in key_count) {
491     printf "%7d %s\n",key_count[key],key | sortKeysCmd
492  }
493  close(sortKeysCmd)
494
495  # iterate over keys from rare to frequent and
496  # copy best file
497  while ((getline < sortedKeys) > 0) {
498
499    # split
500    nrFields = split($0, field, / +/)
501#print nrFields" Felder: '"field[0]"',  '"field[1]"',  '"field[2]"',  '"field[3]"'"
502    key = field[nrFields]
503
504    ++tcnt;
505    printf "\r    Processing tuple "tcnt"/"tuple_count" with count "key_count[key]"..."
506    if (key in keyAlreadyKnown) {
507      continue
508    }
509
510    fn = best_file[key]
511    # gather all tuples from the best file for this key
512    tracedfn = trace_dir"/"fn
513    while ((getline < tracedfn) > 0) {
514      keyAlreadyKnown[$0] = ""
515    }
516    close(tracedfn)
517
518    # copy file unless already done
519    if (! (fn in file_already_copied)) {
520      realfile = infilesSmallToBigMap[fn]
521      system(cp_tool" \""in_dir"/"realfile"\" \""out_dir"/"fn"\"")
522      file_already_copied[fn] = ""
523      ++out_count
524      #printf "tuple nr %d (%d cnt=%d) -> %s\n",tcnt,key,key_count[key],fn > trace_dir"/.log"
525    }
526  }
527  close(sortedKeys)
528  print ""
529  print "[+] Found "tuple_count" unique tuples across "in_count" files."
530
531  if (out_count == 1) {
532    print "[!] WARNING: All test cases had the same traces, check syntax!"
533  }
534  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."
535
536  if (!ENVIRON["AFL_KEEP_TRACES"]) {
537    system("rm -rf "trace_dir" 2>/dev/null")
538  }
539
540  exit 0
541}
542EOF
543