1 /*
2    american fuzzy lop - wrapper for GNU as
3    ---------------------------------------
4 
5    Written and maintained by Michal Zalewski <lcamtuf@google.com>
6 
7    Copyright 2013, 2014, 2015 Google Inc. All rights reserved.
8 
9    Licensed under the Apache License, Version 2.0 (the "License");
10    you may not use this file except in compliance with the License.
11    You may obtain a copy of the License at:
12 
13      http://www.apache.org/licenses/LICENSE-2.0
14 
15    The sole purpose of this wrapper is to preprocess assembly files generated
16    by GCC / clang and inject the instrumentation bits included from afl-as.h. It
17    is automatically invoked by the toolchain when compiling programs using
18    afl-gcc / afl-clang.
19 
20    Note that it's an explicit non-goal to instrument hand-written assembly,
21    be it in separate .s files or in __asm__ blocks. The only aspiration this
22    utility has right now is to be able to skip them gracefully and allow the
23    compilation process to continue.
24 
25    That said, see experimental/clang_asm_normalize/ for a solution that may
26    allow clang users to make things work even with hand-crafted assembly. Just
27    note that there is no equivalent for GCC.
28 
29  */
30 
31 #define AFL_MAIN
32 
33 #include "config.h"
34 #include "types.h"
35 #include "debug.h"
36 #include "alloc-inl.h"
37 
38 #include "afl-as.h"
39 
40 #include <stdio.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <time.h>
45 #include <ctype.h>
46 #include <fcntl.h>
47 
48 #include <sys/wait.h>
49 #include <sys/time.h>
50 
51 static u8** as_params;          /* Parameters passed to the real 'as'   */
52 
53 static u8*  input_file;         /* Originally specified input file      */
54 static u8*  modified_file;      /* Instrumented file for the real 'as'  */
55 
56 static u8   be_quiet,           /* Quiet mode (no stderr output)        */
57             clang_mode,         /* Running in clang mode?               */
58             pass_thru,          /* Just pass data through?              */
59             just_version,       /* Just show version?                   */
60             sanitizer;          /* Using ASAN / MSAN                    */
61 
62 static u32  inst_ratio = 100,   /* Instrumentation probability (%)      */
63             as_par_cnt = 1;     /* Number of params to 'as'             */
64 
65 /* If we don't find --32 or --64 in the command line, default to
66    instrumentation for whichever mode we were compiled with. This is not
67    perfect, but should do the trick for almost all use cases. */
68 
69 #ifdef __x86_64__
70 
71 static u8   use_64bit = 1;
72 
73 #else
74 
75 static u8   use_64bit = 0;
76 
77 #ifdef __APPLE__
78 #  error "Sorry, 32-bit Apple platforms are not supported."
79 #endif /* __APPLE__ */
80 
81 #endif /* ^__x86_64__ */
82 
83 
84 /* Examine and modify parameters to pass to 'as'. Note that the file name
85    is always the last parameter passed by GCC, so we exploit this property
86    to keep the code simple. */
87 
edit_params(int argc,char ** argv)88 static void edit_params(int argc, char** argv) {
89 
90   u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
91   u32 i;
92 
93 #ifdef __APPLE__
94 
95   u8 use_clang_as = 0;
96 
97   /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
98      with the code generated by newer versions of clang that are hand-built
99      by the user. See the thread here: http://goo.gl/HBWDtn.
100 
101      To work around this, when using clang and running without AFL_AS
102      specified, we will actually call 'clang -c' instead of 'as -q' to
103      compile the assembly file.
104 
105      The tools aren't cmdline-compatible, but at least for now, we can
106      seemingly get away with this by making only very minor tweaks. Thanks
107      to Nico Weber for the idea. */
108 
109   if (clang_mode && !afl_as) {
110 
111     use_clang_as = 1;
112 
113     afl_as = getenv("AFL_CC");
114     if (!afl_as) afl_as = getenv("AFL_CXX");
115     if (!afl_as) afl_as = "clang";
116 
117   }
118 
119 #endif /* __APPLE__ */
120 
121   /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
122      is not set. We need to check these non-standard variables to properly
123      handle the pass_thru logic later on. */
124 
125   if (!tmp_dir) tmp_dir = getenv("TEMP");
126   if (!tmp_dir) tmp_dir = getenv("TMP");
127   if (!tmp_dir) tmp_dir = "/tmp";
128 
129   as_params = ck_alloc((argc + 32) * sizeof(u8*));
130 
131   as_params[0] = afl_as ? afl_as : (u8*)"as";
132 
133   as_params[argc] = 0;
134 
135   for (i = 1; i < argc - 1; i++) {
136 
137     if (!strcmp(argv[i], "--64")) use_64bit = 1;
138     else if (!strcmp(argv[i], "--32")) use_64bit = 0;
139 
140 #ifdef __APPLE__
141 
142     /* The Apple case is a bit different... */
143 
144     if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
145 
146       if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
147       else if (!strcmp(argv[i + 1], "i386"))
148         FATAL("Sorry, 32-bit Apple platforms are not supported.");
149 
150     }
151 
152     /* Strip options that set the preference for a particular upstream
153        assembler in Xcode. */
154 
155     if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
156       continue;
157 
158 #endif /* __APPLE__ */
159 
160     as_params[as_par_cnt++] = argv[i];
161 
162   }
163 
164 #ifdef __APPLE__
165 
166   /* When calling clang as the upstream assembler, append -c -x assembler
167      and hope for the best. */
168 
169   if (use_clang_as) {
170 
171     as_params[as_par_cnt++] = "-c";
172     as_params[as_par_cnt++] = "-x";
173     as_params[as_par_cnt++] = "assembler";
174 
175   }
176 
177 #endif /* __APPLE__ */
178 
179   input_file = argv[argc - 1];
180 
181   if (input_file[0] == '-') {
182 
183     if (!strcmp(input_file + 1, "-version")) {
184       just_version = 1;
185       modified_file = input_file;
186       goto wrap_things_up;
187     }
188 
189     if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
190       else input_file = NULL;
191 
192   } else {
193 
194     /* Check if this looks like a standard invocation as a part of an attempt
195        to compile a program, rather than using gcc on an ad-hoc .s file in
196        a format we may not understand. This works around an issue compiling
197        NSS. */
198 
199     if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
200         strncmp(input_file, "/var/tmp/", 9) &&
201         strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
202 
203   }
204 
205   modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
206                                (u32)time(NULL));
207 
208 wrap_things_up:
209 
210   as_params[as_par_cnt++] = modified_file;
211   as_params[as_par_cnt]   = NULL;
212 
213 }
214 
215 
216 /* Process input file, generate modified_file. Insert instrumentation in all
217    the appropriate places. */
218 
add_instrumentation(void)219 static void add_instrumentation(void) {
220 
221   static u8 line[MAX_LINE];
222 
223   FILE* inf;
224   FILE* outf;
225   s32 outfd;
226   u32 ins_lines = 0;
227 
228   u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
229       skip_intel = 0, skip_app = 0, instrument_next = 0;
230 
231 #ifdef __APPLE__
232 
233   u8* colon_pos;
234 
235 #endif /* __APPLE__ */
236 
237   if (input_file) {
238 
239     inf = fopen(input_file, "r");
240     if (!inf) PFATAL("Unable to read '%s'", input_file);
241 
242   } else inf = stdin;
243 
244   outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
245 
246   if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
247 
248   outf = fdopen(outfd, "w");
249 
250   if (!outf) PFATAL("fdopen() failed");
251 
252   while (fgets(line, MAX_LINE, inf)) {
253 
254     /* In some cases, we want to defer writing the instrumentation trampoline
255        until after all the labels, macros, comments, etc. If we're in this
256        mode, and if the line starts with a tab followed by a character, dump
257        the trampoline now. */
258 
259     if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
260         instrument_next && line[0] == '\t' && isalpha(line[1])) {
261 
262       fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
263               R(MAP_SIZE));
264 
265       instrument_next = 0;
266       ins_lines++;
267 
268     }
269 
270     /* Output the actual line, call it a day in pass-thru mode. */
271 
272     fputs(line, outf);
273 
274     if (pass_thru) continue;
275 
276     /* All right, this is where the actual fun begins. For one, we only want to
277        instrument the .text section. So, let's keep track of that in processed
278        files - and let's set instr_ok accordingly. */
279 
280     if (line[0] == '\t' && line[1] == '.') {
281 
282       /* OpenBSD puts jump tables directly inline with the code, which is
283          a bit annoying. They use a specific format of p2align directives
284          around them, so we use that as a signal. */
285 
286       if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
287           isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
288 
289       if (!strncmp(line + 2, "text\n", 5) ||
290           !strncmp(line + 2, "section\t.text", 13) ||
291           !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
292           !strncmp(line + 2, "section __TEXT,__text", 21)) {
293         instr_ok = 1;
294         continue;
295       }
296 
297       if (!strncmp(line + 2, "section\t", 8) ||
298           !strncmp(line + 2, "section ", 8) ||
299           !strncmp(line + 2, "bss\n", 4) ||
300           !strncmp(line + 2, "data\n", 5)) {
301         instr_ok = 0;
302         continue;
303       }
304 
305     }
306 
307     /* Detect off-flavor assembly (rare, happens in gdb). When this is
308        encountered, we set skip_csect until the opposite directive is
309        seen, and we do not instrument. */
310 
311     if (strstr(line, ".code")) {
312 
313       if (strstr(line, ".code32")) skip_csect = use_64bit;
314       if (strstr(line, ".code64")) skip_csect = !use_64bit;
315 
316     }
317 
318     /* Detect syntax changes, as could happen with hand-written assembly.
319        Skip Intel blocks, resume instrumentation when back to AT&T. */
320 
321     if (strstr(line, ".intel_syntax")) skip_intel = 1;
322     if (strstr(line, ".att_syntax")) skip_intel = 0;
323 
324     /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
325 
326     if (line[0] == '#' || line[1] == '#') {
327 
328       if (strstr(line, "#APP")) skip_app = 1;
329       if (strstr(line, "#NO_APP")) skip_app = 0;
330 
331     }
332 
333     /* If we're in the right mood for instrumenting, check for function
334        names or conditional labels. This is a bit messy, but in essence,
335        we want to catch:
336 
337          ^main:      - function entry point (always instrumented)
338          ^.L0:       - GCC branch label
339          ^.LBB0_0:   - clang branch label (but only in clang mode)
340          ^\tjnz foo  - conditional branches
341 
342        ...but not:
343 
344          ^# BB#0:    - clang comments
345          ^ # BB#0:   - ditto
346          ^.Ltmp0:    - clang non-branch labels
347          ^.LC0       - GCC non-branch labels
348          ^.LBB0_0:   - ditto (when in GCC mode)
349          ^\tjmp foo  - non-conditional jumps
350 
351        Additionally, clang and GCC on MacOS X follow a different convention
352        with no leading dots on labels, hence the weird maze of #ifdefs
353        later on.
354 
355      */
356 
357     if (skip_intel || skip_app || skip_csect || !instr_ok ||
358         line[0] == '#' || line[0] == ' ') continue;
359 
360     /* Conditional branch instruction (jnz, etc). We append the instrumentation
361        right after the branch (to instrument the not-taken path) and at the
362        branch destination label (handled later on). */
363 
364     if (line[0] == '\t') {
365 
366       if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
367 
368         fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
369                 R(MAP_SIZE));
370 
371         ins_lines++;
372 
373       }
374 
375       continue;
376 
377     }
378 
379     /* Label of some sort. This may be a branch destination, but we need to
380        tread carefully and account for several different formatting
381        conventions. */
382 
383 #ifdef __APPLE__
384 
385     /* Apple: L<whatever><digit>: */
386 
387     if ((colon_pos = strstr(line, ":"))) {
388 
389       if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
390 
391 #else
392 
393     /* Everybody else: .L<whatever>: */
394 
395     if (strstr(line, ":")) {
396 
397       if (line[0] == '.') {
398 
399 #endif /* __APPLE__ */
400 
401         /* .L0: or LBB0_0: style jump destination */
402 
403 #ifdef __APPLE__
404 
405         /* Apple: L<num> / LBB<num> */
406 
407         if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
408             && R(100) < inst_ratio) {
409 
410 #else
411 
412         /* Apple: .L<num> / .LBB<num> */
413 
414         if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
415             && R(100) < inst_ratio) {
416 
417 #endif /* __APPLE__ */
418 
419           /* An optimization is possible here by adding the code only if the
420              label is mentioned in the code in contexts other than call / jmp.
421              That said, this complicates the code by requiring two-pass
422              processing (messy with stdin), and results in a speed gain
423              typically under 10%, because compilers are generally pretty good
424              about not generating spurious intra-function jumps.
425 
426              We use deferred output chiefly to avoid disrupting
427              .Lfunc_begin0-style exception handling calculations (a problem on
428              MacOS X). */
429 
430           if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
431 
432         }
433 
434       } else {
435 
436         /* Function label (always instrumented, deferred mode). */
437 
438         instrument_next = 1;
439 
440       }
441 
442     }
443 
444   }
445 
446   if (ins_lines)
447     fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
448 
449   if (input_file) fclose(inf);
450   fclose(outf);
451 
452   if (!be_quiet) {
453 
454     if (!ins_lines) WARNF("No instrumentation targets found%s.",
455                           pass_thru ? " (pass-thru mode)" : "");
456     else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
457              ins_lines, use_64bit ? "64" : "32",
458              getenv("AFL_HARDEN") ? "hardened" :
459              (sanitizer ? "ASAN/MSAN" : "non-hardened"),
460              inst_ratio);
461 
462   }
463 
464 }
465 
466 
467 /* Main entry point */
468 
469 int main(int argc, char** argv) {
470 
471   s32 pid;
472   u32 rand_seed;
473   int status;
474   u8* inst_ratio_str = getenv("AFL_INST_RATIO");
475 
476   struct timeval tv;
477   struct timezone tz;
478 
479   clang_mode = !!getenv(CLANG_ENV_VAR);
480 
481   if (isatty(2) && !getenv("AFL_QUIET")) {
482 
483     SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
484 
485   } else be_quiet = 1;
486 
487   if (argc < 2) {
488 
489     SAYF("\n"
490          "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
491          "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
492          "don't want to run this program directly.\n\n"
493 
494          "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
495          "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
496          "instrumenting every discovered branch.\n\n");
497 
498     exit(1);
499 
500   }
501 
502   gettimeofday(&tv, &tz);
503 
504   rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
505 
506   srandom(rand_seed);
507 
508   edit_params(argc, argv);
509 
510   if (inst_ratio_str) {
511 
512     if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
513       FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
514 
515   }
516 
517   if (getenv(AS_LOOP_ENV_VAR))
518     FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
519 
520   setenv(AS_LOOP_ENV_VAR, "1", 1);
521 
522   /* When compiling with ASAN, we don't have a particularly elegant way to skip
523      ASAN-specific branches. But we can probabilistically compensate for
524      that... */
525 
526   if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
527     sanitizer = 1;
528     inst_ratio /= 3;
529   }
530 
531   if (!just_version) add_instrumentation();
532 
533   if (!(pid = fork())) {
534 
535     execvp(as_params[0], (char**)as_params);
536     FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
537 
538   }
539 
540   if (pid < 0) PFATAL("fork() failed");
541 
542   if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
543 
544   if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
545 
546   exit(WEXITSTATUS(status));
547 
548 }
549 
550