1 /*
2 american fuzzy lop - wrapper for GNU as
3 ---------------------------------------
4
5 Written and maintained by Michal Zalewski <lcamtuf@google.com>
6
7 Copyright 2013, 2014, 2015 Google Inc. All rights reserved.
8
9 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at:
12
13 http://www.apache.org/licenses/LICENSE-2.0
14
15 The sole purpose of this wrapper is to preprocess assembly files generated
16 by GCC / clang and inject the instrumentation bits included from afl-as.h. It
17 is automatically invoked by the toolchain when compiling programs using
18 afl-gcc / afl-clang.
19
20 Note that it's an explicit non-goal to instrument hand-written assembly,
21 be it in separate .s files or in __asm__ blocks. The only aspiration this
22 utility has right now is to be able to skip them gracefully and allow the
23 compilation process to continue.
24
25 That said, see experimental/clang_asm_normalize/ for a solution that may
26 allow clang users to make things work even with hand-crafted assembly. Just
27 note that there is no equivalent for GCC.
28
29 */
30
31 #define AFL_MAIN
32
33 #include "config.h"
34 #include "types.h"
35 #include "debug.h"
36 #include "alloc-inl.h"
37
38 #include "afl-as.h"
39
40 #include <stdio.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <time.h>
45 #include <ctype.h>
46 #include <fcntl.h>
47
48 #include <sys/wait.h>
49 #include <sys/time.h>
50
51 static u8** as_params; /* Parameters passed to the real 'as' */
52
53 static u8* input_file; /* Originally specified input file */
54 static u8* modified_file; /* Instrumented file for the real 'as' */
55
56 static u8 be_quiet, /* Quiet mode (no stderr output) */
57 clang_mode, /* Running in clang mode? */
58 pass_thru, /* Just pass data through? */
59 just_version, /* Just show version? */
60 sanitizer; /* Using ASAN / MSAN */
61
62 static u32 inst_ratio = 100, /* Instrumentation probability (%) */
63 as_par_cnt = 1; /* Number of params to 'as' */
64
65 /* If we don't find --32 or --64 in the command line, default to
66 instrumentation for whichever mode we were compiled with. This is not
67 perfect, but should do the trick for almost all use cases. */
68
69 #ifdef __x86_64__
70
71 static u8 use_64bit = 1;
72
73 #else
74
75 static u8 use_64bit = 0;
76
77 #ifdef __APPLE__
78 # error "Sorry, 32-bit Apple platforms are not supported."
79 #endif /* __APPLE__ */
80
81 #endif /* ^__x86_64__ */
82
83
84 /* Examine and modify parameters to pass to 'as'. Note that the file name
85 is always the last parameter passed by GCC, so we exploit this property
86 to keep the code simple. */
87
edit_params(int argc,char ** argv)88 static void edit_params(int argc, char** argv) {
89
90 u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
91 u32 i;
92
93 #ifdef __APPLE__
94
95 u8 use_clang_as = 0;
96
97 /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
98 with the code generated by newer versions of clang that are hand-built
99 by the user. See the thread here: http://goo.gl/HBWDtn.
100
101 To work around this, when using clang and running without AFL_AS
102 specified, we will actually call 'clang -c' instead of 'as -q' to
103 compile the assembly file.
104
105 The tools aren't cmdline-compatible, but at least for now, we can
106 seemingly get away with this by making only very minor tweaks. Thanks
107 to Nico Weber for the idea. */
108
109 if (clang_mode && !afl_as) {
110
111 use_clang_as = 1;
112
113 afl_as = getenv("AFL_CC");
114 if (!afl_as) afl_as = getenv("AFL_CXX");
115 if (!afl_as) afl_as = "clang";
116
117 }
118
119 #endif /* __APPLE__ */
120
121 /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
122 is not set. We need to check these non-standard variables to properly
123 handle the pass_thru logic later on. */
124
125 if (!tmp_dir) tmp_dir = getenv("TEMP");
126 if (!tmp_dir) tmp_dir = getenv("TMP");
127 if (!tmp_dir) tmp_dir = "/tmp";
128
129 as_params = ck_alloc((argc + 32) * sizeof(u8*));
130
131 as_params[0] = afl_as ? afl_as : (u8*)"as";
132
133 as_params[argc] = 0;
134
135 for (i = 1; i < argc - 1; i++) {
136
137 if (!strcmp(argv[i], "--64")) use_64bit = 1;
138 else if (!strcmp(argv[i], "--32")) use_64bit = 0;
139
140 #ifdef __APPLE__
141
142 /* The Apple case is a bit different... */
143
144 if (!strcmp(argv[i], "-arch") && i + 1 < argc) {
145
146 if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
147 else if (!strcmp(argv[i + 1], "i386"))
148 FATAL("Sorry, 32-bit Apple platforms are not supported.");
149
150 }
151
152 /* Strip options that set the preference for a particular upstream
153 assembler in Xcode. */
154
155 if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
156 continue;
157
158 #endif /* __APPLE__ */
159
160 as_params[as_par_cnt++] = argv[i];
161
162 }
163
164 #ifdef __APPLE__
165
166 /* When calling clang as the upstream assembler, append -c -x assembler
167 and hope for the best. */
168
169 if (use_clang_as) {
170
171 as_params[as_par_cnt++] = "-c";
172 as_params[as_par_cnt++] = "-x";
173 as_params[as_par_cnt++] = "assembler";
174
175 }
176
177 #endif /* __APPLE__ */
178
179 input_file = argv[argc - 1];
180
181 if (input_file[0] == '-') {
182
183 if (!strcmp(input_file + 1, "-version")) {
184 just_version = 1;
185 modified_file = input_file;
186 goto wrap_things_up;
187 }
188
189 if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
190 else input_file = NULL;
191
192 } else {
193
194 /* Check if this looks like a standard invocation as a part of an attempt
195 to compile a program, rather than using gcc on an ad-hoc .s file in
196 a format we may not understand. This works around an issue compiling
197 NSS. */
198
199 if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
200 strncmp(input_file, "/var/tmp/", 9) &&
201 strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
202
203 }
204
205 modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
206 (u32)time(NULL));
207
208 wrap_things_up:
209
210 as_params[as_par_cnt++] = modified_file;
211 as_params[as_par_cnt] = NULL;
212
213 }
214
215
216 /* Process input file, generate modified_file. Insert instrumentation in all
217 the appropriate places. */
218
add_instrumentation(void)219 static void add_instrumentation(void) {
220
221 static u8 line[MAX_LINE];
222
223 FILE* inf;
224 FILE* outf;
225 s32 outfd;
226 u32 ins_lines = 0;
227
228 u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0,
229 skip_intel = 0, skip_app = 0, instrument_next = 0;
230
231 #ifdef __APPLE__
232
233 u8* colon_pos;
234
235 #endif /* __APPLE__ */
236
237 if (input_file) {
238
239 inf = fopen(input_file, "r");
240 if (!inf) PFATAL("Unable to read '%s'", input_file);
241
242 } else inf = stdin;
243
244 outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
245
246 if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);
247
248 outf = fdopen(outfd, "w");
249
250 if (!outf) PFATAL("fdopen() failed");
251
252 while (fgets(line, MAX_LINE, inf)) {
253
254 /* In some cases, we want to defer writing the instrumentation trampoline
255 until after all the labels, macros, comments, etc. If we're in this
256 mode, and if the line starts with a tab followed by a character, dump
257 the trampoline now. */
258
259 if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
260 instrument_next && line[0] == '\t' && isalpha(line[1])) {
261
262 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
263 R(MAP_SIZE));
264
265 instrument_next = 0;
266 ins_lines++;
267
268 }
269
270 /* Output the actual line, call it a day in pass-thru mode. */
271
272 fputs(line, outf);
273
274 if (pass_thru) continue;
275
276 /* All right, this is where the actual fun begins. For one, we only want to
277 instrument the .text section. So, let's keep track of that in processed
278 files - and let's set instr_ok accordingly. */
279
280 if (line[0] == '\t' && line[1] == '.') {
281
282 /* OpenBSD puts jump tables directly inline with the code, which is
283 a bit annoying. They use a specific format of p2align directives
284 around them, so we use that as a signal. */
285
286 if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
287 isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;
288
289 if (!strncmp(line + 2, "text\n", 5) ||
290 !strncmp(line + 2, "section\t.text", 13) ||
291 !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
292 !strncmp(line + 2, "section __TEXT,__text", 21)) {
293 instr_ok = 1;
294 continue;
295 }
296
297 if (!strncmp(line + 2, "section\t", 8) ||
298 !strncmp(line + 2, "section ", 8) ||
299 !strncmp(line + 2, "bss\n", 4) ||
300 !strncmp(line + 2, "data\n", 5)) {
301 instr_ok = 0;
302 continue;
303 }
304
305 }
306
307 /* Detect off-flavor assembly (rare, happens in gdb). When this is
308 encountered, we set skip_csect until the opposite directive is
309 seen, and we do not instrument. */
310
311 if (strstr(line, ".code")) {
312
313 if (strstr(line, ".code32")) skip_csect = use_64bit;
314 if (strstr(line, ".code64")) skip_csect = !use_64bit;
315
316 }
317
318 /* Detect syntax changes, as could happen with hand-written assembly.
319 Skip Intel blocks, resume instrumentation when back to AT&T. */
320
321 if (strstr(line, ".intel_syntax")) skip_intel = 1;
322 if (strstr(line, ".att_syntax")) skip_intel = 0;
323
324 /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
325
326 if (line[0] == '#' || line[1] == '#') {
327
328 if (strstr(line, "#APP")) skip_app = 1;
329 if (strstr(line, "#NO_APP")) skip_app = 0;
330
331 }
332
333 /* If we're in the right mood for instrumenting, check for function
334 names or conditional labels. This is a bit messy, but in essence,
335 we want to catch:
336
337 ^main: - function entry point (always instrumented)
338 ^.L0: - GCC branch label
339 ^.LBB0_0: - clang branch label (but only in clang mode)
340 ^\tjnz foo - conditional branches
341
342 ...but not:
343
344 ^# BB#0: - clang comments
345 ^ # BB#0: - ditto
346 ^.Ltmp0: - clang non-branch labels
347 ^.LC0 - GCC non-branch labels
348 ^.LBB0_0: - ditto (when in GCC mode)
349 ^\tjmp foo - non-conditional jumps
350
351 Additionally, clang and GCC on MacOS X follow a different convention
352 with no leading dots on labels, hence the weird maze of #ifdefs
353 later on.
354
355 */
356
357 if (skip_intel || skip_app || skip_csect || !instr_ok ||
358 line[0] == '#' || line[0] == ' ') continue;
359
360 /* Conditional branch instruction (jnz, etc). We append the instrumentation
361 right after the branch (to instrument the not-taken path) and at the
362 branch destination label (handled later on). */
363
364 if (line[0] == '\t') {
365
366 if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {
367
368 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
369 R(MAP_SIZE));
370
371 ins_lines++;
372
373 }
374
375 continue;
376
377 }
378
379 /* Label of some sort. This may be a branch destination, but we need to
380 tread carefully and account for several different formatting
381 conventions. */
382
383 #ifdef __APPLE__
384
385 /* Apple: L<whatever><digit>: */
386
387 if ((colon_pos = strstr(line, ":"))) {
388
389 if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
390
391 #else
392
393 /* Everybody else: .L<whatever>: */
394
395 if (strstr(line, ":")) {
396
397 if (line[0] == '.') {
398
399 #endif /* __APPLE__ */
400
401 /* .L0: or LBB0_0: style jump destination */
402
403 #ifdef __APPLE__
404
405 /* Apple: L<num> / LBB<num> */
406
407 if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
408 && R(100) < inst_ratio) {
409
410 #else
411
412 /* Apple: .L<num> / .LBB<num> */
413
414 if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
415 && R(100) < inst_ratio) {
416
417 #endif /* __APPLE__ */
418
419 /* An optimization is possible here by adding the code only if the
420 label is mentioned in the code in contexts other than call / jmp.
421 That said, this complicates the code by requiring two-pass
422 processing (messy with stdin), and results in a speed gain
423 typically under 10%, because compilers are generally pretty good
424 about not generating spurious intra-function jumps.
425
426 We use deferred output chiefly to avoid disrupting
427 .Lfunc_begin0-style exception handling calculations (a problem on
428 MacOS X). */
429
430 if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;
431
432 }
433
434 } else {
435
436 /* Function label (always instrumented, deferred mode). */
437
438 instrument_next = 1;
439
440 }
441
442 }
443
444 }
445
446 if (ins_lines)
447 fputs(use_64bit ? main_payload_64 : main_payload_32, outf);
448
449 if (input_file) fclose(inf);
450 fclose(outf);
451
452 if (!be_quiet) {
453
454 if (!ins_lines) WARNF("No instrumentation targets found%s.",
455 pass_thru ? " (pass-thru mode)" : "");
456 else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
457 ins_lines, use_64bit ? "64" : "32",
458 getenv("AFL_HARDEN") ? "hardened" :
459 (sanitizer ? "ASAN/MSAN" : "non-hardened"),
460 inst_ratio);
461
462 }
463
464 }
465
466
467 /* Main entry point */
468
469 int main(int argc, char** argv) {
470
471 s32 pid;
472 u32 rand_seed;
473 int status;
474 u8* inst_ratio_str = getenv("AFL_INST_RATIO");
475
476 struct timeval tv;
477 struct timezone tz;
478
479 clang_mode = !!getenv(CLANG_ENV_VAR);
480
481 if (isatty(2) && !getenv("AFL_QUIET")) {
482
483 SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
484
485 } else be_quiet = 1;
486
487 if (argc < 2) {
488
489 SAYF("\n"
490 "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
491 "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
492 "don't want to run this program directly.\n\n"
493
494 "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
495 "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
496 "instrumenting every discovered branch.\n\n");
497
498 exit(1);
499
500 }
501
502 gettimeofday(&tv, &tz);
503
504 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
505
506 srandom(rand_seed);
507
508 edit_params(argc, argv);
509
510 if (inst_ratio_str) {
511
512 if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
513 FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
514
515 }
516
517 if (getenv(AS_LOOP_ENV_VAR))
518 FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
519
520 setenv(AS_LOOP_ENV_VAR, "1", 1);
521
522 /* When compiling with ASAN, we don't have a particularly elegant way to skip
523 ASAN-specific branches. But we can probabilistically compensate for
524 that... */
525
526 if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
527 sanitizer = 1;
528 inst_ratio /= 3;
529 }
530
531 if (!just_version) add_instrumentation();
532
533 if (!(pid = fork())) {
534
535 execvp(as_params[0], (char**)as_params);
536 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
537
538 }
539
540 if (pid < 0) PFATAL("fork() failed");
541
542 if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
543
544 if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);
545
546 exit(WEXITSTATUS(status));
547
548 }
549
550