1 /*
2    american fuzzy lop++ - wrapper for GNU as
3    -----------------------------------------
4 
5    Originally written by Michal Zalewski
6 
7    Now maintained by Marc Heuse <mh@mh-sec.de>,
8                         Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
9                         Andrea Fioraldi <andreafioraldi@gmail.com>
10 
11    Copyright 2016, 2017 Google Inc. All rights reserved.
12    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
13 
14    Licensed under the Apache License, Version 2.0 (the "License");
15    you may not use this file except in compliance with the License.
16    You may obtain a copy of the License at:
17 
18      http://www.apache.org/licenses/LICENSE-2.0
19 
20    The sole purpose of this wrapper is to preprocess assembly files generated
21    by GCC / clang and inject the instrumentation bits included from afl-as.h. It
22    is automatically invoked by the toolchain when compiling programs using
23    afl-gcc / afl-clang.
24 
25    Note that it's an explicit non-goal to instrument hand-written assembly,
26    be it in separate .s files or in __asm__ blocks. The only aspiration this
27    utility has right now is to be able to skip them gracefully and allow the
28    compilation process to continue.
29 
30    That said, see utils/clang_asm_normalize/ for a solution that may
31    allow clang users to make things work even with hand-crafted assembly. Just
32    note that there is no equivalent for GCC.
33 
34  */
35 
36 #define AFL_MAIN
37 
38 #include "config.h"
39 #include "types.h"
40 #include "debug.h"
41 #include "alloc-inl.h"
42 
43 #include "afl-as.h"
44 
45 #include <stdio.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <ctype.h>
52 #include <fcntl.h>
53 
54 #include <sys/wait.h>
55 #include <sys/time.h>
56 
57 static u8 **as_params;              /* Parameters passed to the real 'as'   */
58 
59 static u8 *input_file;              /* Originally specified input file      */
60 static u8 *modified_file;           /* Instrumented file for the real 'as'  */
61 
62 static u8 be_quiet,                 /* Quiet mode (no stderr output)        */
63     clang_mode,                     /* Running in clang mode?               */
64     pass_thru,                      /* Just pass data through?              */
65     just_version,                   /* Just show version?                   */
66     sanitizer;                      /* Using ASAN / MSAN                    */
67 
68 static u32 inst_ratio = 100,        /* Instrumentation probability (%)      */
69     as_par_cnt = 1;                 /* Number of params to 'as'             */
70 
71 /* If we don't find --32 or --64 in the command line, default to
72    instrumentation for whichever mode we were compiled with. This is not
73    perfect, but should do the trick for almost all use cases. */
74 
75 #ifdef WORD_SIZE_64
76 
77 static u8 use_64bit = 1;
78 
79 #else
80 
81 static u8 use_64bit = 0;
82 
83   #ifdef __APPLE__
84     #error "Sorry, 32-bit Apple platforms are not supported."
85   #endif                                                       /* __APPLE__ */
86 
87 #endif                                                     /* ^WORD_SIZE_64 */
88 
89 /* Examine and modify parameters to pass to 'as'. Note that the file name
90    is always the last parameter passed by GCC, so we exploit this property
91    to keep the code simple. */
92 
93 static void edit_params(int argc, char **argv) {
94 
95   u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
96   u32 i;
97 
98 #ifdef __APPLE__
99 
100   u8 use_clang_as = 0;
101 
102   /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
103      with the code generated by newer versions of clang that are hand-built
104      by the user. See the thread here: http://goo.gl/HBWDtn.
105 
106      To work around this, when using clang and running without AFL_AS
107      specified, we will actually call 'clang -c' instead of 'as -q' to
108      compile the assembly file.
109 
110      The tools aren't cmdline-compatible, but at least for now, we can
111      seemingly get away with this by making only very minor tweaks. Thanks
112      to Nico Weber for the idea. */
113 
114   if (clang_mode && !afl_as) {
115 
116     use_clang_as = 1;
117 
118     afl_as = getenv("AFL_CC");
119     if (!afl_as) afl_as = getenv("AFL_CXX");
120     if (!afl_as) afl_as = "clang";
121 
122   }
123 
124 #endif                                                         /* __APPLE__ */
125 
126   /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
127      is not set. We need to check these non-standard variables to properly
128      handle the pass_thru logic later on. */
129 
130   if (!tmp_dir) { tmp_dir = getenv("TEMP"); }
131   if (!tmp_dir) { tmp_dir = getenv("TMP"); }
132   if (!tmp_dir) { tmp_dir = "/tmp"; }
133 
134   as_params = ck_alloc((argc + 32) * sizeof(u8 *));
135   if (unlikely((INT_MAX - 32) < argc || !as_params)) {
136 
137     FATAL("Too many parameters passed to as");
138 
139   }
140 
141   as_params[0] = afl_as ? afl_as : (u8 *)"as";
142 
143   as_params[argc] = 0;
144 
145   for (i = 1; (s32)i < argc - 1; i++) {
146 
147     if (!strcmp(argv[i], "--64")) {
148 
149       use_64bit = 1;
150 
151     } else if (!strcmp(argv[i], "--32")) {
152 
153       use_64bit = 0;
154 
155     }
156 
157 #ifdef __APPLE__
158 
159     /* The Apple case is a bit different... */
160 
161     if (!strcmp(argv[i], "-arch") && i + 1 < (u32)argc) {
162 
163       if (!strcmp(argv[i + 1], "x86_64"))
164         use_64bit = 1;
165       else if (!strcmp(argv[i + 1], "i386"))
166         FATAL("Sorry, 32-bit Apple platforms are not supported.");
167 
168     }
169 
170     /* Strip options that set the preference for a particular upstream
171        assembler in Xcode. */
172 
173     if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
174       continue;
175 
176 #endif                                                         /* __APPLE__ */
177 
178     as_params[as_par_cnt++] = argv[i];
179 
180   }
181 
182 #ifdef __APPLE__
183 
184   /* When calling clang as the upstream assembler, append -c -x assembler
185      and hope for the best. */
186 
187   if (use_clang_as) {
188 
189     as_params[as_par_cnt++] = "-c";
190     as_params[as_par_cnt++] = "-x";
191     as_params[as_par_cnt++] = "assembler";
192 
193   }
194 
195 #endif                                                         /* __APPLE__ */
196 
197   input_file = argv[argc - 1];
198 
199   if (input_file[0] == '-') {
200 
201     if (!strcmp(input_file + 1, "-version")) {
202 
203       just_version = 1;
204       modified_file = input_file;
205       goto wrap_things_up;
206 
207     }
208 
209     if (input_file[1]) {
210 
211       FATAL("Incorrect use (not called through afl-gcc?)");
212 
213     } else {
214 
215       input_file = NULL;
216 
217     }
218 
219   } else {
220 
221     /* Check if this looks like a standard invocation as a part of an attempt
222        to compile a program, rather than using gcc on an ad-hoc .s file in
223        a format we may not understand. This works around an issue compiling
224        NSS. */
225 
226     if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
227         strncmp(input_file, "/var/tmp/", 9) &&
228         strncmp(input_file, "/tmp/", 5) &&
229         getenv("AFL_AS_FORCE_INSTRUMENT") == NULL) {
230 
231       pass_thru = 1;
232 
233     } else if (getenv("AFL_AS_FORCE_INSTRUMENT")) {
234 
235       unsetenv("AFL_AS_FORCE_INSTRUMENT");
236 
237     }
238 
239   }
240 
241   modified_file = alloc_printf("%s/.afl-%u-%u-%u.s", tmp_dir, (u32)getpid(),
242                                (u32)time(NULL), (u32)random());
243 
244 wrap_things_up:
245 
246   as_params[as_par_cnt++] = modified_file;
247   as_params[as_par_cnt] = NULL;
248 
249 }
250 
251 /* Process input file, generate modified_file. Insert instrumentation in all
252    the appropriate places. */
253 
254 static void add_instrumentation(void) {
255 
256   static u8 line[MAX_LINE];
257 
258   FILE *inf;
259   FILE *outf;
260   s32   outfd;
261   u32   ins_lines = 0;
262 
263   u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0,
264      skip_app = 0, instrument_next = 0;
265 
266 #ifdef __APPLE__
267 
268   u8 *colon_pos;
269 
270 #endif                                                         /* __APPLE__ */
271 
272   if (input_file) {
273 
274     inf = fopen(input_file, "r");
275     if (!inf) { PFATAL("Unable to read '%s'", input_file); }
276 
277   } else {
278 
279     inf = stdin;
280 
281   }
282 
283   outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION);
284 
285   if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); }
286 
287   outf = fdopen(outfd, "w");
288 
289   if (!outf) { PFATAL("fdopen() failed"); }
290 
291   while (fgets(line, MAX_LINE, inf)) {
292 
293     /* In some cases, we want to defer writing the instrumentation trampoline
294        until after all the labels, macros, comments, etc. If we're in this
295        mode, and if the line starts with a tab followed by a character, dump
296        the trampoline now. */
297 
298     if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
299         instrument_next && line[0] == '\t' && isalpha(line[1])) {
300 
301       fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
302               R(MAP_SIZE));
303 
304       instrument_next = 0;
305       ins_lines++;
306 
307     }
308 
309     /* Output the actual line, call it a day in pass-thru mode. */
310 
311     fputs(line, outf);
312 
313     if (pass_thru) { continue; }
314 
315     /* All right, this is where the actual fun begins. For one, we only want to
316        instrument the .text section. So, let's keep track of that in processed
317        files - and let's set instr_ok accordingly. */
318 
319     if (line[0] == '\t' && line[1] == '.') {
320 
321       /* OpenBSD puts jump tables directly inline with the code, which is
322          a bit annoying. They use a specific format of p2align directives
323          around them, so we use that as a signal. */
324 
325       if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
326           isdigit(line[10]) && line[11] == '\n') {
327 
328         skip_next_label = 1;
329 
330       }
331 
332       if (!strncmp(line + 2, "text\n", 5) ||
333           !strncmp(line + 2, "section\t.text", 13) ||
334           !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
335           !strncmp(line + 2, "section __TEXT,__text", 21)) {
336 
337         instr_ok = 1;
338         continue;
339 
340       }
341 
342       if (!strncmp(line + 2, "section\t", 8) ||
343           !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) ||
344           !strncmp(line + 2, "data\n", 5)) {
345 
346         instr_ok = 0;
347         continue;
348 
349       }
350 
351     }
352 
353     /* Detect off-flavor assembly (rare, happens in gdb). When this is
354        encountered, we set skip_csect until the opposite directive is
355        seen, and we do not instrument. */
356 
357     if (strstr(line, ".code")) {
358 
359       if (strstr(line, ".code32")) { skip_csect = use_64bit; }
360       if (strstr(line, ".code64")) { skip_csect = !use_64bit; }
361 
362     }
363 
364     /* Detect syntax changes, as could happen with hand-written assembly.
365        Skip Intel blocks, resume instrumentation when back to AT&T. */
366 
367     if (strstr(line, ".intel_syntax")) { skip_intel = 1; }
368     if (strstr(line, ".att_syntax")) { skip_intel = 0; }
369 
370     /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
371 
372     if (line[0] == '#' || line[1] == '#') {
373 
374       if (strstr(line, "#APP")) { skip_app = 1; }
375       if (strstr(line, "#NO_APP")) { skip_app = 0; }
376 
377     }
378 
379     /* If we're in the right mood for instrumenting, check for function
380        names or conditional labels. This is a bit messy, but in essence,
381        we want to catch:
382 
383          ^main:      - function entry point (always instrumented)
384          ^.L0:       - GCC branch label
385          ^.LBB0_0:   - clang branch label (but only in clang mode)
386          ^\tjnz foo  - conditional branches
387 
388        ...but not:
389 
390          ^# BB#0:    - clang comments
391          ^ # BB#0:   - ditto
392          ^.Ltmp0:    - clang non-branch labels
393          ^.LC0       - GCC non-branch labels
394          ^.LBB0_0:   - ditto (when in GCC mode)
395          ^\tjmp foo  - non-conditional jumps
396 
397        Additionally, clang and GCC on MacOS X follow a different convention
398        with no leading dots on labels, hence the weird maze of #ifdefs
399        later on.
400 
401      */
402 
403     if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' ||
404         line[0] == ' ') {
405 
406       continue;
407 
408     }
409 
410     /* Conditional branch instruction (jnz, etc). We append the instrumentation
411        right after the branch (to instrument the not-taken path) and at the
412        branch destination label (handled later on). */
413 
414     if (line[0] == '\t') {
415 
416       if (line[1] == 'j' && line[2] != 'm' && R(100) < (long)inst_ratio) {
417 
418         fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
419                 R(MAP_SIZE));
420 
421         ins_lines++;
422 
423       }
424 
425       continue;
426 
427     }
428 
429     /* Label of some sort. This may be a branch destination, but we need to
430        read carefully and account for several different formatting
431        conventions. */
432 
433 #ifdef __APPLE__
434 
435     /* Apple: L<whatever><digit>: */
436 
437     if ((colon_pos = strstr(line, ":"))) {
438 
439       if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
440 
441 #else
442 
443     /* Everybody else: .L<whatever>: */
444 
445     if (strstr(line, ":")) {
446 
447       if (line[0] == '.') {
448 
449 #endif                                                         /* __APPLE__ */
450 
451         /* .L0: or LBB0_0: style jump destination */
452 
453 #ifdef __APPLE__
454 
455         /* Apple: L<num> / LBB<num> */
456 
457         if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) &&
458             R(100) < (long)inst_ratio) {
459 
460 #else
461 
462         /* Apple: .L<num> / .LBB<num> */
463 
464         if ((isdigit(line[2]) ||
465              (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
466             R(100) < (long)inst_ratio) {
467 
468 #endif                                                         /* __APPLE__ */
469 
470           /* An optimization is possible here by adding the code only if the
471              label is mentioned in the code in contexts other than call / jmp.
472              That said, this complicates the code by requiring two-pass
473              processing (messy with stdin), and results in a speed gain
474              typically under 10%, because compilers are generally pretty good
475              about not generating spurious intra-function jumps.
476 
477              We use deferred output chiefly to avoid disrupting
478              .Lfunc_begin0-style exception handling calculations (a problem on
479              MacOS X). */
480 
481           if (!skip_next_label) {
482 
483             instrument_next = 1;
484 
485           } else {
486 
487             skip_next_label = 0;
488 
489           }
490 
491         }
492 
493       } else {
494 
495         /* Function label (always instrumented, deferred mode). */
496 
497         instrument_next = 1;
498 
499       }
500 
501     }
502 
503   }
504 
505   if (ins_lines) { fputs(use_64bit ? main_payload_64 : main_payload_32, outf); }
506 
507   if (input_file) { fclose(inf); }
508   fclose(outf);
509 
510   if (!be_quiet) {
511 
512     if (!ins_lines) {
513 
514       WARNF("No instrumentation targets found%s.",
515             pass_thru ? " (pass-thru mode)" : "");
516 
517     } else {
518 
519       char modeline[100];
520       snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
521                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
522                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
523                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
524                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "",
525                getenv("AFL_USE_LSAN") ? ", LSAN" : "");
526 
527       OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines,
528           use_64bit ? "64" : "32", modeline, inst_ratio);
529 
530     }
531 
532   }
533 
534 }
535 
536 /* Main entry point */
537 
538 int main(int argc, char **argv) {
539 
540   s32 pid;
541   u32 rand_seed, i, j;
542   int status;
543   u8 *inst_ratio_str = getenv("AFL_INST_RATIO");
544 
545   struct timeval  tv;
546   struct timezone tz;
547 
548   clang_mode = !!getenv(CLANG_ENV_VAR);
549 
550   if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
551 
552     SAYF(cCYA "afl-as" VERSION cRST " by Michal Zalewski\n");
553 
554   } else {
555 
556     be_quiet = 1;
557 
558   }
559 
560   if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
561 
562     fprintf(
563         stdout,
564         "afl-as" VERSION
565         " by Michal Zalewski\n"
566         "\n%s [-h]\n\n"
567         "This is a helper application for afl-fuzz. It is a wrapper around GNU "
568         "'as',\n"
569         "executed by the toolchain whenever using afl-gcc or afl-clang. You "
570         "probably\n"
571         "don't want to run this program directly.\n\n"
572 
573         "Rarely, when dealing with extremely complex projects, it may be "
574         "advisable\n"
575         "to set AFL_INST_RATIO to a value less than 100 in order to reduce "
576         "the\n"
577         "odds of instrumenting every discovered branch.\n\n"
578         "Environment variables used:\n"
579         "AFL_AS: path to assembler to use for instrumented files\n"
580         "AFL_CC: fall back path to assembler\n"
581         "AFL_CXX: fall back path to assembler\n"
582         "TMPDIR: directory to use for temporary files\n"
583         "TEMP: fall back path to directory for temporary files\n"
584         "TMP: fall back path to directory for temporary files\n"
585         "AFL_INST_RATIO: user specified instrumentation ratio\n"
586         "AFL_QUIET: suppress verbose output\n"
587         "AFL_KEEP_ASSEMBLY: leave instrumented assembly files\n"
588         "AFL_AS_FORCE_INSTRUMENT: force instrumentation for asm sources\n"
589         "AFL_HARDEN, AFL_USE_ASAN, AFL_USE_MSAN, AFL_USE_UBSAN, AFL_USE_LSAN:\n"
590         "  used in the instrumentation summary message\n",
591         argv[0]);
592 
593     exit(1);
594 
595   }
596 
597   gettimeofday(&tv, &tz);
598 
599   rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
600   // in fast systems where pids can repeat in the same seconds we need this
601   for (i = 1; (s32)i < argc; i++)
602     for (j = 0; j < strlen(argv[i]); j++)
603       rand_seed += argv[i][j];
604 
605   srandom(rand_seed);
606 
607   edit_params(argc, argv);
608 
609   if (inst_ratio_str) {
610 
611     if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) {
612 
613       FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
614 
615     }
616 
617   }
618 
619   if (getenv(AS_LOOP_ENV_VAR)) {
620 
621     FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
622 
623   }
624 
625   setenv(AS_LOOP_ENV_VAR, "1", 1);
626 
627   /* When compiling with ASAN, we don't have a particularly elegant way to skip
628      ASAN-specific branches. But we can probabilistically compensate for
629      that... */
630 
631   if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
632 
633     sanitizer = 1;
634     if (!getenv("AFL_INST_RATIO")) { inst_ratio /= 3; }
635 
636   }
637 
638   if (!just_version) { add_instrumentation(); }
639 
640   if (!(pid = fork())) {
641 
642     execvp(as_params[0], (char **)as_params);
643     FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
644 
645   }
646 
647   if (pid < 0) { PFATAL("fork() failed"); }
648 
649   if (waitpid(pid, &status, 0) <= 0) { PFATAL("waitpid() failed"); }
650 
651   if (!getenv("AFL_KEEP_ASSEMBLY")) { unlink(modified_file); }
652 
653   exit(WEXITSTATUS(status));
654 
655 }
656 
657