1 /* 2 american fuzzy lop++ - wrapper for GNU as 3 ----------------------------------------- 4 5 Originally written by Michal Zalewski 6 7 Now maintained by Marc Heuse <mh@mh-sec.de>, 8 Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and 9 Andrea Fioraldi <andreafioraldi@gmail.com> 10 11 Copyright 2016, 2017 Google Inc. All rights reserved. 12 Copyright 2019-2020 AFLplusplus Project. All rights reserved. 13 14 Licensed under the Apache License, Version 2.0 (the "License"); 15 you may not use this file except in compliance with the License. 16 You may obtain a copy of the License at: 17 18 http://www.apache.org/licenses/LICENSE-2.0 19 20 The sole purpose of this wrapper is to preprocess assembly files generated 21 by GCC / clang and inject the instrumentation bits included from afl-as.h. It 22 is automatically invoked by the toolchain when compiling programs using 23 afl-gcc / afl-clang. 24 25 Note that it's an explicit non-goal to instrument hand-written assembly, 26 be it in separate .s files or in __asm__ blocks. The only aspiration this 27 utility has right now is to be able to skip them gracefully and allow the 28 compilation process to continue. 29 30 That said, see utils/clang_asm_normalize/ for a solution that may 31 allow clang users to make things work even with hand-crafted assembly. Just 32 note that there is no equivalent for GCC. 33 34 */ 35 36 #define AFL_MAIN 37 38 #include "config.h" 39 #include "types.h" 40 #include "debug.h" 41 #include "alloc-inl.h" 42 43 #include "afl-as.h" 44 45 #include <stdio.h> 46 #include <unistd.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <time.h> 50 #include <limits.h> 51 #include <ctype.h> 52 #include <fcntl.h> 53 54 #include <sys/wait.h> 55 #include <sys/time.h> 56 57 static u8 **as_params; /* Parameters passed to the real 'as' */ 58 59 static u8 *input_file; /* Originally specified input file */ 60 static u8 *modified_file; /* Instrumented file for the real 'as' */ 61 62 static u8 be_quiet, /* Quiet mode (no stderr output) */ 63 clang_mode, /* Running in clang mode? */ 64 pass_thru, /* Just pass data through? */ 65 just_version, /* Just show version? */ 66 sanitizer; /* Using ASAN / MSAN */ 67 68 static u32 inst_ratio = 100, /* Instrumentation probability (%) */ 69 as_par_cnt = 1; /* Number of params to 'as' */ 70 71 /* If we don't find --32 or --64 in the command line, default to 72 instrumentation for whichever mode we were compiled with. This is not 73 perfect, but should do the trick for almost all use cases. */ 74 75 #ifdef WORD_SIZE_64 76 77 static u8 use_64bit = 1; 78 79 #else 80 81 static u8 use_64bit = 0; 82 83 #ifdef __APPLE__ 84 #error "Sorry, 32-bit Apple platforms are not supported." 85 #endif /* __APPLE__ */ 86 87 #endif /* ^WORD_SIZE_64 */ 88 89 /* Examine and modify parameters to pass to 'as'. Note that the file name 90 is always the last parameter passed by GCC, so we exploit this property 91 to keep the code simple. */ 92 93 static void edit_params(int argc, char **argv) { 94 95 u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); 96 u32 i; 97 98 #ifdef __APPLE__ 99 100 u8 use_clang_as = 0; 101 102 /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work 103 with the code generated by newer versions of clang that are hand-built 104 by the user. See the thread here: http://goo.gl/HBWDtn. 105 106 To work around this, when using clang and running without AFL_AS 107 specified, we will actually call 'clang -c' instead of 'as -q' to 108 compile the assembly file. 109 110 The tools aren't cmdline-compatible, but at least for now, we can 111 seemingly get away with this by making only very minor tweaks. Thanks 112 to Nico Weber for the idea. */ 113 114 if (clang_mode && !afl_as) { 115 116 use_clang_as = 1; 117 118 afl_as = getenv("AFL_CC"); 119 if (!afl_as) afl_as = getenv("AFL_CXX"); 120 if (!afl_as) afl_as = "clang"; 121 122 } 123 124 #endif /* __APPLE__ */ 125 126 /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR 127 is not set. We need to check these non-standard variables to properly 128 handle the pass_thru logic later on. */ 129 130 if (!tmp_dir) { tmp_dir = getenv("TEMP"); } 131 if (!tmp_dir) { tmp_dir = getenv("TMP"); } 132 if (!tmp_dir) { tmp_dir = "/tmp"; } 133 134 as_params = ck_alloc((argc + 32) * sizeof(u8 *)); 135 if (unlikely((INT_MAX - 32) < argc || !as_params)) { 136 137 FATAL("Too many parameters passed to as"); 138 139 } 140 141 as_params[0] = afl_as ? afl_as : (u8 *)"as"; 142 143 as_params[argc] = 0; 144 145 for (i = 1; (s32)i < argc - 1; i++) { 146 147 if (!strcmp(argv[i], "--64")) { 148 149 use_64bit = 1; 150 151 } else if (!strcmp(argv[i], "--32")) { 152 153 use_64bit = 0; 154 155 } 156 157 #ifdef __APPLE__ 158 159 /* The Apple case is a bit different... */ 160 161 if (!strcmp(argv[i], "-arch") && i + 1 < (u32)argc) { 162 163 if (!strcmp(argv[i + 1], "x86_64")) 164 use_64bit = 1; 165 else if (!strcmp(argv[i + 1], "i386")) 166 FATAL("Sorry, 32-bit Apple platforms are not supported."); 167 168 } 169 170 /* Strip options that set the preference for a particular upstream 171 assembler in Xcode. */ 172 173 if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q"))) 174 continue; 175 176 #endif /* __APPLE__ */ 177 178 as_params[as_par_cnt++] = argv[i]; 179 180 } 181 182 #ifdef __APPLE__ 183 184 /* When calling clang as the upstream assembler, append -c -x assembler 185 and hope for the best. */ 186 187 if (use_clang_as) { 188 189 as_params[as_par_cnt++] = "-c"; 190 as_params[as_par_cnt++] = "-x"; 191 as_params[as_par_cnt++] = "assembler"; 192 193 } 194 195 #endif /* __APPLE__ */ 196 197 input_file = argv[argc - 1]; 198 199 if (input_file[0] == '-') { 200 201 if (!strcmp(input_file + 1, "-version")) { 202 203 just_version = 1; 204 modified_file = input_file; 205 goto wrap_things_up; 206 207 } 208 209 if (input_file[1]) { 210 211 FATAL("Incorrect use (not called through afl-gcc?)"); 212 213 } else { 214 215 input_file = NULL; 216 217 } 218 219 } else { 220 221 /* Check if this looks like a standard invocation as a part of an attempt 222 to compile a program, rather than using gcc on an ad-hoc .s file in 223 a format we may not understand. This works around an issue compiling 224 NSS. */ 225 226 if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) && 227 strncmp(input_file, "/var/tmp/", 9) && 228 strncmp(input_file, "/tmp/", 5) && 229 getenv("AFL_AS_FORCE_INSTRUMENT") == NULL) { 230 231 pass_thru = 1; 232 233 } else if (getenv("AFL_AS_FORCE_INSTRUMENT")) { 234 235 unsetenv("AFL_AS_FORCE_INSTRUMENT"); 236 237 } 238 239 } 240 241 modified_file = alloc_printf("%s/.afl-%u-%u-%u.s", tmp_dir, (u32)getpid(), 242 (u32)time(NULL), (u32)random()); 243 244 wrap_things_up: 245 246 as_params[as_par_cnt++] = modified_file; 247 as_params[as_par_cnt] = NULL; 248 249 } 250 251 /* Process input file, generate modified_file. Insert instrumentation in all 252 the appropriate places. */ 253 254 static void add_instrumentation(void) { 255 256 static u8 line[MAX_LINE]; 257 258 FILE *inf; 259 FILE *outf; 260 s32 outfd; 261 u32 ins_lines = 0; 262 263 u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0, 264 skip_app = 0, instrument_next = 0; 265 266 #ifdef __APPLE__ 267 268 u8 *colon_pos; 269 270 #endif /* __APPLE__ */ 271 272 if (input_file) { 273 274 inf = fopen(input_file, "r"); 275 if (!inf) { PFATAL("Unable to read '%s'", input_file); } 276 277 } else { 278 279 inf = stdin; 280 281 } 282 283 outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION); 284 285 if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); } 286 287 outf = fdopen(outfd, "w"); 288 289 if (!outf) { PFATAL("fdopen() failed"); } 290 291 while (fgets(line, MAX_LINE, inf)) { 292 293 /* In some cases, we want to defer writing the instrumentation trampoline 294 until after all the labels, macros, comments, etc. If we're in this 295 mode, and if the line starts with a tab followed by a character, dump 296 the trampoline now. */ 297 298 if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok && 299 instrument_next && line[0] == '\t' && isalpha(line[1])) { 300 301 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, 302 R(MAP_SIZE)); 303 304 instrument_next = 0; 305 ins_lines++; 306 307 } 308 309 /* Output the actual line, call it a day in pass-thru mode. */ 310 311 fputs(line, outf); 312 313 if (pass_thru) { continue; } 314 315 /* All right, this is where the actual fun begins. For one, we only want to 316 instrument the .text section. So, let's keep track of that in processed 317 files - and let's set instr_ok accordingly. */ 318 319 if (line[0] == '\t' && line[1] == '.') { 320 321 /* OpenBSD puts jump tables directly inline with the code, which is 322 a bit annoying. They use a specific format of p2align directives 323 around them, so we use that as a signal. */ 324 325 if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) && 326 isdigit(line[10]) && line[11] == '\n') { 327 328 skip_next_label = 1; 329 330 } 331 332 if (!strncmp(line + 2, "text\n", 5) || 333 !strncmp(line + 2, "section\t.text", 13) || 334 !strncmp(line + 2, "section\t__TEXT,__text", 21) || 335 !strncmp(line + 2, "section __TEXT,__text", 21)) { 336 337 instr_ok = 1; 338 continue; 339 340 } 341 342 if (!strncmp(line + 2, "section\t", 8) || 343 !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) || 344 !strncmp(line + 2, "data\n", 5)) { 345 346 instr_ok = 0; 347 continue; 348 349 } 350 351 } 352 353 /* Detect off-flavor assembly (rare, happens in gdb). When this is 354 encountered, we set skip_csect until the opposite directive is 355 seen, and we do not instrument. */ 356 357 if (strstr(line, ".code")) { 358 359 if (strstr(line, ".code32")) { skip_csect = use_64bit; } 360 if (strstr(line, ".code64")) { skip_csect = !use_64bit; } 361 362 } 363 364 /* Detect syntax changes, as could happen with hand-written assembly. 365 Skip Intel blocks, resume instrumentation when back to AT&T. */ 366 367 if (strstr(line, ".intel_syntax")) { skip_intel = 1; } 368 if (strstr(line, ".att_syntax")) { skip_intel = 0; } 369 370 /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */ 371 372 if (line[0] == '#' || line[1] == '#') { 373 374 if (strstr(line, "#APP")) { skip_app = 1; } 375 if (strstr(line, "#NO_APP")) { skip_app = 0; } 376 377 } 378 379 /* If we're in the right mood for instrumenting, check for function 380 names or conditional labels. This is a bit messy, but in essence, 381 we want to catch: 382 383 ^main: - function entry point (always instrumented) 384 ^.L0: - GCC branch label 385 ^.LBB0_0: - clang branch label (but only in clang mode) 386 ^\tjnz foo - conditional branches 387 388 ...but not: 389 390 ^# BB#0: - clang comments 391 ^ # BB#0: - ditto 392 ^.Ltmp0: - clang non-branch labels 393 ^.LC0 - GCC non-branch labels 394 ^.LBB0_0: - ditto (when in GCC mode) 395 ^\tjmp foo - non-conditional jumps 396 397 Additionally, clang and GCC on MacOS X follow a different convention 398 with no leading dots on labels, hence the weird maze of #ifdefs 399 later on. 400 401 */ 402 403 if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' || 404 line[0] == ' ') { 405 406 continue; 407 408 } 409 410 /* Conditional branch instruction (jnz, etc). We append the instrumentation 411 right after the branch (to instrument the not-taken path) and at the 412 branch destination label (handled later on). */ 413 414 if (line[0] == '\t') { 415 416 if (line[1] == 'j' && line[2] != 'm' && R(100) < (long)inst_ratio) { 417 418 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, 419 R(MAP_SIZE)); 420 421 ins_lines++; 422 423 } 424 425 continue; 426 427 } 428 429 /* Label of some sort. This may be a branch destination, but we need to 430 read carefully and account for several different formatting 431 conventions. */ 432 433 #ifdef __APPLE__ 434 435 /* Apple: L<whatever><digit>: */ 436 437 if ((colon_pos = strstr(line, ":"))) { 438 439 if (line[0] == 'L' && isdigit(*(colon_pos - 1))) { 440 441 #else 442 443 /* Everybody else: .L<whatever>: */ 444 445 if (strstr(line, ":")) { 446 447 if (line[0] == '.') { 448 449 #endif /* __APPLE__ */ 450 451 /* .L0: or LBB0_0: style jump destination */ 452 453 #ifdef __APPLE__ 454 455 /* Apple: L<num> / LBB<num> */ 456 457 if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) && 458 R(100) < (long)inst_ratio) { 459 460 #else 461 462 /* Apple: .L<num> / .LBB<num> */ 463 464 if ((isdigit(line[2]) || 465 (clang_mode && !strncmp(line + 1, "LBB", 3))) && 466 R(100) < (long)inst_ratio) { 467 468 #endif /* __APPLE__ */ 469 470 /* An optimization is possible here by adding the code only if the 471 label is mentioned in the code in contexts other than call / jmp. 472 That said, this complicates the code by requiring two-pass 473 processing (messy with stdin), and results in a speed gain 474 typically under 10%, because compilers are generally pretty good 475 about not generating spurious intra-function jumps. 476 477 We use deferred output chiefly to avoid disrupting 478 .Lfunc_begin0-style exception handling calculations (a problem on 479 MacOS X). */ 480 481 if (!skip_next_label) { 482 483 instrument_next = 1; 484 485 } else { 486 487 skip_next_label = 0; 488 489 } 490 491 } 492 493 } else { 494 495 /* Function label (always instrumented, deferred mode). */ 496 497 instrument_next = 1; 498 499 } 500 501 } 502 503 } 504 505 if (ins_lines) { fputs(use_64bit ? main_payload_64 : main_payload_32, outf); } 506 507 if (input_file) { fclose(inf); } 508 fclose(outf); 509 510 if (!be_quiet) { 511 512 if (!ins_lines) { 513 514 WARNF("No instrumentation targets found%s.", 515 pass_thru ? " (pass-thru mode)" : ""); 516 517 } else { 518 519 char modeline[100]; 520 snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", 521 getenv("AFL_HARDEN") ? "hardened" : "non-hardened", 522 getenv("AFL_USE_ASAN") ? ", ASAN" : "", 523 getenv("AFL_USE_MSAN") ? ", MSAN" : "", 524 getenv("AFL_USE_UBSAN") ? ", UBSAN" : "", 525 getenv("AFL_USE_LSAN") ? ", LSAN" : ""); 526 527 OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines, 528 use_64bit ? "64" : "32", modeline, inst_ratio); 529 530 } 531 532 } 533 534 } 535 536 /* Main entry point */ 537 538 int main(int argc, char **argv) { 539 540 s32 pid; 541 u32 rand_seed, i, j; 542 int status; 543 u8 *inst_ratio_str = getenv("AFL_INST_RATIO"); 544 545 struct timeval tv; 546 struct timezone tz; 547 548 clang_mode = !!getenv(CLANG_ENV_VAR); 549 550 if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { 551 552 SAYF(cCYA "afl-as" VERSION cRST " by Michal Zalewski\n"); 553 554 } else { 555 556 be_quiet = 1; 557 558 } 559 560 if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) { 561 562 fprintf( 563 stdout, 564 "afl-as" VERSION 565 " by Michal Zalewski\n" 566 "\n%s [-h]\n\n" 567 "This is a helper application for afl-fuzz. It is a wrapper around GNU " 568 "'as',\n" 569 "executed by the toolchain whenever using afl-gcc or afl-clang. You " 570 "probably\n" 571 "don't want to run this program directly.\n\n" 572 573 "Rarely, when dealing with extremely complex projects, it may be " 574 "advisable\n" 575 "to set AFL_INST_RATIO to a value less than 100 in order to reduce " 576 "the\n" 577 "odds of instrumenting every discovered branch.\n\n" 578 "Environment variables used:\n" 579 "AFL_AS: path to assembler to use for instrumented files\n" 580 "AFL_CC: fall back path to assembler\n" 581 "AFL_CXX: fall back path to assembler\n" 582 "TMPDIR: directory to use for temporary files\n" 583 "TEMP: fall back path to directory for temporary files\n" 584 "TMP: fall back path to directory for temporary files\n" 585 "AFL_INST_RATIO: user specified instrumentation ratio\n" 586 "AFL_QUIET: suppress verbose output\n" 587 "AFL_KEEP_ASSEMBLY: leave instrumented assembly files\n" 588 "AFL_AS_FORCE_INSTRUMENT: force instrumentation for asm sources\n" 589 "AFL_HARDEN, AFL_USE_ASAN, AFL_USE_MSAN, AFL_USE_UBSAN, AFL_USE_LSAN:\n" 590 " used in the instrumentation summary message\n", 591 argv[0]); 592 593 exit(1); 594 595 } 596 597 gettimeofday(&tv, &tz); 598 599 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); 600 // in fast systems where pids can repeat in the same seconds we need this 601 for (i = 1; (s32)i < argc; i++) 602 for (j = 0; j < strlen(argv[i]); j++) 603 rand_seed += argv[i][j]; 604 605 srandom(rand_seed); 606 607 edit_params(argc, argv); 608 609 if (inst_ratio_str) { 610 611 if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) { 612 613 FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)"); 614 615 } 616 617 } 618 619 if (getenv(AS_LOOP_ENV_VAR)) { 620 621 FATAL("Endless loop when calling 'as' (remove '.' from your PATH)"); 622 623 } 624 625 setenv(AS_LOOP_ENV_VAR, "1", 1); 626 627 /* When compiling with ASAN, we don't have a particularly elegant way to skip 628 ASAN-specific branches. But we can probabilistically compensate for 629 that... */ 630 631 if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) { 632 633 sanitizer = 1; 634 if (!getenv("AFL_INST_RATIO")) { inst_ratio /= 3; } 635 636 } 637 638 if (!just_version) { add_instrumentation(); } 639 640 if (!(pid = fork())) { 641 642 execvp(as_params[0], (char **)as_params); 643 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]); 644 645 } 646 647 if (pid < 0) { PFATAL("fork() failed"); } 648 649 if (waitpid(pid, &status, 0) <= 0) { PFATAL("waitpid() failed"); } 650 651 if (!getenv("AFL_KEEP_ASSEMBLY")) { unlink(modified_file); } 652 653 exit(WEXITSTATUS(status)); 654 655 } 656 657