1 /* diff - compare files line by line 2 3 Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007, 4 2009-2011 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #define GDIFF_MAIN 22 #include "diff.h" 23 #include <assert.h> 24 #include "paths.h" 25 #include <c-stack.h> 26 #include <dirname.h> 27 #include <error.h> 28 #include <exclude.h> 29 #include <exitfail.h> 30 #include <filenamecat.h> 31 #include <file-type.h> 32 #include <fnmatch.h> 33 #include <getopt.h> 34 #include <hard-locale.h> 35 #include <prepargs.h> 36 #include <progname.h> 37 #include <sh-quote.h> 38 #include <stat-time.h> 39 #include <timespec.h> 40 #include <version-etc.h> 41 #include <xalloc.h> 42 #include <xfreopen.h> 43 44 /* The official name of this program (e.g., no `g' prefix). */ 45 #define PROGRAM_NAME "diff" 46 47 #define AUTHORS \ 48 proper_name ("Paul Eggert"), \ 49 proper_name ("Mike Haertel"), \ 50 proper_name ("David Hayes"), \ 51 proper_name ("Richard Stallman"), \ 52 proper_name ("Len Tower") 53 54 #ifndef GUTTER_WIDTH_MINIMUM 55 # define GUTTER_WIDTH_MINIMUM 3 56 #endif 57 58 struct regexp_list 59 { 60 char *regexps; /* chars representing disjunction of the regexps */ 61 size_t len; /* chars used in `regexps' */ 62 size_t size; /* size malloc'ed for `regexps'; 0 if not malloc'ed */ 63 bool multiple_regexps;/* Does `regexps' represent a disjunction? */ 64 struct re_pattern_buffer *buf; 65 }; 66 67 static int compare_files (struct comparison const *, char const *, char const *); 68 static void add_regexp (struct regexp_list *, char const *); 69 static void summarize_regexp_list (struct regexp_list *); 70 static void specify_style (enum output_style); 71 static void specify_value (char const **, char const *, char const *); 72 static void try_help (char const *, char const *) __attribute__((noreturn)); 73 static void check_stdout (void); 74 static void usage (void); 75 76 /* If comparing directories, compare their common subdirectories 77 recursively. */ 78 static bool recursive; 79 80 /* In context diffs, show previous lines that match these regexps. */ 81 static struct regexp_list function_regexp_list; 82 83 /* Ignore changes affecting only lines that match these regexps. */ 84 static struct regexp_list ignore_regexp_list; 85 86 #if O_BINARY 87 /* Use binary I/O when reading and writing data (--binary). 88 On POSIX hosts, this has no effect. */ 89 static bool binary; 90 #else 91 enum { binary = true }; 92 #endif 93 94 /* When comparing directories, if a file appears only in one 95 directory, treat it as present but empty in the other (-N). 96 Then `patch' would create the file with appropriate contents. */ 97 static bool new_file; 98 99 /* When comparing directories, if a file appears only in the second 100 directory of the two, treat it as present but empty in the other 101 (--unidirectional-new-file). 102 Then `patch' would create the file with appropriate contents. */ 103 static bool unidirectional_new_file; 104 105 /* Report files compared that are the same (-s). 106 Normally nothing is output when that happens. */ 107 static bool report_identical_files; 108 109 static char const shortopts[] = 110 "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ"; 111 112 /* Values for long options that do not have single-letter equivalents. */ 113 enum 114 { 115 BINARY_OPTION = CHAR_MAX + 1, 116 FROM_FILE_OPTION, 117 HELP_OPTION, 118 HORIZON_LINES_OPTION, 119 IGNORE_FILE_NAME_CASE_OPTION, 120 INHIBIT_HUNK_MERGE_OPTION, 121 LEFT_COLUMN_OPTION, 122 LINE_FORMAT_OPTION, 123 NO_IGNORE_FILE_NAME_CASE_OPTION, 124 NORMAL_OPTION, 125 SDIFF_MERGE_ASSIST_OPTION, 126 STRIP_TRAILING_CR_OPTION, 127 SUPPRESS_BLANK_EMPTY_OPTION, 128 SUPPRESS_COMMON_LINES_OPTION, 129 TABSIZE_OPTION, 130 TO_FILE_OPTION, 131 132 /* These options must be in sequence. */ 133 UNCHANGED_LINE_FORMAT_OPTION, 134 OLD_LINE_FORMAT_OPTION, 135 NEW_LINE_FORMAT_OPTION, 136 137 /* These options must be in sequence. */ 138 UNCHANGED_GROUP_FORMAT_OPTION, 139 OLD_GROUP_FORMAT_OPTION, 140 NEW_GROUP_FORMAT_OPTION, 141 CHANGED_GROUP_FORMAT_OPTION 142 }; 143 144 static char const group_format_option[][sizeof "--unchanged-group-format"] = 145 { 146 "--unchanged-group-format", 147 "--old-group-format", 148 "--new-group-format", 149 "--changed-group-format" 150 }; 151 152 static char const line_format_option[][sizeof "--unchanged-line-format"] = 153 { 154 "--unchanged-line-format", 155 "--old-line-format", 156 "--new-line-format" 157 }; 158 159 static struct option const longopts[] = 160 { 161 {"binary", 0, 0, BINARY_OPTION}, 162 {"brief", 0, 0, 'q'}, 163 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION}, 164 {"context", 2, 0, 'C'}, 165 {"ed", 0, 0, 'e'}, 166 {"exclude", 1, 0, 'x'}, 167 {"exclude-from", 1, 0, 'X'}, 168 {"expand-tabs", 0, 0, 't'}, 169 {"forward-ed", 0, 0, 'f'}, 170 {"from-file", 1, 0, FROM_FILE_OPTION}, 171 {"help", 0, 0, HELP_OPTION}, 172 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION}, 173 {"ifdef", 1, 0, 'D'}, 174 {"ignore-all-space", 0, 0, 'w'}, 175 {"ignore-blank-lines", 0, 0, 'B'}, 176 {"ignore-case", 0, 0, 'i'}, 177 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, 178 {"ignore-matching-lines", 1, 0, 'I'}, 179 {"ignore-space-change", 0, 0, 'b'}, 180 {"ignore-tab-expansion", 0, 0, 'E'}, 181 {"ignore-trailing-space", 0, 0, 'Z'}, 182 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, 183 {"initial-tab", 0, 0, 'T'}, 184 {"label", 1, 0, 'L'}, 185 {"left-column", 0, 0, LEFT_COLUMN_OPTION}, 186 {"line-format", 1, 0, LINE_FORMAT_OPTION}, 187 {"minimal", 0, 0, 'd'}, 188 {"new-file", 0, 0, 'N'}, 189 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, 190 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, 191 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, 192 {"normal", 0, 0, NORMAL_OPTION}, 193 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, 194 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION}, 195 {"paginate", 0, 0, 'l'}, 196 {"rcs", 0, 0, 'n'}, 197 {"recursive", 0, 0, 'r'}, 198 {"report-identical-files", 0, 0, 's'}, 199 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, 200 {"show-c-function", 0, 0, 'p'}, 201 {"show-function-line", 1, 0, 'F'}, 202 {"side-by-side", 0, 0, 'y'}, 203 {"speed-large-files", 0, 0, 'H'}, 204 {"starting-file", 1, 0, 'S'}, 205 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, 206 {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION}, 207 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, 208 {"tabsize", 1, 0, TABSIZE_OPTION}, 209 {"text", 0, 0, 'a'}, 210 {"to-file", 1, 0, TO_FILE_OPTION}, 211 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, 212 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, 213 {"unidirectional-new-file", 0, 0, 'P'}, 214 {"unified", 2, 0, 'U'}, 215 {"version", 0, 0, 'v'}, 216 {"width", 1, 0, 'W'}, 217 {0, 0, 0, 0} 218 }; 219 220 /* Return a string containing the command options with which diff was invoked. 221 Spaces appear between what were separate ARGV-elements. 222 There is a space at the beginning but none at the end. 223 If there were no options, the result is an empty string. 224 225 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 226 the length of that vector. */ 227 228 static char * 229 option_list (char **optionvec, int count) 230 { 231 int i; 232 size_t size = 1; 233 char *result; 234 char *p; 235 236 for (i = 0; i < count; i++) 237 size += 1 + shell_quote_length (optionvec[i]); 238 239 p = result = xmalloc (size); 240 241 for (i = 0; i < count; i++) 242 { 243 *p++ = ' '; 244 p = shell_quote_copy (p, optionvec[i]); 245 } 246 247 *p = '\0'; 248 return result; 249 } 250 251 252 /* Return an option value suitable for add_exclude. */ 253 254 static int 255 exclude_options (void) 256 { 257 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0); 258 } 259 260 int 261 main (int argc, char **argv) 262 { 263 int exit_status = EXIT_SUCCESS; 264 int c; 265 int i; 266 int prev = -1; 267 lin ocontext = -1; 268 bool explicit_context = false; 269 size_t width = 0; 270 bool show_c_function = false; 271 char const *from_file = NULL; 272 char const *to_file = NULL; 273 uintmax_t numval; 274 char *numend; 275 276 /* Do our initializations. */ 277 exit_failure = EXIT_TROUBLE; 278 initialize_main (&argc, &argv); 279 set_program_name (argv[0]); 280 setlocale (LC_ALL, ""); 281 textdomain (PACKAGE); 282 c_stack_action (0); 283 function_regexp_list.buf = &function_regexp; 284 ignore_regexp_list.buf = &ignore_regexp; 285 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); 286 excluded = new_exclude (); 287 288 /* Decode the options. */ 289 290 while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 291 { 292 switch (c) 293 { 294 case 0: 295 break; 296 297 case '0': 298 case '1': 299 case '2': 300 case '3': 301 case '4': 302 case '5': 303 case '6': 304 case '7': 305 case '8': 306 case '9': 307 if (! ISDIGIT (prev)) 308 ocontext = c - '0'; 309 else if (LIN_MAX / 10 < ocontext 310 || ((ocontext = 10 * ocontext + c - '0') < 0)) 311 ocontext = LIN_MAX; 312 break; 313 314 case 'a': 315 text = true; 316 break; 317 318 case 'b': 319 if (ignore_white_space < IGNORE_SPACE_CHANGE) 320 ignore_white_space = IGNORE_SPACE_CHANGE; 321 break; 322 323 case 'Z': 324 if (ignore_white_space < IGNORE_SPACE_CHANGE) 325 ignore_white_space |= IGNORE_TRAILING_SPACE; 326 break; 327 328 case 'B': 329 ignore_blank_lines = true; 330 break; 331 332 case 'C': 333 case 'U': 334 { 335 if (optarg) 336 { 337 numval = strtoumax (optarg, &numend, 10); 338 if (*numend) 339 try_help ("invalid context length `%s'", optarg); 340 if (LIN_MAX < numval) 341 numval = LIN_MAX; 342 } 343 else 344 numval = 3; 345 346 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); 347 if (context < numval) 348 context = numval; 349 explicit_context = true; 350 } 351 break; 352 353 case 'c': 354 specify_style (OUTPUT_CONTEXT); 355 if (context < 3) 356 context = 3; 357 break; 358 359 case 'd': 360 minimal = true; 361 break; 362 363 case 'D': 364 specify_style (OUTPUT_IFDEF); 365 { 366 static char const C_ifdef_group_formats[] = 367 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 368 char *b = xmalloc (sizeof C_ifdef_group_formats 369 + 7 * strlen (optarg) - 14 /* 7*"%s" */ 370 - 8 /* 5*"%%" + 3*"%c" */); 371 sprintf (b, C_ifdef_group_formats, 372 0, 373 optarg, optarg, 0, 374 optarg, optarg, 0, 375 optarg, optarg, optarg); 376 for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++) 377 { 378 specify_value (&group_format[i], b, "-D"); 379 b += strlen (b) + 1; 380 } 381 } 382 break; 383 384 case 'e': 385 specify_style (OUTPUT_ED); 386 break; 387 388 case 'E': 389 if (ignore_white_space < IGNORE_SPACE_CHANGE) 390 ignore_white_space |= IGNORE_TAB_EXPANSION; 391 break; 392 393 case 'f': 394 specify_style (OUTPUT_FORWARD_ED); 395 break; 396 397 case 'F': 398 add_regexp (&function_regexp_list, optarg); 399 break; 400 401 case 'h': 402 /* Split the files into chunks for faster processing. 403 Usually does not change the result. 404 405 This currently has no effect. */ 406 break; 407 408 case 'H': 409 speed_large_files = true; 410 break; 411 412 case 'i': 413 ignore_case = true; 414 break; 415 416 case 'I': 417 add_regexp (&ignore_regexp_list, optarg); 418 break; 419 420 case 'l': 421 if (!pr_program[0]) 422 try_help ("pagination not supported on this host", NULL); 423 paginate = true; 424 #ifdef SIGCHLD 425 /* Pagination requires forking and waiting, and 426 System V fork+wait does not work if SIGCHLD is ignored. */ 427 signal (SIGCHLD, SIG_DFL); 428 #endif 429 break; 430 431 case 'L': 432 if (!file_label[0]) 433 file_label[0] = optarg; 434 else if (!file_label[1]) 435 file_label[1] = optarg; 436 else 437 fatal ("too many file label options"); 438 break; 439 440 case 'n': 441 specify_style (OUTPUT_RCS); 442 break; 443 444 case 'N': 445 new_file = true; 446 break; 447 448 case 'p': 449 show_c_function = true; 450 add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); 451 break; 452 453 case 'P': 454 unidirectional_new_file = true; 455 break; 456 457 case 'q': 458 brief = true; 459 break; 460 461 case 'r': 462 recursive = true; 463 break; 464 465 case 's': 466 report_identical_files = true; 467 break; 468 469 case 'S': 470 specify_value (&starting_file, optarg, "-S"); 471 break; 472 473 case 't': 474 expand_tabs = true; 475 break; 476 477 case 'T': 478 initial_tab = true; 479 break; 480 481 case 'u': 482 specify_style (OUTPUT_UNIFIED); 483 if (context < 3) 484 context = 3; 485 break; 486 487 case 'v': 488 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, 489 AUTHORS, (char *) NULL); 490 check_stdout (); 491 return EXIT_SUCCESS; 492 493 case 'w': 494 ignore_white_space = IGNORE_ALL_SPACE; 495 break; 496 497 case 'x': 498 add_exclude (excluded, optarg, exclude_options ()); 499 break; 500 501 case 'X': 502 if (add_exclude_file (add_exclude, excluded, optarg, 503 exclude_options (), '\n')) 504 pfatal_with_name (optarg); 505 break; 506 507 case 'y': 508 specify_style (OUTPUT_SDIFF); 509 break; 510 511 case 'W': 512 numval = strtoumax (optarg, &numend, 10); 513 if (! (0 < numval && numval <= SIZE_MAX) || *numend) 514 try_help ("invalid width `%s'", optarg); 515 if (width != numval) 516 { 517 if (width) 518 fatal ("conflicting width options"); 519 width = numval; 520 } 521 break; 522 523 case BINARY_OPTION: 524 #if O_BINARY 525 binary = true; 526 if (! isatty (STDOUT_FILENO)) 527 xfreopen (NULL, "wb", stdout); 528 #endif 529 break; 530 531 case FROM_FILE_OPTION: 532 specify_value (&from_file, optarg, "--from-file"); 533 break; 534 535 case HELP_OPTION: 536 usage (); 537 check_stdout (); 538 return EXIT_SUCCESS; 539 540 case HORIZON_LINES_OPTION: 541 numval = strtoumax (optarg, &numend, 10); 542 if (*numend) 543 try_help ("invalid horizon length `%s'", optarg); 544 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); 545 break; 546 547 case IGNORE_FILE_NAME_CASE_OPTION: 548 ignore_file_name_case = true; 549 break; 550 551 case INHIBIT_HUNK_MERGE_OPTION: 552 /* This option is obsolete, but accept it for backward 553 compatibility. */ 554 break; 555 556 case LEFT_COLUMN_OPTION: 557 left_column = true; 558 break; 559 560 case LINE_FORMAT_OPTION: 561 specify_style (OUTPUT_IFDEF); 562 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) 563 specify_value (&line_format[i], optarg, "--line-format"); 564 break; 565 566 case NO_IGNORE_FILE_NAME_CASE_OPTION: 567 ignore_file_name_case = false; 568 break; 569 570 case NORMAL_OPTION: 571 specify_style (OUTPUT_NORMAL); 572 break; 573 574 case SDIFF_MERGE_ASSIST_OPTION: 575 specify_style (OUTPUT_SDIFF); 576 sdiff_merge_assist = true; 577 break; 578 579 case STRIP_TRAILING_CR_OPTION: 580 strip_trailing_cr = true; 581 break; 582 583 case SUPPRESS_BLANK_EMPTY_OPTION: 584 suppress_blank_empty = true; 585 break; 586 587 case SUPPRESS_COMMON_LINES_OPTION: 588 suppress_common_lines = true; 589 break; 590 591 case TABSIZE_OPTION: 592 numval = strtoumax (optarg, &numend, 10); 593 if (! (0 < numval && numval <= SIZE_MAX) || *numend) 594 try_help ("invalid tabsize `%s'", optarg); 595 if (tabsize != numval) 596 { 597 if (tabsize) 598 fatal ("conflicting tabsize options"); 599 tabsize = numval; 600 } 601 break; 602 603 case TO_FILE_OPTION: 604 specify_value (&to_file, optarg, "--to-file"); 605 break; 606 607 case UNCHANGED_LINE_FORMAT_OPTION: 608 case OLD_LINE_FORMAT_OPTION: 609 case NEW_LINE_FORMAT_OPTION: 610 specify_style (OUTPUT_IFDEF); 611 c -= UNCHANGED_LINE_FORMAT_OPTION; 612 specify_value (&line_format[c], optarg, line_format_option[c]); 613 break; 614 615 case UNCHANGED_GROUP_FORMAT_OPTION: 616 case OLD_GROUP_FORMAT_OPTION: 617 case NEW_GROUP_FORMAT_OPTION: 618 case CHANGED_GROUP_FORMAT_OPTION: 619 specify_style (OUTPUT_IFDEF); 620 c -= UNCHANGED_GROUP_FORMAT_OPTION; 621 specify_value (&group_format[c], optarg, group_format_option[c]); 622 break; 623 624 default: 625 try_help (NULL, NULL); 626 } 627 prev = c; 628 } 629 630 if (output_style == OUTPUT_UNSPECIFIED) 631 { 632 if (show_c_function) 633 { 634 specify_style (OUTPUT_CONTEXT); 635 if (ocontext < 0) 636 context = 3; 637 } 638 else 639 specify_style (OUTPUT_NORMAL); 640 } 641 642 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) 643 { 644 #if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \ 645 || defined HAVE_STRUCT_STAT_ST_SPARE1) 646 time_format = "%Y-%m-%d %H:%M:%S.%N %z"; 647 #else 648 time_format = "%Y-%m-%d %H:%M:%S %z"; 649 #endif 650 } 651 else 652 { 653 /* See POSIX 1003.1-2001 for this format. */ 654 time_format = "%a %b %e %T %Y"; 655 } 656 657 if (0 <= ocontext 658 && (output_style == OUTPUT_CONTEXT 659 || output_style == OUTPUT_UNIFIED) 660 && (context < ocontext 661 || (ocontext < context && ! explicit_context))) 662 context = ocontext; 663 664 if (! tabsize) 665 tabsize = 8; 666 if (! width) 667 width = 130; 668 669 { 670 /* Maximize first the half line width, and then the gutter width, 671 according to the following constraints: 672 673 1. Two half lines plus a gutter must fit in a line. 674 2. If the half line width is nonzero: 675 a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 676 b. If tabs are not expanded to spaces, 677 a half line plus a gutter is an integral number of tabs, 678 so that tabs in the right column line up. */ 679 680 intmax_t t = expand_tabs ? 1 : tabsize; 681 intmax_t w = width; 682 intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t; 683 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)), 684 sdiff_column2_offset = sdiff_half_width ? off : w; 685 } 686 687 /* Make the horizon at least as large as the context, so that 688 shift_boundaries has more freedom to shift the first and last hunks. */ 689 if (horizon_lines < context) 690 horizon_lines = context; 691 692 summarize_regexp_list (&function_regexp_list); 693 summarize_regexp_list (&ignore_regexp_list); 694 695 if (output_style == OUTPUT_IFDEF) 696 { 697 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) 698 if (!line_format[i]) 699 line_format[i] = "%l\n"; 700 if (!group_format[OLD]) 701 group_format[OLD] 702 = group_format[CHANGED] ? group_format[CHANGED] : "%<"; 703 if (!group_format[NEW]) 704 group_format[NEW] 705 = group_format[CHANGED] ? group_format[CHANGED] : "%>"; 706 if (!group_format[UNCHANGED]) 707 group_format[UNCHANGED] = "%="; 708 if (!group_format[CHANGED]) 709 group_format[CHANGED] = concat (group_format[OLD], 710 group_format[NEW], ""); 711 } 712 713 no_diff_means_no_output = 714 (output_style == OUTPUT_IFDEF ? 715 (!*group_format[UNCHANGED] 716 || (STREQ (group_format[UNCHANGED], "%=") 717 && !*line_format[UNCHANGED])) 718 : (output_style != OUTPUT_SDIFF) | suppress_common_lines); 719 720 files_can_be_treated_as_binary = 721 (brief & binary 722 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr 723 | (ignore_regexp_list.regexps || ignore_white_space))); 724 725 switch_string = option_list (argv + 1, optind - 1); 726 727 if (from_file) 728 { 729 if (to_file) 730 fatal ("--from-file and --to-file both specified"); 731 else 732 for (; optind < argc; optind++) 733 { 734 int status = compare_files (NULL, from_file, argv[optind]); 735 if (exit_status < status) 736 exit_status = status; 737 } 738 } 739 else 740 { 741 if (to_file) 742 for (; optind < argc; optind++) 743 { 744 int status = compare_files (NULL, argv[optind], to_file); 745 if (exit_status < status) 746 exit_status = status; 747 } 748 else 749 { 750 if (argc - optind != 2) 751 { 752 if (argc - optind < 2) 753 try_help ("missing operand after `%s'", argv[argc - 1]); 754 else 755 try_help ("extra operand `%s'", argv[optind + 2]); 756 } 757 758 exit_status = compare_files (NULL, argv[optind], argv[optind + 1]); 759 } 760 } 761 762 /* Print any messages that were saved up for last. */ 763 print_message_queue (); 764 765 check_stdout (); 766 exit (exit_status); 767 return exit_status; 768 } 769 770 /* Append to REGLIST the regexp PATTERN. */ 771 772 static void 773 add_regexp (struct regexp_list *reglist, char const *pattern) 774 { 775 size_t patlen = strlen (pattern); 776 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 777 778 if (m != 0) 779 error (0, 0, "%s: %s", pattern, m); 780 else 781 { 782 char *regexps = reglist->regexps; 783 size_t len = reglist->len; 784 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 785 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 786 size_t size = reglist->size; 787 788 if (size <= newlen) 789 { 790 if (!size) 791 size = 1; 792 793 do size *= 2; 794 while (size <= newlen); 795 796 reglist->size = size; 797 reglist->regexps = regexps = xrealloc (regexps, size); 798 } 799 if (multiple_regexps) 800 { 801 regexps[len++] = '\\'; 802 regexps[len++] = '|'; 803 } 804 memcpy (regexps + len, pattern, patlen + 1); 805 } 806 } 807 808 /* Ensure that REGLIST represents the disjunction of its regexps. 809 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 810 811 static void 812 summarize_regexp_list (struct regexp_list *reglist) 813 { 814 if (reglist->regexps) 815 { 816 /* At least one regexp was specified. Allocate a fastmap for it. */ 817 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 818 if (reglist->multiple_regexps) 819 { 820 /* Compile the disjunction of the regexps. 821 (If just one regexp was specified, it is already compiled.) */ 822 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 823 reglist->buf); 824 if (m) 825 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 826 } 827 } 828 } 829 830 static void 831 try_help (char const *reason_msgid, char const *operand) 832 { 833 if (reason_msgid) 834 error (0, 0, _(reason_msgid), operand); 835 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 836 program_name); 837 abort (); 838 } 839 840 static void 841 check_stdout (void) 842 { 843 if (ferror (stdout)) 844 fatal ("write failed"); 845 else if (fclose (stdout) != 0) 846 pfatal_with_name (_("standard output")); 847 } 848 849 static char const * const option_help_msgid[] = { 850 N_(" --normal output a normal diff (the default)"), 851 N_("-q, --brief report only when files differ"), 852 N_("-s, --report-identical-files report when two files are the same"), 853 N_("-c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context"), 854 N_("-u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context"), 855 N_("-e, --ed output an ed script"), 856 N_("-n, --rcs output an RCS format diff"), 857 N_("-y, --side-by-side output in two columns"), 858 N_("-W, --width=NUM output at most NUM (default 130) print columns"), 859 N_(" --left-column output only the left column of common lines"), 860 N_(" --suppress-common-lines do not output common lines"), 861 "", 862 N_("-p, --show-c-function show which C function each change is in"), 863 N_("-F, --show-function-line=RE show the most recent line matching RE"), 864 N_(" --label LABEL use LABEL instead of file name\n" 865 " (can be repeated)"), 866 "", 867 N_("-t, --expand-tabs expand tabs to spaces in output"), 868 N_("-T, --initial-tab make tabs line up by prepending a tab"), 869 N_(" --tabsize=NUM tab stops every NUM (default 8) print columns"), 870 N_(" --suppress-blank-empty suppress space or tab before empty output lines"), 871 N_("-l, --paginate pass output through `pr' to paginate it"), 872 "", 873 N_("-r, --recursive recursively compare any subdirectories found"), 874 N_("-N, --new-file treat absent files as empty"), 875 N_(" --unidirectional-new-file treat absent first files as empty"), 876 N_(" --ignore-file-name-case ignore case when comparing file names"), 877 N_(" --no-ignore-file-name-case consider case when comparing file names"), 878 N_("-x, --exclude=PAT exclude files that match PAT"), 879 N_("-X, --exclude-from=FILE exclude files that match any pattern in FILE"), 880 N_("-S, --starting-file=FILE start with FILE when comparing directories"), 881 N_(" --from-file=FILE1 compare FILE1 to all operands;\n" 882 " FILE1 can be a directory"), 883 N_(" --to-file=FILE2 compare all operands to FILE2;\n" 884 " FILE2 can be a directory"), 885 "", 886 N_("-i, --ignore-case ignore case differences in file contents"), 887 N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"), 888 N_("-Z, --ignore-trailing-space ignore white space at line end"), 889 N_("-b, --ignore-space-change ignore changes in the amount of white space"), 890 N_("-w, --ignore-all-space ignore all white space"), 891 N_("-B, --ignore-blank-lines ignore changes whose lines are all blank"), 892 N_("-I, --ignore-matching-lines=RE ignore changes whose lines all match RE"), 893 "", 894 N_("-a, --text treat all files as text"), 895 N_(" --strip-trailing-cr strip trailing carriage return on input"), 896 #if O_BINARY 897 N_(" --binary read and write data in binary mode"), 898 #endif 899 "", 900 N_("-D, --ifdef=NAME output merged file with `#ifdef NAME' diffs"), 901 N_(" --GTYPE-group-format=GFMT format GTYPE input groups with GFMT"), 902 N_(" --line-format=LFMT format all input lines with LFMT"), 903 N_(" --LTYPE-line-format=LFMT format LTYPE input lines with LFMT"), 904 N_(" These format options provide fine-grained control over the output\n" 905 " of diff, generalizing -D/--ifdef."), 906 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 907 N_(" GFMT (only) may contain:\n\ 908 %< lines from FILE1\n\ 909 %> lines from FILE2\n\ 910 %= lines common to FILE1 and FILE2\n\ 911 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 912 LETTERs are as follows for new group, lower case for old group:\n\ 913 F first line number\n\ 914 L last line number\n\ 915 N number of lines = L-F+1\n\ 916 E F-1\n\ 917 M L+1\n\ 918 %(A=B?T:E) if A equals B then T else E"), 919 N_(" LFMT (only) may contain:\n\ 920 %L contents of line\n\ 921 %l contents of line, excluding any trailing newline\n\ 922 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 923 N_(" Both GFMT and LFMT may contain:\n\ 924 %% %\n\ 925 %c'C' the single character C\n\ 926 %c'\\OOO' the character with octal code OOO\n\ 927 C the character C (other characters represent themselves)"), 928 "", 929 N_("-d, --minimal try hard to find a smaller set of changes"), 930 N_(" --horizon-lines=NUM keep NUM lines of the common prefix and suffix"), 931 N_(" --speed-large-files assume large files and many scattered small changes"), 932 "", 933 N_(" --help display this help and exit"), 934 N_("-v, --version output version information and exit"), 935 "", 936 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 937 N_("If --from-file or --to-file is given, there are no restrictions on FILE(s)."), 938 N_("If a FILE is `-', read standard input."), 939 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 940 0 941 }; 942 943 static void 944 usage (void) 945 { 946 char const * const *p; 947 948 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 949 printf ("%s\n\n", _("Compare FILES line by line.")); 950 951 fputs (_("\ 952 Mandatory arguments to long options are mandatory for short options too.\n\ 953 "), stdout); 954 955 for (p = option_help_msgid; *p; p++) 956 { 957 if (!**p) 958 putchar ('\n'); 959 else 960 { 961 char const *msg = _(*p); 962 char const *nl; 963 while ((nl = strchr (msg, '\n'))) 964 { 965 int msglen = nl + 1 - msg; 966 printf (" %.*s", msglen, msg); 967 msg = nl + 1; 968 } 969 970 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 971 } 972 } 973 emit_bug_reporting_address (); 974 } 975 976 /* Set VAR to VALUE, reporting an OPTION error if this is a 977 conflict. */ 978 static void 979 specify_value (char const **var, char const *value, char const *option) 980 { 981 if (*var && ! STREQ (*var, value)) 982 { 983 error (0, 0, _("conflicting %s option value `%s'"), option, value); 984 try_help (NULL, NULL); 985 } 986 *var = value; 987 } 988 989 /* Set the output style to STYLE, diagnosing conflicts. */ 990 static void 991 specify_style (enum output_style style) 992 { 993 if (output_style != style) 994 { 995 if (output_style != OUTPUT_UNSPECIFIED) 996 try_help ("conflicting output style options", NULL); 997 output_style = style; 998 } 999 } 1000 1001 /* Set the last-modified time of *ST to be the current time. */ 1002 1003 static void 1004 set_mtime_to_now (struct stat *st) 1005 { 1006 #ifdef STAT_TIMESPEC 1007 gettime (&STAT_TIMESPEC (st, st_mtim)); 1008 #else 1009 struct timespec t; 1010 gettime (&t); 1011 st->st_mtime = t.tv_sec; 1012 # if defined STAT_TIMESPEC_NS 1013 STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec; 1014 # elif defined HAVE_STRUCT_STAT_ST_SPARE1 1015 st->st_spare1 = t.tv_nsec / 1000; 1016 # endif 1017 #endif 1018 } 1019 1020 /* Compare two files (or dirs) with parent comparison PARENT 1021 and names NAME0 and NAME1. 1022 (If PARENT is null, then the first name is just NAME0, etc.) 1023 This is self-contained; it opens the files and closes them. 1024 1025 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1026 different, EXIT_TROUBLE if there is a problem opening them. */ 1027 1028 static int 1029 compare_files (struct comparison const *parent, 1030 char const *name0, 1031 char const *name1) 1032 { 1033 struct comparison cmp; 1034 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) 1035 register int f; 1036 int status = EXIT_SUCCESS; 1037 bool same_files; 1038 char *free0; 1039 char *free1; 1040 1041 /* If this is directory comparison, perhaps we have a file 1042 that exists only in one of the directories. 1043 If so, just print a message to that effect. */ 1044 1045 if (! ((name0 && name1) 1046 || (unidirectional_new_file && name1) 1047 || new_file)) 1048 { 1049 char const *name = name0 ? name0 : name1; 1050 char const *dir = parent->file[!name0].name; 1051 1052 /* See POSIX 1003.1-2001 for this format. */ 1053 message ("Only in %s: %s\n", dir, name); 1054 1055 /* Return EXIT_FAILURE so that diff_dirs will return 1056 EXIT_FAILURE ("some files differ"). */ 1057 return EXIT_FAILURE; 1058 } 1059 1060 memset (cmp.file, 0, sizeof cmp.file); 1061 cmp.parent = parent; 1062 1063 /* cmp.file[f].desc markers */ 1064 #define NONEXISTENT (-1) /* nonexistent file */ 1065 #define UNOPENED (-2) /* unopened file (e.g. directory) */ 1066 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ 1067 1068 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ 1069 1070 cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT; 1071 cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT; 1072 1073 /* Now record the full name of each file, including nonexistent ones. */ 1074 1075 if (!name0) 1076 name0 = name1; 1077 if (!name1) 1078 name1 = name0; 1079 1080 if (!parent) 1081 { 1082 free0 = NULL; 1083 free1 = NULL; 1084 cmp.file[0].name = name0; 1085 cmp.file[1].name = name1; 1086 } 1087 else 1088 { 1089 cmp.file[0].name = free0 1090 = file_name_concat (parent->file[0].name, name0, NULL); 1091 cmp.file[1].name = free1 1092 = file_name_concat (parent->file[1].name, name1, NULL); 1093 } 1094 1095 /* Stat the files. */ 1096 1097 for (f = 0; f < 2; f++) 1098 { 1099 if (cmp.file[f].desc != NONEXISTENT) 1100 { 1101 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) 1102 { 1103 cmp.file[f].desc = cmp.file[0].desc; 1104 cmp.file[f].stat = cmp.file[0].stat; 1105 } 1106 else if (STREQ (cmp.file[f].name, "-")) 1107 { 1108 cmp.file[f].desc = STDIN_FILENO; 1109 if (O_BINARY && binary && ! isatty (STDIN_FILENO)) 1110 xfreopen (NULL, "rb", stdin); 1111 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) 1112 cmp.file[f].desc = ERRNO_ENCODE (errno); 1113 else 1114 { 1115 if (S_ISREG (cmp.file[f].stat.st_mode)) 1116 { 1117 off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR); 1118 if (pos < 0) 1119 cmp.file[f].desc = ERRNO_ENCODE (errno); 1120 else 1121 cmp.file[f].stat.st_size = 1122 MAX (0, cmp.file[f].stat.st_size - pos); 1123 } 1124 1125 /* POSIX 1003.1-2001 requires current time for 1126 stdin. */ 1127 set_mtime_to_now (&cmp.file[f].stat); 1128 } 1129 } 1130 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) 1131 cmp.file[f].desc = ERRNO_ENCODE (errno); 1132 } 1133 } 1134 1135 /* Mark files as nonexistent as needed for -N and -P, if they are 1136 inaccessible empty regular files (the kind of files that 'patch' 1137 creates to indicate nonexistent backups), or if they are 1138 top-level files that do not exist but their counterparts do 1139 exist. */ 1140 for (f = 0; f < 2; f++) 1141 if ((new_file || (f == 0 && unidirectional_new_file)) 1142 && (cmp.file[f].desc == UNOPENED 1143 ? (S_ISREG (cmp.file[f].stat.st_mode) 1144 && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) 1145 && cmp.file[f].stat.st_size == 0) 1146 : (cmp.file[f].desc == ERRNO_ENCODE (ENOENT) 1147 && ! parent 1148 && cmp.file[1 - f].desc == UNOPENED))) 1149 cmp.file[f].desc = NONEXISTENT; 1150 1151 for (f = 0; f < 2; f++) 1152 if (cmp.file[f].desc == NONEXISTENT) 1153 { 1154 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat); 1155 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; 1156 } 1157 1158 for (f = 0; f < 2; f++) 1159 { 1160 int e = ERRNO_DECODE (cmp.file[f].desc); 1161 if (0 <= e) 1162 { 1163 errno = e; 1164 perror_with_name (cmp.file[f].name); 1165 status = EXIT_TROUBLE; 1166 } 1167 } 1168 1169 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) 1170 { 1171 /* If one is a directory, and it was specified in the command line, 1172 use the file in that dir with the other file's basename. */ 1173 1174 int fnm_arg = DIR_P (0); 1175 int dir_arg = 1 - fnm_arg; 1176 char const *fnm = cmp.file[fnm_arg].name; 1177 char const *dir = cmp.file[dir_arg].name; 1178 char const *filename = cmp.file[dir_arg].name = free0 1179 = find_dir_file_pathname (dir, last_component (fnm)); 1180 1181 if (STREQ (fnm, "-")) 1182 fatal ("cannot compare `-' to a directory"); 1183 1184 if (stat (filename, &cmp.file[dir_arg].stat) != 0) 1185 { 1186 perror_with_name (filename); 1187 status = EXIT_TROUBLE; 1188 } 1189 } 1190 1191 if (status != EXIT_SUCCESS) 1192 { 1193 /* One of the files should exist but does not. */ 1194 } 1195 else if (cmp.file[0].desc == NONEXISTENT 1196 && cmp.file[1].desc == NONEXISTENT) 1197 { 1198 /* Neither file "exists", so there's nothing to compare. */ 1199 } 1200 else if ((same_files 1201 = (cmp.file[0].desc != NONEXISTENT 1202 && cmp.file[1].desc != NONEXISTENT 1203 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) 1204 && same_file_attributes (&cmp.file[0].stat, 1205 &cmp.file[1].stat))) 1206 && no_diff_means_no_output) 1207 { 1208 /* The two named files are actually the same physical file. 1209 We know they are identical without actually reading them. */ 1210 } 1211 else if (DIR_P (0) & DIR_P (1)) 1212 { 1213 if (output_style == OUTPUT_IFDEF) 1214 fatal ("-D option not supported with directories"); 1215 1216 /* If both are directories, compare the files in them. */ 1217 1218 if (parent && !recursive) 1219 { 1220 /* But don't compare dir contents one level down 1221 unless -r was specified. 1222 See POSIX 1003.1-2001 for this format. */ 1223 message ("Common subdirectories: %s and %s\n", 1224 cmp.file[0].name, cmp.file[1].name); 1225 } 1226 else 1227 status = diff_dirs (&cmp, compare_files); 1228 } 1229 else if ((DIR_P (0) | DIR_P (1)) 1230 || (parent 1231 && (! S_ISREG (cmp.file[0].stat.st_mode) 1232 || ! S_ISREG (cmp.file[1].stat.st_mode)))) 1233 { 1234 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) 1235 { 1236 /* We have a subdirectory that exists only in one directory. */ 1237 1238 if ((DIR_P (0) | DIR_P (1)) 1239 && recursive 1240 && (new_file 1241 || (unidirectional_new_file 1242 && cmp.file[0].desc == NONEXISTENT))) 1243 status = diff_dirs (&cmp, compare_files); 1244 else 1245 { 1246 char const *dir; 1247 1248 /* PARENT must be non-NULL here. */ 1249 assert (parent); 1250 dir = parent->file[cmp.file[0].desc == NONEXISTENT].name; 1251 1252 /* See POSIX 1003.1-2001 for this format. */ 1253 message ("Only in %s: %s\n", dir, name0); 1254 1255 status = EXIT_FAILURE; 1256 } 1257 } 1258 else 1259 { 1260 /* We have two files that are not to be compared. */ 1261 1262 /* See POSIX 1003.1-2001 for this format. */ 1263 message5 ("File %s is a %s while file %s is a %s\n", 1264 file_label[0] ? file_label[0] : cmp.file[0].name, 1265 file_type (&cmp.file[0].stat), 1266 file_label[1] ? file_label[1] : cmp.file[1].name, 1267 file_type (&cmp.file[1].stat)); 1268 1269 /* This is a difference. */ 1270 status = EXIT_FAILURE; 1271 } 1272 } 1273 else if (files_can_be_treated_as_binary 1274 && S_ISREG (cmp.file[0].stat.st_mode) 1275 && S_ISREG (cmp.file[1].stat.st_mode) 1276 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size) 1277 { 1278 message ("Files %s and %s differ\n", 1279 file_label[0] ? file_label[0] : cmp.file[0].name, 1280 file_label[1] ? file_label[1] : cmp.file[1].name); 1281 status = EXIT_FAILURE; 1282 } 1283 else 1284 { 1285 /* Both exist and neither is a directory. */ 1286 1287 /* Open the files and record their descriptors. */ 1288 1289 int oflags = O_RDONLY | (binary ? O_BINARY : 0); 1290 1291 if (cmp.file[0].desc == UNOPENED) 1292 if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0) 1293 { 1294 perror_with_name (cmp.file[0].name); 1295 status = EXIT_TROUBLE; 1296 } 1297 if (cmp.file[1].desc == UNOPENED) 1298 { 1299 if (same_files) 1300 cmp.file[1].desc = cmp.file[0].desc; 1301 else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0) 1302 { 1303 perror_with_name (cmp.file[1].name); 1304 status = EXIT_TROUBLE; 1305 } 1306 } 1307 1308 /* Compare the files, if no error was found. */ 1309 1310 if (status == EXIT_SUCCESS) 1311 status = diff_2_files (&cmp); 1312 1313 /* Close the file descriptors. */ 1314 1315 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) 1316 { 1317 perror_with_name (cmp.file[0].name); 1318 status = EXIT_TROUBLE; 1319 } 1320 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc 1321 && close (cmp.file[1].desc) != 0) 1322 { 1323 perror_with_name (cmp.file[1].name); 1324 status = EXIT_TROUBLE; 1325 } 1326 } 1327 1328 /* Now the comparison has been done, if no error prevented it, 1329 and STATUS is the value this function will return. */ 1330 1331 if (status == EXIT_SUCCESS) 1332 { 1333 if (report_identical_files && !DIR_P (0)) 1334 message ("Files %s and %s are identical\n", 1335 file_label[0] ? file_label[0] : cmp.file[0].name, 1336 file_label[1] ? file_label[1] : cmp.file[1].name); 1337 } 1338 else 1339 { 1340 /* Flush stdout so that the user sees differences immediately. 1341 This can hurt performance, unfortunately. */ 1342 if (fflush (stdout) != 0) 1343 pfatal_with_name (_("standard output")); 1344 } 1345 1346 free (free0); 1347 free (free1); 1348 1349 return status; 1350 } 1351