1 /* diff - compare files line by line 2 3 Copyright (C) 1988-1989, 1992-1994, 1996, 1998, 2001-2002, 2004, 2006-2007, 4 2009-2010 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #define GDIFF_MAIN 22 #include "diff.h" 23 #include <assert.h> 24 #include "paths.h" 25 #include <c-stack.h> 26 #include <dirname.h> 27 #include <error.h> 28 #include <exclude.h> 29 #include <exitfail.h> 30 #include <file-type.h> 31 #include <fnmatch.h> 32 #include <getopt.h> 33 #include <hard-locale.h> 34 #include <prepargs.h> 35 #include <progname.h> 36 #include <sh-quote.h> 37 #include <stat-time.h> 38 #include <timespec.h> 39 #include <version-etc.h> 40 #include <xalloc.h> 41 #include <xfreopen.h> 42 43 /* The official name of this program (e.g., no `g' prefix). 
*/
#define PROGRAM_NAME "diff"

/* Author list handed to version_etc when -v/--version is processed.  */
#define AUTHORS \
  proper_name ("Paul Eggert"), \
  proper_name ("Mike Haertel"), \
  proper_name ("David Hayes"), \
  proper_name ("Richard Stallman"), \
  proper_name ("Len Tower")

/* Smallest gutter between the two columns of side-by-side output;
   main uses it when computing sdiff_half_width and
   sdiff_column2_offset.  It may be overridden at build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

/* Accumulator for the regexps given by repeated -F or -I options.
   add_regexp appends each pattern to `regexps', separated by "\|",
   and summarize_regexp_list compiles the combined text once all
   options have been read.  */
struct regexp_list
{
  char *regexps;        /* chars representing disjunction of the regexps */
  size_t len;           /* chars used in `regexps' */
  size_t size;          /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* compiled pattern; main points this at
                                    function_regexp or ignore_regexp */
};

static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively.  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps.  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps.  */
static struct regexp_list ignore_regexp_list;

#if O_BINARY
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#else
/* Without O_BINARY there is nothing to switch, so `binary' is a
   compile-time constant rather than a variable.  */
enum { binary = true };
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;

/* Short options accepted by getopt_long.  A letter followed by `:'
   takes an argument.  The digits are listed so that a bare -NUM can
   be accumulated one digit at a time in main.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";

/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so that they cannot collide with any
   short-option character returned by getopt_long.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_BLANK_EMPTY_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TABSIZE_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION from the option code and uses the
     result to index line_format and line_format_option.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION from the option code and uses the
     result to index group_format and group_format_option.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

/* Option spellings used in conflict diagnostics, in the same order as
   the *_GROUP_FORMAT_OPTION enumerators.  Each row is sized for the
   longest name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Option spellings used in conflict diagnostics, in the same order as
   the *_LINE_FORMAT_OPTION enumerators.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };

/* Long options for getopt_long.  Fields are: name, argument kind
   (0 = none, 1 = required, 2 = optional), flag pointer (always null
   here, so getopt_long returns the last field), and the value
   returned to the switch in main.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
{"recursive", 0, 0, 'r'}, 196 {"report-identical-files", 0, 0, 's'}, 197 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, 198 {"show-c-function", 0, 0, 'p'}, 199 {"show-function-line", 1, 0, 'F'}, 200 {"side-by-side", 0, 0, 'y'}, 201 {"speed-large-files", 0, 0, 'H'}, 202 {"starting-file", 1, 0, 'S'}, 203 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, 204 {"suppress-blank-empty", 0, 0, SUPPRESS_BLANK_EMPTY_OPTION}, 205 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, 206 {"tabsize", 1, 0, TABSIZE_OPTION}, 207 {"text", 0, 0, 'a'}, 208 {"to-file", 1, 0, TO_FILE_OPTION}, 209 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, 210 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, 211 {"unidirectional-new-file", 0, 0, 'P'}, 212 {"unified", 2, 0, 'U'}, 213 {"version", 0, 0, 'v'}, 214 {"width", 1, 0, 'W'}, 215 {0, 0, 0, 0} 216 }; 217 218 /* Return a string containing the command options with which diff was invoked. 219 Spaces appear between what were separate ARGV-elements. 220 There is a space at the beginning but none at the end. 221 If there were no options, the result is an empty string. 222 223 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 224 the length of that vector. */ 225 226 static char * 227 option_list (char **optionvec, int count) 228 { 229 int i; 230 size_t size = 1; 231 char *result; 232 char *p; 233 234 for (i = 0; i < count; i++) 235 size += 1 + shell_quote_length (optionvec[i]); 236 237 p = result = xmalloc (size); 238 239 for (i = 0; i < count; i++) 240 { 241 *p++ = ' '; 242 p = shell_quote_copy (p, optionvec[i]); 243 } 244 245 *p = '\0'; 246 return result; 247 } 248 249 250 /* Return an option value suitable for add_exclude. */ 251 252 static int 253 exclude_options (void) 254 { 255 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? 
FNM_CASEFOLD : 0); 256 } 257 258 int 259 main (int argc, char **argv) 260 { 261 int exit_status = EXIT_SUCCESS; 262 int c; 263 int i; 264 int prev = -1; 265 lin ocontext = -1; 266 bool explicit_context = false; 267 size_t width = 0; 268 bool show_c_function = false; 269 char const *from_file = NULL; 270 char const *to_file = NULL; 271 uintmax_t numval; 272 char *numend; 273 274 /* Do our initializations. */ 275 exit_failure = EXIT_TROUBLE; 276 initialize_main (&argc, &argv); 277 set_program_name (argv[0]); 278 setlocale (LC_ALL, ""); 279 textdomain (PACKAGE); 280 c_stack_action (0); 281 function_regexp_list.buf = &function_regexp; 282 ignore_regexp_list.buf = &ignore_regexp; 283 re_set_syntax (RE_SYNTAX_GREP); 284 excluded = new_exclude (); 285 286 /* Decode the options. */ 287 288 while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1) 289 { 290 switch (c) 291 { 292 case 0: 293 break; 294 295 case '0': 296 case '1': 297 case '2': 298 case '3': 299 case '4': 300 case '5': 301 case '6': 302 case '7': 303 case '8': 304 case '9': 305 if (! ISDIGIT (prev)) 306 ocontext = c - '0'; 307 else if (LIN_MAX / 10 < ocontext 308 || ((ocontext = 10 * ocontext + c - '0') < 0)) 309 ocontext = LIN_MAX; 310 break; 311 312 case 'a': 313 text = true; 314 break; 315 316 case 'b': 317 if (ignore_white_space < IGNORE_SPACE_CHANGE) 318 ignore_white_space = IGNORE_SPACE_CHANGE; 319 break; 320 321 case 'B': 322 ignore_blank_lines = true; 323 break; 324 325 case 'C': 326 case 'U': 327 { 328 if (optarg) 329 { 330 numval = strtoumax (optarg, &numend, 10); 331 if (*numend) 332 try_help ("invalid context length `%s'", optarg); 333 if (LIN_MAX < numval) 334 numval = LIN_MAX; 335 } 336 else 337 numval = 3; 338 339 specify_style (c == 'U' ? 
OUTPUT_UNIFIED : OUTPUT_CONTEXT); 340 if (context < numval) 341 context = numval; 342 explicit_context = true; 343 } 344 break; 345 346 case 'c': 347 specify_style (OUTPUT_CONTEXT); 348 if (context < 3) 349 context = 3; 350 break; 351 352 case 'd': 353 minimal = true; 354 break; 355 356 case 'D': 357 specify_style (OUTPUT_IFDEF); 358 { 359 static char const C_ifdef_group_formats[] = 360 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 361 char *b = xmalloc (sizeof C_ifdef_group_formats 362 + 7 * strlen (optarg) - 14 /* 7*"%s" */ 363 - 8 /* 5*"%%" + 3*"%c" */); 364 sprintf (b, C_ifdef_group_formats, 365 0, 366 optarg, optarg, 0, 367 optarg, optarg, 0, 368 optarg, optarg, optarg); 369 for (i = 0; i < sizeof group_format / sizeof group_format[0]; i++) 370 { 371 specify_value (&group_format[i], b, "-D"); 372 b += strlen (b) + 1; 373 } 374 } 375 break; 376 377 case 'e': 378 specify_style (OUTPUT_ED); 379 break; 380 381 case 'E': 382 if (ignore_white_space < IGNORE_TAB_EXPANSION) 383 ignore_white_space = IGNORE_TAB_EXPANSION; 384 break; 385 386 case 'f': 387 specify_style (OUTPUT_FORWARD_ED); 388 break; 389 390 case 'F': 391 add_regexp (&function_regexp_list, optarg); 392 break; 393 394 case 'h': 395 /* Split the files into chunks for faster processing. 396 Usually does not change the result. 397 398 This currently has no effect. */ 399 break; 400 401 case 'H': 402 speed_large_files = true; 403 break; 404 405 case 'i': 406 ignore_case = true; 407 break; 408 409 case 'I': 410 add_regexp (&ignore_regexp_list, optarg); 411 break; 412 413 case 'l': 414 if (!pr_program[0]) 415 try_help ("pagination not supported on this host", NULL); 416 paginate = true; 417 #ifdef SIGCHLD 418 /* Pagination requires forking and waiting, and 419 System V fork+wait does not work if SIGCHLD is ignored. 
*/ 420 signal (SIGCHLD, SIG_DFL); 421 #endif 422 break; 423 424 case 'L': 425 if (!file_label[0]) 426 file_label[0] = optarg; 427 else if (!file_label[1]) 428 file_label[1] = optarg; 429 else 430 fatal ("too many file label options"); 431 break; 432 433 case 'n': 434 specify_style (OUTPUT_RCS); 435 break; 436 437 case 'N': 438 new_file = true; 439 break; 440 441 case 'p': 442 show_c_function = true; 443 add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); 444 break; 445 446 case 'P': 447 unidirectional_new_file = true; 448 break; 449 450 case 'q': 451 brief = true; 452 break; 453 454 case 'r': 455 recursive = true; 456 break; 457 458 case 's': 459 report_identical_files = true; 460 break; 461 462 case 'S': 463 specify_value (&starting_file, optarg, "-S"); 464 break; 465 466 case 't': 467 expand_tabs = true; 468 break; 469 470 case 'T': 471 initial_tab = true; 472 break; 473 474 case 'u': 475 specify_style (OUTPUT_UNIFIED); 476 if (context < 3) 477 context = 3; 478 break; 479 480 case 'v': 481 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, PACKAGE_VERSION, 482 AUTHORS, (char *) NULL); 483 check_stdout (); 484 return EXIT_SUCCESS; 485 486 case 'w': 487 ignore_white_space = IGNORE_ALL_SPACE; 488 break; 489 490 case 'x': 491 add_exclude (excluded, optarg, exclude_options ()); 492 break; 493 494 case 'X': 495 if (add_exclude_file (add_exclude, excluded, optarg, 496 exclude_options (), '\n')) 497 pfatal_with_name (optarg); 498 break; 499 500 case 'y': 501 specify_style (OUTPUT_SDIFF); 502 break; 503 504 case 'W': 505 numval = strtoumax (optarg, &numend, 10); 506 if (! (0 < numval && numval <= SIZE_MAX) || *numend) 507 try_help ("invalid width `%s'", optarg); 508 if (width != numval) 509 { 510 if (width) 511 fatal ("conflicting width options"); 512 width = numval; 513 } 514 break; 515 516 case BINARY_OPTION: 517 #if O_BINARY 518 binary = true; 519 if (! 
isatty (STDOUT_FILENO)) 520 xfreopen (NULL, "wb", stdout); 521 #endif 522 break; 523 524 case FROM_FILE_OPTION: 525 specify_value (&from_file, optarg, "--from-file"); 526 break; 527 528 case HELP_OPTION: 529 usage (); 530 check_stdout (); 531 return EXIT_SUCCESS; 532 533 case HORIZON_LINES_OPTION: 534 numval = strtoumax (optarg, &numend, 10); 535 if (*numend) 536 try_help ("invalid horizon length `%s'", optarg); 537 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); 538 break; 539 540 case IGNORE_FILE_NAME_CASE_OPTION: 541 ignore_file_name_case = true; 542 break; 543 544 case INHIBIT_HUNK_MERGE_OPTION: 545 /* This option is obsolete, but accept it for backward 546 compatibility. */ 547 break; 548 549 case LEFT_COLUMN_OPTION: 550 left_column = true; 551 break; 552 553 case LINE_FORMAT_OPTION: 554 specify_style (OUTPUT_IFDEF); 555 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) 556 specify_value (&line_format[i], optarg, "--line-format"); 557 break; 558 559 case NO_IGNORE_FILE_NAME_CASE_OPTION: 560 ignore_file_name_case = false; 561 break; 562 563 case NORMAL_OPTION: 564 specify_style (OUTPUT_NORMAL); 565 break; 566 567 case SDIFF_MERGE_ASSIST_OPTION: 568 specify_style (OUTPUT_SDIFF); 569 sdiff_merge_assist = true; 570 break; 571 572 case STRIP_TRAILING_CR_OPTION: 573 strip_trailing_cr = true; 574 break; 575 576 case SUPPRESS_BLANK_EMPTY_OPTION: 577 suppress_blank_empty = true; 578 break; 579 580 case SUPPRESS_COMMON_LINES_OPTION: 581 suppress_common_lines = true; 582 break; 583 584 case TABSIZE_OPTION: 585 numval = strtoumax (optarg, &numend, 10); 586 if (! 
(0 < numval && numval <= SIZE_MAX) || *numend) 587 try_help ("invalid tabsize `%s'", optarg); 588 if (tabsize != numval) 589 { 590 if (tabsize) 591 fatal ("conflicting tabsize options"); 592 tabsize = numval; 593 } 594 break; 595 596 case TO_FILE_OPTION: 597 specify_value (&to_file, optarg, "--to-file"); 598 break; 599 600 case UNCHANGED_LINE_FORMAT_OPTION: 601 case OLD_LINE_FORMAT_OPTION: 602 case NEW_LINE_FORMAT_OPTION: 603 specify_style (OUTPUT_IFDEF); 604 c -= UNCHANGED_LINE_FORMAT_OPTION; 605 specify_value (&line_format[c], optarg, line_format_option[c]); 606 break; 607 608 case UNCHANGED_GROUP_FORMAT_OPTION: 609 case OLD_GROUP_FORMAT_OPTION: 610 case NEW_GROUP_FORMAT_OPTION: 611 case CHANGED_GROUP_FORMAT_OPTION: 612 specify_style (OUTPUT_IFDEF); 613 c -= UNCHANGED_GROUP_FORMAT_OPTION; 614 specify_value (&group_format[c], optarg, group_format_option[c]); 615 break; 616 617 default: 618 try_help (NULL, NULL); 619 } 620 prev = c; 621 } 622 623 if (output_style == OUTPUT_UNSPECIFIED) 624 { 625 if (show_c_function) 626 { 627 specify_style (OUTPUT_CONTEXT); 628 if (ocontext < 0) 629 context = 3; 630 } 631 else 632 specify_style (OUTPUT_NORMAL); 633 } 634 635 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) 636 { 637 #if (defined STAT_TIMESPEC || defined STAT_TIMESPEC_NS \ 638 || defined HAVE_STRUCT_STAT_ST_SPARE1) 639 time_format = "%Y-%m-%d %H:%M:%S.%N %z"; 640 #else 641 time_format = "%Y-%m-%d %H:%M:%S %z"; 642 #endif 643 } 644 else 645 { 646 /* See POSIX 1003.1-2001 for this format. */ 647 time_format = "%a %b %e %T %Y"; 648 } 649 650 if (0 <= ocontext 651 && (output_style == OUTPUT_CONTEXT 652 || output_style == OUTPUT_UNIFIED) 653 && (context < ocontext 654 || (ocontext < context && ! explicit_context))) 655 context = ocontext; 656 657 if (! tabsize) 658 tabsize = 8; 659 if (! width) 660 width = 130; 661 662 { 663 /* Maximize first the half line width, and then the gutter width, 664 according to the following constraints: 665 666 1. 
Two half lines plus a gutter must fit in a line. 667 2. If the half line width is nonzero: 668 a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 669 b. If tabs are not expanded to spaces, 670 a half line plus a gutter is an integral number of tabs, 671 so that tabs in the right column line up. */ 672 673 intmax_t t = expand_tabs ? 1 : tabsize; 674 intmax_t w = width; 675 intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t; 676 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)), 677 sdiff_column2_offset = sdiff_half_width ? off : w; 678 } 679 680 /* Make the horizon at least as large as the context, so that 681 shift_boundaries has more freedom to shift the first and last hunks. */ 682 if (horizon_lines < context) 683 horizon_lines = context; 684 685 summarize_regexp_list (&function_regexp_list); 686 summarize_regexp_list (&ignore_regexp_list); 687 688 if (output_style == OUTPUT_IFDEF) 689 { 690 for (i = 0; i < sizeof line_format / sizeof line_format[0]; i++) 691 if (!line_format[i]) 692 line_format[i] = "%l\n"; 693 if (!group_format[OLD]) 694 group_format[OLD] 695 = group_format[CHANGED] ? group_format[CHANGED] : "%<"; 696 if (!group_format[NEW]) 697 group_format[NEW] 698 = group_format[CHANGED] ? group_format[CHANGED] : "%>"; 699 if (!group_format[UNCHANGED]) 700 group_format[UNCHANGED] = "%="; 701 if (!group_format[CHANGED]) 702 group_format[CHANGED] = concat (group_format[OLD], 703 group_format[NEW], ""); 704 } 705 706 no_diff_means_no_output = 707 (output_style == OUTPUT_IFDEF ? 
708 (!*group_format[UNCHANGED] 709 || (STREQ (group_format[UNCHANGED], "%=") 710 && !*line_format[UNCHANGED])) 711 : (output_style != OUTPUT_SDIFF) | suppress_common_lines); 712 713 files_can_be_treated_as_binary = 714 (brief & binary 715 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr 716 | (ignore_regexp_list.regexps || ignore_white_space))); 717 718 switch_string = option_list (argv + 1, optind - 1); 719 720 if (from_file) 721 { 722 if (to_file) 723 fatal ("--from-file and --to-file both specified"); 724 else 725 for (; optind < argc; optind++) 726 { 727 int status = compare_files (NULL, from_file, argv[optind]); 728 if (exit_status < status) 729 exit_status = status; 730 } 731 } 732 else 733 { 734 if (to_file) 735 for (; optind < argc; optind++) 736 { 737 int status = compare_files (NULL, argv[optind], to_file); 738 if (exit_status < status) 739 exit_status = status; 740 } 741 else 742 { 743 if (argc - optind != 2) 744 { 745 if (argc - optind < 2) 746 try_help ("missing operand after `%s'", argv[argc - 1]); 747 else 748 try_help ("extra operand `%s'", argv[optind + 2]); 749 } 750 751 exit_status = compare_files (NULL, argv[optind], argv[optind + 1]); 752 } 753 } 754 755 /* Print any messages that were saved up for last. */ 756 print_message_queue (); 757 758 check_stdout (); 759 exit (exit_status); 760 return exit_status; 761 } 762 763 /* Append to REGLIST the regexp PATTERN. 
*/ 764 765 static void 766 add_regexp (struct regexp_list *reglist, char const *pattern) 767 { 768 size_t patlen = strlen (pattern); 769 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 770 771 if (m != 0) 772 error (0, 0, "%s: %s", pattern, m); 773 else 774 { 775 char *regexps = reglist->regexps; 776 size_t len = reglist->len; 777 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 778 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 779 size_t size = reglist->size; 780 781 if (size <= newlen) 782 { 783 if (!size) 784 size = 1; 785 786 do size *= 2; 787 while (size <= newlen); 788 789 reglist->size = size; 790 reglist->regexps = regexps = xrealloc (regexps, size); 791 } 792 if (multiple_regexps) 793 { 794 regexps[len++] = '\\'; 795 regexps[len++] = '|'; 796 } 797 memcpy (regexps + len, pattern, patlen + 1); 798 } 799 } 800 801 /* Ensure that REGLIST represents the disjunction of its regexps. 802 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 803 804 static void 805 summarize_regexp_list (struct regexp_list *reglist) 806 { 807 if (reglist->regexps) 808 { 809 /* At least one regexp was specified. Allocate a fastmap for it. */ 810 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 811 if (reglist->multiple_regexps) 812 { 813 /* Compile the disjunction of the regexps. 814 (If just one regexp was specified, it is already compiled.) 
*/ 815 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 816 reglist->buf); 817 if (m) 818 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 819 } 820 } 821 } 822 823 static void 824 try_help (char const *reason_msgid, char const *operand) 825 { 826 if (reason_msgid) 827 error (0, 0, _(reason_msgid), operand); 828 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 829 program_name); 830 abort (); 831 } 832 833 static void 834 check_stdout (void) 835 { 836 if (ferror (stdout)) 837 fatal ("write failed"); 838 else if (fclose (stdout) != 0) 839 pfatal_with_name (_("standard output")); 840 } 841 842 static char const * const option_help_msgid[] = { 843 N_("Compare files line by line."), 844 "", 845 N_("-i --ignore-case Ignore case differences in file contents."), 846 N_("--ignore-file-name-case Ignore case when comparing file names."), 847 N_("--no-ignore-file-name-case Consider case when comparing file names."), 848 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 849 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 850 N_("-w --ignore-all-space Ignore all white space."), 851 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 852 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 853 N_("--strip-trailing-cr Strip trailing carriage return on input."), 854 #if O_BINARY 855 N_("--binary Read and write data in binary mode."), 856 #endif 857 N_("-a --text Treat all files as text."), 858 "", 859 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 860 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 861 --label LABEL Use LABEL instead of file name.\n\ 862 -p --show-c-function Show which C function each change is in.\n\ 863 -F RE --show-function-line=RE Show the most recent line matching RE."), 864 N_("-q --brief Output only whether files differ."), 865 N_("-e --ed Output an ed 
script."), 866 N_("--normal Output a normal diff."), 867 N_("-n --rcs Output an RCS format diff."), 868 N_("-y --side-by-side Output in two columns.\n\ 869 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 870 --left-column Output only the left column of common lines.\n\ 871 --suppress-common-lines Do not output common lines."), 872 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 873 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 874 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 875 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 876 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 877 N_(" GFMT may contain:\n\ 878 %< lines from FILE1\n\ 879 %> lines from FILE2\n\ 880 %= lines common to FILE1 and FILE2\n\ 881 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 882 LETTERs are as follows for new group, lower case for old group:\n\ 883 F first line number\n\ 884 L last line number\n\ 885 N number of lines = L-F+1\n\ 886 E F-1\n\ 887 M L+1"), 888 N_(" LFMT may contain:\n\ 889 %L contents of line\n\ 890 %l contents of line, excluding any trailing newline\n\ 891 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 892 N_(" Either GFMT or LFMT may contain:\n\ 893 %% %\n\ 894 %c'C' the single character C\n\ 895 %c'\\OOO' the character with octal code OOO"), 896 "", 897 N_("-l --paginate Pass the output through `pr' to paginate it."), 898 N_("-t --expand-tabs Expand tabs to spaces in output."), 899 N_("-T --initial-tab Make tabs line up by prepending a tab."), 900 N_("--tabsize=NUM Tab stops are every NUM (default 8) print columns."), 901 N_("--suppress-blank-empty Suppress space or tab before empty output lines."), 902 "", 903 N_("-r --recursive Recursively compare any subdirectories found."), 904 N_("-N --new-file Treat absent files as empty."), 905 
N_("--unidirectional-new-file Treat absent first files as empty."), 906 N_("-s --report-identical-files Report when two files are the same."), 907 N_("-x PAT --exclude=PAT Exclude files that match PAT."), 908 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 909 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 910 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 911 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 912 "", 913 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 914 N_("-d --minimal Try hard to find a smaller set of changes."), 915 N_("--speed-large-files Assume large files and many scattered small changes."), 916 "", 917 N_("-v --version Output version info."), 918 N_("--help Output this help."), 919 "", 920 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 921 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 922 N_("If a FILE is `-', read standard input."), 923 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 924 0 925 }; 926 927 static void 928 usage (void) 929 { 930 char const * const *p; 931 932 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 933 934 for (p = option_help_msgid; *p; p++) 935 { 936 if (!**p) 937 putchar ('\n'); 938 else 939 { 940 char const *msg = _(*p); 941 char const *nl; 942 while ((nl = strchr (msg, '\n'))) 943 { 944 int msglen = nl + 1 - msg; 945 printf (" %.*s", msglen, msg); 946 msg = nl + 1; 947 } 948 949 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 950 } 951 } 952 emit_bug_reporting_address (); 953 } 954 955 /* Set VAR to VALUE, reporting an OPTION error if this is a 956 conflict. 
*/ 957 static void 958 specify_value (char const **var, char const *value, char const *option) 959 { 960 if (*var && strcmp (*var, value) != 0) 961 { 962 error (0, 0, _("conflicting %s option value `%s'"), option, value); 963 try_help (NULL, NULL); 964 } 965 *var = value; 966 } 967 968 /* Set the output style to STYLE, diagnosing conflicts. */ 969 static void 970 specify_style (enum output_style style) 971 { 972 if (output_style != style) 973 { 974 if (output_style != OUTPUT_UNSPECIFIED) 975 try_help ("conflicting output style options", NULL); 976 output_style = style; 977 } 978 } 979 980 /* Set the last-modified time of *ST to be the current time. */ 981 982 static void 983 set_mtime_to_now (struct stat *st) 984 { 985 #ifdef STAT_TIMESPEC 986 gettime (&STAT_TIMESPEC (st, st_mtim)); 987 #else 988 struct timespec t; 989 gettime (&t); 990 st->st_mtime = t.tv_sec; 991 # if defined STAT_TIMESPEC_NS 992 STAT_TIMESPEC_NS (st, st_mtim) = t.tv_nsec; 993 # elif defined HAVE_STRUCT_STAT_ST_SPARE1 994 st->st_spare1 = t.tv_nsec / 1000; 995 # endif 996 #endif 997 } 998 999 /* Compare two files (or dirs) with parent comparison PARENT 1000 and names NAME0 and NAME1. 1001 (If PARENT is null, then the first name is just NAME0, etc.) 1002 This is self-contained; it opens the files and closes them. 1003 1004 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1005 different, EXIT_TROUBLE if there is a problem opening them. */ 1006 1007 static int 1008 compare_files (struct comparison const *parent, 1009 char const *name0, 1010 char const *name1) 1011 { 1012 struct comparison cmp; 1013 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) 1014 register int f; 1015 int status = EXIT_SUCCESS; 1016 bool same_files; 1017 char *free0; 1018 char *free1; 1019 1020 /* If this is directory comparison, perhaps we have a file 1021 that exists only in one of the directories. 1022 If so, just print a message to that effect. */ 1023 1024 if (! 
((name0 && name1) 1025 || (unidirectional_new_file && name1) 1026 || new_file)) 1027 { 1028 char const *name = name0 ? name0 : name1; 1029 char const *dir = parent->file[!name0].name; 1030 1031 /* See POSIX 1003.1-2001 for this format. */ 1032 message ("Only in %s: %s\n", dir, name); 1033 1034 /* Return EXIT_FAILURE so that diff_dirs will return 1035 EXIT_FAILURE ("some files differ"). */ 1036 return EXIT_FAILURE; 1037 } 1038 1039 memset (cmp.file, 0, sizeof cmp.file); 1040 cmp.parent = parent; 1041 1042 /* cmp.file[f].desc markers */ 1043 #define NONEXISTENT (-1) /* nonexistent file */ 1044 #define UNOPENED (-2) /* unopened file (e.g. directory) */ 1045 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ 1046 1047 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ 1048 1049 cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT; 1050 cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT; 1051 1052 /* Now record the full name of each file, including nonexistent ones. */ 1053 1054 if (!name0) 1055 name0 = name1; 1056 if (!name1) 1057 name1 = name0; 1058 1059 if (!parent) 1060 { 1061 free0 = NULL; 1062 free1 = NULL; 1063 cmp.file[0].name = name0; 1064 cmp.file[1].name = name1; 1065 } 1066 else 1067 { 1068 cmp.file[0].name = free0 1069 = dir_file_pathname (parent->file[0].name, name0); 1070 cmp.file[1].name = free1 1071 = dir_file_pathname (parent->file[1].name, name1); 1072 } 1073 1074 /* Stat the files. */ 1075 1076 for (f = 0; f < 2; f++) 1077 { 1078 if (cmp.file[f].desc != NONEXISTENT) 1079 { 1080 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) 1081 { 1082 cmp.file[f].desc = cmp.file[0].desc; 1083 cmp.file[f].stat = cmp.file[0].stat; 1084 } 1085 else if (STREQ (cmp.file[f].name, "-")) 1086 { 1087 cmp.file[f].desc = STDIN_FILENO; 1088 if (O_BINARY && binary && ! 
isatty (STDIN_FILENO)) 1089 xfreopen (NULL, "rb", stdin); 1090 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) 1091 cmp.file[f].desc = ERRNO_ENCODE (errno); 1092 else 1093 { 1094 if (S_ISREG (cmp.file[f].stat.st_mode)) 1095 { 1096 off_t pos = lseek (STDIN_FILENO, 0, SEEK_CUR); 1097 if (pos < 0) 1098 cmp.file[f].desc = ERRNO_ENCODE (errno); 1099 else 1100 cmp.file[f].stat.st_size = 1101 MAX (0, cmp.file[f].stat.st_size - pos); 1102 } 1103 1104 /* POSIX 1003.1-2001 requires current time for 1105 stdin. */ 1106 set_mtime_to_now (&cmp.file[f].stat); 1107 } 1108 } 1109 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) 1110 cmp.file[f].desc = ERRNO_ENCODE (errno); 1111 } 1112 } 1113 1114 /* Mark files as nonexistent as needed for -N and -P, if they are 1115 inaccessible empty regular files (the kind of files that 'patch' 1116 creates to indicate nonexistent backups), or if they are 1117 top-level files that do not exist but their counterparts do 1118 exist. */ 1119 for (f = 0; f < 2; f++) 1120 if ((new_file || (f == 0 && unidirectional_new_file)) 1121 && (cmp.file[f].desc == UNOPENED 1122 ? (S_ISREG (cmp.file[f].stat.st_mode) 1123 && ! (cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) 1124 && cmp.file[f].stat.st_size == 0) 1125 : (cmp.file[f].desc == ERRNO_ENCODE (ENOENT) 1126 && ! parent 1127 && cmp.file[1 - f].desc == UNOPENED))) 1128 cmp.file[f].desc = NONEXISTENT; 1129 1130 for (f = 0; f < 2; f++) 1131 if (cmp.file[f].desc == NONEXISTENT) 1132 { 1133 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat); 1134 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; 1135 } 1136 1137 for (f = 0; f < 2; f++) 1138 { 1139 int e = ERRNO_DECODE (cmp.file[f].desc); 1140 if (0 <= e) 1141 { 1142 errno = e; 1143 perror_with_name (cmp.file[f].name); 1144 status = EXIT_TROUBLE; 1145 } 1146 } 1147 1148 if (status == EXIT_SUCCESS && ! 
parent && DIR_P (0) != DIR_P (1)) 1149 { 1150 /* If one is a directory, and it was specified in the command line, 1151 use the file in that dir with the other file's basename. */ 1152 1153 int fnm_arg = DIR_P (0); 1154 int dir_arg = 1 - fnm_arg; 1155 char const *fnm = cmp.file[fnm_arg].name; 1156 char const *dir = cmp.file[dir_arg].name; 1157 char const *filename = cmp.file[dir_arg].name = free0 1158 = dir_file_pathname (dir, last_component (fnm)); 1159 1160 if (STREQ (fnm, "-")) 1161 fatal ("cannot compare `-' to a directory"); 1162 1163 if (stat (filename, &cmp.file[dir_arg].stat) != 0) 1164 { 1165 perror_with_name (filename); 1166 status = EXIT_TROUBLE; 1167 } 1168 } 1169 1170 if (status != EXIT_SUCCESS) 1171 { 1172 /* One of the files should exist but does not. */ 1173 } 1174 else if (cmp.file[0].desc == NONEXISTENT 1175 && cmp.file[1].desc == NONEXISTENT) 1176 { 1177 /* Neither file "exists", so there's nothing to compare. */ 1178 } 1179 else if ((same_files 1180 = (cmp.file[0].desc != NONEXISTENT 1181 && cmp.file[1].desc != NONEXISTENT 1182 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) 1183 && same_file_attributes (&cmp.file[0].stat, 1184 &cmp.file[1].stat))) 1185 && no_diff_means_no_output) 1186 { 1187 /* The two named files are actually the same physical file. 1188 We know they are identical without actually reading them. */ 1189 } 1190 else if (DIR_P (0) & DIR_P (1)) 1191 { 1192 if (output_style == OUTPUT_IFDEF) 1193 fatal ("-D option not supported with directories"); 1194 1195 /* If both are directories, compare the files in them. */ 1196 1197 if (parent && !recursive) 1198 { 1199 /* But don't compare dir contents one level down 1200 unless -r was specified. 1201 See POSIX 1003.1-2001 for this format. */ 1202 message ("Common subdirectories: %s and %s\n", 1203 cmp.file[0].name, cmp.file[1].name); 1204 } 1205 else 1206 status = diff_dirs (&cmp, compare_files); 1207 } 1208 else if ((DIR_P (0) | DIR_P (1)) 1209 || (parent 1210 && (! 
S_ISREG (cmp.file[0].stat.st_mode) 1211 || ! S_ISREG (cmp.file[1].stat.st_mode)))) 1212 { 1213 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) 1214 { 1215 /* We have a subdirectory that exists only in one directory. */ 1216 1217 if ((DIR_P (0) | DIR_P (1)) 1218 && recursive 1219 && (new_file 1220 || (unidirectional_new_file 1221 && cmp.file[0].desc == NONEXISTENT))) 1222 status = diff_dirs (&cmp, compare_files); 1223 else 1224 { 1225 char const *dir; 1226 1227 /* PARENT must be non-NULL here. */ 1228 assert (parent); 1229 dir = parent->file[cmp.file[0].desc == NONEXISTENT].name; 1230 1231 /* See POSIX 1003.1-2001 for this format. */ 1232 message ("Only in %s: %s\n", dir, name0); 1233 1234 status = EXIT_FAILURE; 1235 } 1236 } 1237 else 1238 { 1239 /* We have two files that are not to be compared. */ 1240 1241 /* See POSIX 1003.1-2001 for this format. */ 1242 message5 ("File %s is a %s while file %s is a %s\n", 1243 file_label[0] ? file_label[0] : cmp.file[0].name, 1244 file_type (&cmp.file[0].stat), 1245 file_label[1] ? file_label[1] : cmp.file[1].name, 1246 file_type (&cmp.file[1].stat)); 1247 1248 /* This is a difference. */ 1249 status = EXIT_FAILURE; 1250 } 1251 } 1252 else if (files_can_be_treated_as_binary 1253 && S_ISREG (cmp.file[0].stat.st_mode) 1254 && S_ISREG (cmp.file[1].stat.st_mode) 1255 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size) 1256 { 1257 message ("Files %s and %s differ\n", 1258 file_label[0] ? file_label[0] : cmp.file[0].name, 1259 file_label[1] ? file_label[1] : cmp.file[1].name); 1260 status = EXIT_FAILURE; 1261 } 1262 else 1263 { 1264 /* Both exist and neither is a directory. */ 1265 1266 /* Open the files and record their descriptors. */ 1267 1268 int oflags = O_RDONLY | (binary ? 
O_BINARY : 0); 1269 1270 if (cmp.file[0].desc == UNOPENED) 1271 if ((cmp.file[0].desc = open (cmp.file[0].name, oflags, 0)) < 0) 1272 { 1273 perror_with_name (cmp.file[0].name); 1274 status = EXIT_TROUBLE; 1275 } 1276 if (cmp.file[1].desc == UNOPENED) 1277 { 1278 if (same_files) 1279 cmp.file[1].desc = cmp.file[0].desc; 1280 else if ((cmp.file[1].desc = open (cmp.file[1].name, oflags, 0)) < 0) 1281 { 1282 perror_with_name (cmp.file[1].name); 1283 status = EXIT_TROUBLE; 1284 } 1285 } 1286 1287 /* Compare the files, if no error was found. */ 1288 1289 if (status == EXIT_SUCCESS) 1290 status = diff_2_files (&cmp); 1291 1292 /* Close the file descriptors. */ 1293 1294 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) 1295 { 1296 perror_with_name (cmp.file[0].name); 1297 status = EXIT_TROUBLE; 1298 } 1299 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc 1300 && close (cmp.file[1].desc) != 0) 1301 { 1302 perror_with_name (cmp.file[1].name); 1303 status = EXIT_TROUBLE; 1304 } 1305 } 1306 1307 /* Now the comparison has been done, if no error prevented it, 1308 and STATUS is the value this function will return. */ 1309 1310 if (status == EXIT_SUCCESS) 1311 { 1312 if (report_identical_files && !DIR_P (0)) 1313 message ("Files %s and %s are identical\n", 1314 file_label[0] ? file_label[0] : cmp.file[0].name, 1315 file_label[1] ? file_label[1] : cmp.file[1].name); 1316 } 1317 else 1318 { 1319 /* Flush stdout so that the user sees differences immediately. 1320 This can hurt performance, unfortunately. */ 1321 if (fflush (stdout) != 0) 1322 pfatal_with_name (_("standard output")); 1323 } 1324 1325 free (free0); 1326 free (free1); 1327 1328 return status; 1329 } 1330