1 /* GNU DIFF entry routine.
2 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU DIFF.
5
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 */
17
18 /* GNU DIFF was written by Mike Haertel, David Hayes,
19 Richard Stallman, Len Tower, and Paul Eggert. */
20
21 #define GDIFF_MAIN
22 #include "diff.h"
23 #include <signal.h>
24 #include "error.h"
25 #include "getopt.h"
26
27 #ifdef HAVE_FNMATCH
28 # include <fnmatch.h> /* This is supposed to be available on Posix systems */
29 #else /* HAVE_FNMATCH */
30 # include "fnmatch.h" /* Our substitute */
31 #endif /* HAVE_FNMATCH */
32
33 #ifndef DEFAULT_WIDTH
34 #define DEFAULT_WIDTH 130
35 #endif
36
37 #ifndef GUTTER_WIDTH_MINIMUM
38 #define GUTTER_WIDTH_MINIMUM 3
39 #endif
40
41 /* diff.c has a real initialize_main function. */
42 #ifdef initialize_main
43 #undef initialize_main
44 #endif
45
46 static char const *filetype PARAMS((struct stat const *));
47 static char *option_list PARAMS((char **, int));
48 static int add_exclude_file PARAMS((char const *));
49 static int ck_atoi PARAMS((char const *, int *));
50 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
51 static int specify_format PARAMS((char **, char *));
52 static void add_exclude PARAMS((char const *));
53 static void add_regexp PARAMS((struct regexp_list **, char const *));
54 static void specify_style PARAMS((enum output_style));
55 static int try_help PARAMS((char const *));
56 static void check_output PARAMS((FILE *));
57 static void usage PARAMS((void));
58 static void initialize_main PARAMS((int *, char ***));
59
60 /* Nonzero for -r: if comparing two directories,
61 compare their common subdirectories recursively. */
62
63 static int recursive;
64
65 /* For debugging: don't do discard_confusing_lines. */
66
67 int no_discards;
68
69 #if HAVE_SETMODE
70 /* I/O mode: nonzero only if using binary input/output. */
71 static int binary_I_O;
72 #endif
73
/* Return a freshly allocated string containing the command options with
   which diff was invoked.  Each element of OPTIONVEC is preceded by a
   single space, so the result has a space at the beginning but none at
   the end.  If COUNT is zero the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and
   COUNT, the length of that vector.  The result comes from xmalloc.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  int i;
  size_t length = 0;
  char *result;
  char *dst;

  /* One separating space plus the text of every element.  */
  for (i = 0; i < count; i++)
    length += strlen (optionvec[i]) + 1;

  dst = result = xmalloc (length + 1);

  /* Append through a running pointer so that the growing result is never
     rescanned from its start, as repeated strcat calls would do.  */
  for (i = 0; i < count; i++)
    {
      char const *src = optionvec[i];

      *dst++ = ' ';
      while (*src)
        *dst++ = *src++;
    }
  *dst = 0;

  return result;
}
105
/* Convert STR, a string of decimal digits, to a nonnegative integer and
   store the result in *OUT.  Return 0 on success.  Return -1, leaving *OUT
   untouched, if STR contains a character that is not a decimal digit or
   if the value does not fit in an int.  An empty STR yields 0, as it
   always has.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  /* Largest int value, derived without <limits.h>; this assumes the usual
     relation UINT_MAX == 2 * INT_MAX + 1.  */
  unsigned int const limit = (unsigned int) -1 >> 1;
  unsigned int value = 0;
  char const *p;

  for (p = str; *p; p++)
    {
      unsigned int digit;

      if (*p < '0' || *p > '9')
        return -1;
      digit = *p - '0';

      /* Reject the digit if value * 10 + digit would exceed LIMIT.  */
      if (value > (limit - digit) / 10)
        return -1;
      value = value * 10 + digit;
    }

  /* Convert the argument itself, not whatever getopt last left in
     `optarg'.  */
  *out = (int) value;
  return 0;
}
121
122 /* Keep track of excluded file name patterns. */
123
124 static char const **exclude;
125 static int exclude_alloc, exclude_count;
126
127 int
excluded_filename(f)128 excluded_filename (f)
129 char const *f;
130 {
131 int i;
132 for (i = 0; i < exclude_count; i++)
133 if (fnmatch (exclude[i], f, 0) == 0)
134 return 1;
135 return 0;
136 }
137
138 static void
add_exclude(pattern)139 add_exclude (pattern)
140 char const *pattern;
141 {
142 if (exclude_alloc <= exclude_count)
143 exclude = (char const **)
144 (exclude_alloc == 0
145 ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
146 : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
147
148 exclude[exclude_count++] = pattern;
149 }
150
151 static int
add_exclude_file(name)152 add_exclude_file (name)
153 char const *name;
154 {
155 struct file_data f;
156 char *p, *q, *lim;
157
158 f.name = optarg;
159 f.desc = (strcmp (optarg, "-") == 0
160 ? STDIN_FILENO
161 : open (optarg, O_RDONLY, 0));
162 if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
163 return -1;
164
165 sip (&f, 1);
166 slurp (&f);
167
168 for (p = f.buffer, lim = p + f.buffered_chars; p < lim; p = q)
169 {
170 q = (char *) memchr (p, '\n', lim - p);
171 if (!q)
172 q = lim;
173 *q++ = 0;
174 add_exclude (p);
175 }
176
177 return close (f.desc);
178 }
179
/* Long option table handed to getopt_long.  Entries whose last member is
   a character are equivalent to the short option of the same letter.
   The numbers 129 and up that appear as the last member of the remaining
   entries tell the big switch in `diff_run' how to process those
   options.  */

static struct option const longopts[] =
{
  {"ignore-blank-lines",      no_argument,       0, 'B'},
  {"context",                 optional_argument, 0, 'C'},
  {"ifdef",                   required_argument, 0, 'D'},
  {"show-function-line",      required_argument, 0, 'F'},
  {"speed-large-files",       no_argument,       0, 'H'},
  {"ignore-matching-lines",   required_argument, 0, 'I'},
  {"label",                   required_argument, 0, 'L'},
  {"file-label",              required_argument, 0, 'L'},  /* An alias, no longer recommended */
  {"new-file",                no_argument,       0, 'N'},
  {"entire-new-file",         no_argument,       0, 'N'},  /* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument,       0, 'P'},
  {"starting-file",           required_argument, 0, 'S'},
  {"initial-tab",             no_argument,       0, 'T'},
  {"width",                   required_argument, 0, 'W'},
  {"text",                    no_argument,       0, 'a'},
  {"ascii",                   no_argument,       0, 'a'},  /* An alias, no longer recommended */
  {"ignore-space-change",     no_argument,       0, 'b'},
  {"minimal",                 no_argument,       0, 'd'},
  {"ed",                      no_argument,       0, 'e'},
  {"forward-ed",              no_argument,       0, 'f'},
  {"ignore-case",             no_argument,       0, 'i'},
  {"paginate",                no_argument,       0, 'l'},
  {"print",                   no_argument,       0, 'l'},  /* An alias, no longer recommended */
  {"rcs",                     no_argument,       0, 'n'},
  {"show-c-function",         no_argument,       0, 'p'},
  {"brief",                   no_argument,       0, 'q'},
  {"recursive",               no_argument,       0, 'r'},
  {"report-identical-files",  no_argument,       0, 's'},
  {"expand-tabs",             no_argument,       0, 't'},
  {"version",                 no_argument,       0, 'v'},
  {"ignore-all-space",        no_argument,       0, 'w'},
  {"exclude",                 required_argument, 0, 'x'},
  {"exclude-from",            required_argument, 0, 'X'},
  {"side-by-side",            no_argument,       0, 'y'},
  {"unified",                 optional_argument, 0, 'U'},
  {"left-column",             no_argument,       0, 129},
  {"suppress-common-lines",   no_argument,       0, 130},
  {"sdiff-merge-assist",      no_argument,       0, 131},
  {"old-line-format",         required_argument, 0, 132},
  {"new-line-format",         required_argument, 0, 133},
  {"unchanged-line-format",   required_argument, 0, 134},
  {"line-format",             required_argument, 0, 135},
  {"old-group-format",        required_argument, 0, 136},
  {"new-group-format",        required_argument, 0, 137},
  {"unchanged-group-format",  required_argument, 0, 138},
  {"changed-group-format",    required_argument, 0, 139},
  {"horizon-lines",           required_argument, 0, 140},
  {"help",                    no_argument,       0, 141},
  {"binary",                  no_argument,       0, 142},
  {0, 0, 0, 0}
};
236
237
238
239 int
diff_run(argc,argv,out,callbacks_arg)240 diff_run (argc, argv, out, callbacks_arg)
241 int argc;
242 char *argv[];
243 const char *out;
244 const struct diff_callbacks *callbacks_arg;
245 {
246 int val;
247 int c;
248 int prev = -1;
249 int width = DEFAULT_WIDTH;
250 int show_c_function = 0;
251 int optind_old;
252 int opened_file = 0;
253
254 callbacks = callbacks_arg;
255
256 /* Do our initializations. */
257 initialize_main (&argc, &argv);
258 optind_old = optind;
259 optind = 0;
260
261 /* Set the jump buffer, so that diff may abort execution without
262 terminating the process. */
263 val = setjmp (diff_abort_buf);
264 if (val != 0)
265 {
266 optind = optind_old;
267 if (opened_file)
268 fclose (outfile);
269 return val;
270 }
271
272 /* Decode the options. */
273 while ((c = getopt_long (argc, argv,
274 "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
275 longopts, 0)) != EOF)
276 {
277 switch (c)
278 {
279 /* All digits combine in decimal to specify the context-size. */
280 case '1':
281 case '2':
282 case '3':
283 case '4':
284 case '5':
285 case '6':
286 case '7':
287 case '8':
288 case '9':
289 case '0':
290 if (context == -1)
291 context = 0;
292 /* If a context length has already been specified,
293 more digits allowed only if they follow right after the others.
294 Reject two separate runs of digits, or digits after -C. */
295 else if (prev < '0' || prev > '9')
296 fatal ("context length specified twice");
297
298 context = context * 10 + c - '0';
299 break;
300
301 case 'a':
302 /* Treat all files as text files; never treat as binary. */
303 always_text_flag = 1;
304 break;
305
306 case 'b':
307 /* Ignore changes in amount of white space. */
308 ignore_space_change_flag = 1;
309 ignore_some_changes = 1;
310 ignore_some_line_changes = 1;
311 break;
312
313 case 'B':
314 /* Ignore changes affecting only blank lines. */
315 ignore_blank_lines_flag = 1;
316 ignore_some_changes = 1;
317 break;
318
319 case 'C': /* +context[=lines] */
320 case 'U': /* +unified[=lines] */
321 if (optarg)
322 {
323 if (context >= 0)
324 fatal ("context length specified twice");
325
326 if (ck_atoi (optarg, &context))
327 fatal ("invalid context length argument");
328 }
329
330 /* Falls through. */
331 case 'c':
332 /* Make context-style output. */
333 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
334 break;
335
336 case 'd':
337 /* Don't discard lines. This makes things slower (sometimes much
338 slower) but will find a guaranteed minimal set of changes. */
339 no_discards = 1;
340 break;
341
342 case 'D':
343 /* Make merged #ifdef output. */
344 specify_style (OUTPUT_IFDEF);
345 {
346 int i, err = 0;
347 static char const C_ifdef_group_formats[] =
348 "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
349 char *b = xmalloc (sizeof (C_ifdef_group_formats)
350 + 7 * strlen(optarg) - 14 /* 7*"%s" */
351 - 8 /* 5*"%%" + 3*"%c" */);
352 sprintf (b, C_ifdef_group_formats,
353 optarg, optarg, 0,
354 optarg, optarg, 0, 0,
355 optarg, optarg, optarg);
356 for (i = 0; i < 4; i++)
357 {
358 err |= specify_format (&group_format[i], b);
359 b += strlen (b) + 1;
360 }
361 if (err)
362 diff_error ("conflicting #ifdef formats", 0, 0);
363 }
364 break;
365
366 case 'e':
367 /* Make output that is a valid `ed' script. */
368 specify_style (OUTPUT_ED);
369 break;
370
371 case 'f':
372 /* Make output that looks vaguely like an `ed' script
373 but has changes in the order they appear in the file. */
374 specify_style (OUTPUT_FORWARD_ED);
375 break;
376
377 case 'F':
378 /* Show, for each set of changes, the previous line that
379 matches the specified regexp. Currently affects only
380 context-style output. */
381 add_regexp (&function_regexp_list, optarg);
382 break;
383
384 case 'h':
385 /* Split the files into chunks of around 1500 lines
386 for faster processing. Usually does not change the result.
387
388 This currently has no effect. */
389 break;
390
391 case 'H':
392 /* Turn on heuristics that speed processing of large files
393 with a small density of changes. */
394 heuristic = 1;
395 break;
396
397 case 'i':
398 /* Ignore changes in case. */
399 ignore_case_flag = 1;
400 ignore_some_changes = 1;
401 ignore_some_line_changes = 1;
402 break;
403
404 case 'I':
405 /* Ignore changes affecting only lines that match the
406 specified regexp. */
407 add_regexp (&ignore_regexp_list, optarg);
408 ignore_some_changes = 1;
409 break;
410
411 case 'l':
412 /* Pass the output through `pr' to paginate it. */
413 paginate_flag = 1;
414 #if !defined(SIGCHLD) && defined(SIGCLD)
415 #define SIGCHLD SIGCLD
416 #endif
417 #ifdef SIGCHLD
418 /* Pagination requires forking and waiting, and
419 System V fork+wait does not work if SIGCHLD is ignored. */
420 signal (SIGCHLD, SIG_DFL);
421 #endif
422 break;
423
424 case 'L':
425 /* Specify file labels for `-c' output headers. */
426 if (!file_label[0])
427 file_label[0] = optarg;
428 else if (!file_label[1])
429 file_label[1] = optarg;
430 else
431 fatal ("too many file label options");
432 break;
433
434 case 'n':
435 /* Output RCS-style diffs, like `-f' except that each command
436 specifies the number of lines affected. */
437 specify_style (OUTPUT_RCS);
438 break;
439
440 case 'N':
441 /* When comparing directories, if a file appears only in one
442 directory, treat it as present but empty in the other. */
443 entire_new_file_flag = 1;
444 break;
445
446 case 'p':
447 /* Make context-style output and show name of last C function. */
448 show_c_function = 1;
449 add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
450 break;
451
452 case 'P':
453 /* When comparing directories, if a file appears only in
454 the second directory of the two,
455 treat it as present but empty in the other. */
456 unidirectional_new_file_flag = 1;
457 break;
458
459 case 'q':
460 no_details_flag = 1;
461 break;
462
463 case 'r':
464 /* When comparing directories,
465 recursively compare any subdirectories found. */
466 recursive = 1;
467 break;
468
469 case 's':
470 /* Print a message if the files are the same. */
471 print_file_same_flag = 1;
472 break;
473
474 case 'S':
475 /* When comparing directories, start with the specified
476 file name. This is used for resuming an aborted comparison. */
477 dir_start_file = optarg;
478 break;
479
480 case 't':
481 /* Expand tabs to spaces in the output so that it preserves
482 the alignment of the input files. */
483 tab_expand_flag = 1;
484 break;
485
486 case 'T':
487 /* Use a tab in the output, rather than a space, before the
488 text of an input line, so as to keep the proper alignment
489 in the input line without changing the characters in it. */
490 tab_align_flag = 1;
491 break;
492
493 case 'u':
494 /* Output the context diff in unidiff format. */
495 specify_style (OUTPUT_UNIFIED);
496 break;
497
498 case 'v':
499 if (callbacks && callbacks->write_stdout)
500 {
501 (*callbacks->write_stdout) ("diff - GNU diffutils version ");
502 (*callbacks->write_stdout) (diff_version_string);
503 (*callbacks->write_stdout) ("\n");
504 }
505 else
506 printf ("diff - GNU diffutils version %s\n", diff_version_string);
507 return 0;
508
509 case 'w':
510 /* Ignore horizontal white space when comparing lines. */
511 ignore_all_space_flag = 1;
512 ignore_some_changes = 1;
513 ignore_some_line_changes = 1;
514 break;
515
516 case 'x':
517 add_exclude (optarg);
518 break;
519
520 case 'X':
521 if (add_exclude_file (optarg) != 0)
522 pfatal_with_name (optarg);
523 break;
524
525 case 'y':
526 /* Use side-by-side (sdiff-style) columnar output. */
527 specify_style (OUTPUT_SDIFF);
528 break;
529
530 case 'W':
531 /* Set the line width for OUTPUT_SDIFF. */
532 if (ck_atoi (optarg, &width) || width <= 0)
533 fatal ("column width must be a positive integer");
534 break;
535
536 case 129:
537 sdiff_left_only = 1;
538 break;
539
540 case 130:
541 sdiff_skip_common_lines = 1;
542 break;
543
544 case 131:
545 /* sdiff-style columns output. */
546 specify_style (OUTPUT_SDIFF);
547 sdiff_help_sdiff = 1;
548 break;
549
550 case 132:
551 case 133:
552 case 134:
553 specify_style (OUTPUT_IFDEF);
554 if (specify_format (&line_format[c - 132], optarg) != 0)
555 diff_error ("conflicting line format", 0, 0);
556 break;
557
558 case 135:
559 specify_style (OUTPUT_IFDEF);
560 {
561 int i, err = 0;
562 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
563 err |= specify_format (&line_format[i], optarg);
564 if (err)
565 diff_error ("conflicting line format", 0, 0);
566 }
567 break;
568
569 case 136:
570 case 137:
571 case 138:
572 case 139:
573 specify_style (OUTPUT_IFDEF);
574 if (specify_format (&group_format[c - 136], optarg) != 0)
575 diff_error ("conflicting group format", 0, 0);
576 break;
577
578 case 140:
579 if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
580 fatal ("horizon must be a nonnegative integer");
581 break;
582
583 case 141:
584 usage ();
585 if (! callbacks || ! callbacks->write_stdout)
586 check_output (stdout);
587 return 0;
588
589 case 142:
590 /* Use binary I/O when reading and writing data.
591 On Posix hosts, this has no effect. */
592 #if HAVE_SETMODE
593 binary_I_O = 1;
594 # if 0
595 /* Because this code is leftover from pre-library days,
596 there is no way to set stdout back to the default mode
597 when we are done. As it turns out, I think the only
598 parts of CVS that pass out == NULL, and thus cause diff
599 to write to stdout, are "cvs diff" and "cvs rdiff". So
600 I'm not going to worry about this too much yet. */
601 setmode (STDOUT_FILENO, O_BINARY);
602 # else
603 if (out == NULL)
604 error (0, 0, "warning: did not set stdout to binary mode");
605 # endif
606 #endif
607 break;
608
609 default:
610 return try_help (0);
611 }
612 prev = c;
613 }
614
615 if (argc - optind != 2)
616 return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
617
618 {
619 /*
620 * We maximize first the half line width, and then the gutter width,
621 * according to the following constraints:
622 * 1. Two half lines plus a gutter must fit in a line.
623 * 2. If the half line width is nonzero:
624 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
625 * b. If tabs are not expanded to spaces,
626 * a half line plus a gutter is an integral number of tabs,
627 * so that tabs in the right column line up.
628 */
629 int t = tab_expand_flag ? 1 : TAB_WIDTH;
630 int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t) * t;
631 sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
632 sdiff_column2_offset = sdiff_half_width ? off : width;
633 }
634
635 if (show_c_function && output_style != OUTPUT_UNIFIED)
636 specify_style (OUTPUT_CONTEXT);
637
638 if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
639 context = 0;
640 else if (context == -1)
641 /* Default amount of context for -c. */
642 context = 3;
643
644 if (output_style == OUTPUT_IFDEF)
645 {
646 /* Format arrays are char *, not char const *,
647 because integer formats are temporarily modified.
648 But it is safe to assign a constant like "%=" to a format array,
649 since "%=" does not format any integers. */
650 int i;
651 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
652 if (!line_format[i])
653 line_format[i] = "%l\n";
654 if (!group_format[OLD])
655 group_format[OLD]
656 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
657 if (!group_format[NEW])
658 group_format[NEW]
659 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
660 if (!group_format[UNCHANGED])
661 group_format[UNCHANGED] = "%=";
662 if (!group_format[CHANGED])
663 group_format[CHANGED] = concat (group_format[OLD],
664 group_format[NEW], "");
665 }
666
667 no_diff_means_no_output =
668 (output_style == OUTPUT_IFDEF ?
669 (!*group_format[UNCHANGED]
670 || (strcmp (group_format[UNCHANGED], "%=") == 0
671 && !*line_format[UNCHANGED]))
672 : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
673
674 switch_string = option_list (argv + 1, optind - 1);
675
676 if (callbacks && callbacks->write_output)
677 {
678 if (out != NULL)
679 {
680 diff_error ("write callback with output file", 0, 0);
681 return 2;
682 }
683 }
684 else
685 {
686 if (out == NULL)
687 outfile = stdout;
688 else
689 {
690 #if HAVE_SETMODE
691 /* A diff which is full of ^Z and such isn't going to work
692 very well in text mode. */
693 if (binary_I_O)
694 outfile = fopen (out, "wb");
695 else
696 #endif
697 outfile = fopen (out, "w");
698 if (outfile == NULL)
699 {
700 perror_with_name ("could not open output file");
701 return 2;
702 }
703 opened_file = 1;
704 }
705 }
706
707 val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
708
709 /* Print any messages that were saved up for last. */
710 print_message_queue ();
711
712 free (switch_string);
713
714 optind = optind_old;
715
716 if (! callbacks || ! callbacks->write_output)
717 check_output (outfile);
718
719 if (opened_file)
720 if (fclose (outfile) != 0)
721 perror_with_name ("close error on output file");
722
723 return val;
724 }
725
726 /* Add the compiled form of regexp PATTERN to REGLIST. */
727
728 static void
add_regexp(reglist,pattern)729 add_regexp (reglist, pattern)
730 struct regexp_list **reglist;
731 char const *pattern;
732 {
733 struct regexp_list *r;
734 char const *m;
735
736 r = (struct regexp_list *) xmalloc (sizeof (*r));
737 bzero (r, sizeof (*r));
738 r->buf.fastmap = xmalloc (256);
739 m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
740 if (m != 0)
741 diff_error ("%s: %s", pattern, m);
742
743 /* Add to the start of the list, since it's easier than the end. */
744 r->next = *reglist;
745 *reglist = r;
746 }
747
748 static int
try_help(reason)749 try_help (reason)
750 char const *reason;
751 {
752 if (reason)
753 diff_error ("%s", reason, 0);
754 diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
755 return 2;
756 }
757
/* Call fatal with "write error" if FILE already has its error indicator
   set or if flushing it fails.  FILE is flushed only when no earlier
   error is pending.  */
static void
check_output (file)
     FILE *file;
{
  int failed = ferror (file) != 0;

  if (!failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
765
/* Lines of the --help text, printed in order by `usage' and terminated
   by a null pointer.  A trailing "\n" inside an entry produces the blank
   line that separates groups of options.  The context defaults quoted
   for -c/-C and -u/-U must agree with the default that `diff_run'
   applies when no length is given, which is 3.  */
static char const * const option_help[] = {
"-i --ignore-case Consider upper- and lower-case to be the same.",
"-w --ignore-all-space Ignore all white space.",
"-b --ignore-space-change Ignore changes in the amount of white space.",
"-B --ignore-blank-lines Ignore changes whose lines are all blank.",
"-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE.",
#if HAVE_SETMODE
"--binary Read and write data in binary mode.",
#endif
"-a --text Treat all files as text.\n",
"-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.",
"-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.",
" -NUM Use NUM context lines.",
" -L LABEL --label LABEL Use LABEL instead of file name.",
" -p --show-c-function Show which C function each change is in.",
" -F RE --show-function-line=RE Show the most recent line matching RE.",
"-q --brief Output only whether files differ.",
"-e --ed Output an ed script.",
"-n --rcs Output an RCS format diff.",
"-y --side-by-side Output in two columns.",
" -W NUM --width=NUM Output at most NUM (default 130) characters per line.",
" --left-column Output only the left column of common lines.",
" --suppress-common-lines Do not output common lines.",
"-DNAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs.",
"--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT.",
"--line-format=LFMT Similar, but format all input lines with LFMT.",
"--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT.",
" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'.",
" GFMT may contain:",
" %< lines from FILE1",
" %> lines from FILE2",
" %= lines common to FILE1 and FILE2",
" %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER",
" LETTERs are as follows for new group, lower case for old group:",
" F first line number",
" L last line number",
" N number of lines = L-F+1",
" E F-1",
" M L+1",
" LFMT may contain:",
" %L contents of line",
" %l contents of line, excluding any trailing newline",
" %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number",
" Either GFMT or LFMT may contain:",
" %% %",
" %c'C' the single character C",
" %c'\\OOO' the character with octal code OOO\n",
"-l --paginate Pass the output through `pr' to paginate it.",
"-t --expand-tabs Expand tabs to spaces in output.",
"-T --initial-tab Make tabs line up by prepending a tab.\n",
"-r --recursive Recursively compare any subdirectories found.",
"-N --new-file Treat absent files as empty.",
"-P --unidirectional-new-file Treat absent first files as empty.",
"-s --report-identical-files Report when two files are the same.",
"-x PAT --exclude=PAT Exclude files that match PAT.",
"-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE.",
"-S FILE --starting-file=FILE Start with FILE when comparing directories.\n",
"--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.",
"-d --minimal Try hard to find a smaller set of changes.",
"-H --speed-large-files Assume large files and many scattered small changes.\n",
"-v --version Output version info.",
"--help Output this help.",
0
};
830
831 static void
usage()832 usage ()
833 {
834 char const * const *p;
835
836 if (callbacks && callbacks->write_stdout)
837 {
838 (*callbacks->write_stdout) ("Usage: ");
839 (*callbacks->write_stdout) (diff_program_name);
840 (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
841 for (p = option_help; *p; p++)
842 {
843 (*callbacks->write_stdout) (" ");
844 (*callbacks->write_stdout) (*p);
845 (*callbacks->write_stdout) ("\n");
846 }
847 (*callbacks->write_stdout)
848 ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
849 }
850 else
851 {
852 printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
853 for (p = option_help; *p; p++)
854 printf (" %s\n", *p);
855 printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
856 }
857 }
858
/* Make *VAR point at the format string VALUE.  Return zero if *VAR was
   unset or already held an equal string; otherwise return the nonzero
   strcmp result, signalling a conflict with the earlier setting.  *VAR
   is overwritten in either case.  */
static int
specify_format (var, value)
     char **var;
     char *value;
{
  char *previous = *var;
  int err = 0;

  if (previous)
    err = strcmp (previous, value);

  *var = value;
  return err;
}
868
869 static void
specify_style(style)870 specify_style (style)
871 enum output_style style;
872 {
873 if (output_style != OUTPUT_NORMAL
874 && output_style != style)
875 diff_error ("conflicting specifications of output style", 0, 0);
876 output_style = style;
877 }
878
/* Return a constant string naming the kind of file that *ST describes.
   See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
   To keep diagnostics grammatical, the returned string must start
   with a consonant.  */
static char const *
filetype (st)
     struct stat const *st;
{
  if (S_ISREG (st->st_mode))
    /* Posix.2 section 5.14.2 seems to suggest that we must read the file
       and guess whether it's C, Fortran, etc., but this is somewhat useless
       and doesn't reflect historical practice.  We're allowed to guess
       wrong, so we don't bother to read the file.  */
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
929
930 /* Compare two files (or dirs) with specified names
931 DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
932 (if DIR0 is 0, then the name is just NAME0, etc.)
933 This is self-contained; it opens the files and closes them.
934
935 Value is 0 if files are the same, 1 if different,
936 2 if there is a problem opening them. */
937
938 static int
compare_files(dir0,name0,dir1,name1,depth)939 compare_files (dir0, name0, dir1, name1, depth)
940 char const *dir0, *dir1;
941 char const *name0, *name1;
942 int depth;
943 {
944 struct file_data inf[2];
945 register int i;
946 int val;
947 int same_files;
948 int failed = 0;
949 char *free0 = 0, *free1 = 0;
950
951 /* If this is directory comparison, perhaps we have a file
952 that exists only in one of the directories.
953 If so, just print a message to that effect. */
954
955 if (! ((name0 != 0 && name1 != 0)
956 || (unidirectional_new_file_flag && name1 != 0)
957 || entire_new_file_flag))
958 {
959 char const *name = name0 == 0 ? name1 : name0;
960 char const *dir = name0 == 0 ? dir1 : dir0;
961 message ("Only in %s: %s\n", dir, name);
962 /* Return 1 so that diff_dirs will return 1 ("some files differ"). */
963 return 1;
964 }
965
966 bzero (inf, sizeof (inf));
967
968 /* Mark any nonexistent file with -1 in the desc field. */
969 /* Mark unopened files (e.g. directories) with -2. */
970
971 inf[0].desc = name0 == 0 ? -1 : -2;
972 inf[1].desc = name1 == 0 ? -1 : -2;
973
974 /* Now record the full name of each file, including nonexistent ones. */
975
976 if (name0 == 0)
977 name0 = name1;
978 if (name1 == 0)
979 name1 = name0;
980
981 inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
982 inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
983
984 /* Stat the files. Record whether they are directories. */
985
986 for (i = 0; i <= 1; i++)
987 {
988 if (inf[i].desc != -1)
989 {
990 int stat_result;
991
992 if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
993 {
994 inf[i].stat = inf[0].stat;
995 stat_result = 0;
996 }
997 else if (strcmp (inf[i].name, "-") == 0)
998 {
999 inf[i].desc = STDIN_FILENO;
1000 stat_result = fstat (STDIN_FILENO, &inf[i].stat);
1001 if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
1002 {
1003 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1004 if (pos == -1)
1005 stat_result = -1;
1006 else
1007 {
1008 if (pos <= inf[i].stat.st_size)
1009 inf[i].stat.st_size -= pos;
1010 else
1011 inf[i].stat.st_size = 0;
1012 /* Posix.2 4.17.6.1.4 requires current time for stdin. */
1013 time (&inf[i].stat.st_mtime);
1014 }
1015 }
1016 }
1017 else
1018 stat_result = stat (inf[i].name, &inf[i].stat);
1019
1020 if (stat_result != 0)
1021 {
1022 perror_with_name (inf[i].name);
1023 failed = 1;
1024 }
1025 else
1026 {
1027 inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1028 if (inf[1 - i].desc == -1)
1029 {
1030 inf[1 - i].dir_p = inf[i].dir_p;
1031 inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1032 }
1033 }
1034 }
1035 }
1036
1037 if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1038 {
1039 /* If one is a directory, and it was specified in the command line,
1040 use the file in that dir with the other file's basename. */
1041
1042 int fnm_arg = inf[0].dir_p;
1043 int dir_arg = 1 - fnm_arg;
1044 char const *fnm = inf[fnm_arg].name;
1045 char const *dir = inf[dir_arg].name;
1046 char const *p = filename_lastdirchar (fnm);
1047 char const *filename = inf[dir_arg].name
1048 = dir_file_pathname (dir, p ? p + 1 : fnm);
1049
1050 if (strcmp (fnm, "-") == 0)
1051 fatal ("can't compare - to a directory");
1052
1053 if (stat (filename, &inf[dir_arg].stat) != 0)
1054 {
1055 perror_with_name (filename);
1056 failed = 1;
1057 }
1058 else
1059 inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1060 }
1061
1062 if (failed)
1063 {
1064
1065 /* If either file should exist but does not, return 2. */
1066
1067 val = 2;
1068
1069 }
1070 else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1071 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1072 && no_diff_means_no_output)
1073 {
1074 /* The two named files are actually the same physical file.
1075 We know they are identical without actually reading them. */
1076
1077 val = 0;
1078 }
1079 else if (inf[0].dir_p & inf[1].dir_p)
1080 {
1081 if (output_style == OUTPUT_IFDEF)
1082 fatal ("-D option not supported with directories");
1083
1084 /* If both are directories, compare the files in them. */
1085
1086 if (depth > 0 && !recursive)
1087 {
1088 /* But don't compare dir contents one level down
1089 unless -r was specified. */
1090 message ("Common subdirectories: %s and %s\n",
1091 inf[0].name, inf[1].name);
1092 val = 0;
1093 }
1094 else
1095 {
1096 val = diff_dirs (inf, compare_files, depth);
1097 }
1098
1099 }
1100 else if ((inf[0].dir_p | inf[1].dir_p)
1101 || (depth > 0
1102 && (! S_ISREG (inf[0].stat.st_mode)
1103 || ! S_ISREG (inf[1].stat.st_mode))))
1104 {
1105 /* Perhaps we have a subdirectory that exists only in one directory.
1106 If so, just print a message to that effect. */
1107
1108 if (inf[0].desc == -1 || inf[1].desc == -1)
1109 {
1110 if ((inf[0].dir_p | inf[1].dir_p)
1111 && recursive
1112 && (entire_new_file_flag
1113 || (unidirectional_new_file_flag && inf[0].desc == -1)))
1114 val = diff_dirs (inf, compare_files, depth);
1115 else
1116 {
1117 char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1118 /* See Posix.2 section 4.17.6.1.1 for this format. */
1119 message ("Only in %s: %s\n", dir, name0);
1120 val = 1;
1121 }
1122 }
1123 else
1124 {
1125 /* We have two files that are not to be compared. */
1126
1127 /* See Posix.2 section 4.17.6.1.1 for this format. */
1128 message5 ("File %s is a %s while file %s is a %s\n",
1129 inf[0].name, filetype (&inf[0].stat),
1130 inf[1].name, filetype (&inf[1].stat));
1131
1132 /* This is a difference. */
1133 val = 1;
1134 }
1135 }
1136 else if ((no_details_flag & ~ignore_some_changes)
1137 && inf[0].stat.st_size != inf[1].stat.st_size
1138 && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1139 && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1140 {
1141 message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1142 val = 1;
1143 }
1144 else
1145 {
1146 /* Both exist and neither is a directory. */
1147
1148 /* Open the files and record their descriptors. */
1149
1150 if (inf[0].desc == -2)
1151 if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1152 {
1153 perror_with_name (inf[0].name);
1154 failed = 1;
1155 }
1156 if (inf[1].desc == -2)
1157 {
1158 if (same_files)
1159 inf[1].desc = inf[0].desc;
1160 else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1161 {
1162 perror_with_name (inf[1].name);
1163 failed = 1;
1164 }
1165 }
1166
1167 #if HAVE_SETMODE
1168 if (binary_I_O)
1169 for (i = 0; i <= 1; i++)
1170 if (0 <= inf[i].desc)
1171 setmode (inf[i].desc, O_BINARY);
1172 #endif
1173
1174 /* Compare the files, if no error was found. */
1175
1176 val = failed ? 2 : diff_2_files (inf, depth);
1177
1178 /* Close the file descriptors. */
1179
1180 if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1181 {
1182 perror_with_name (inf[0].name);
1183 val = 2;
1184 }
1185 if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1186 && close (inf[1].desc) != 0)
1187 {
1188 perror_with_name (inf[1].name);
1189 val = 2;
1190 }
1191 }
1192
1193 /* Now the comparison has been done, if no error prevented it,
1194 and VAL is the value this function will return. */
1195
1196 if (val == 0 && !inf[0].dir_p)
1197 {
1198 if (print_file_same_flag)
1199 message ("Files %s and %s are identical\n",
1200 inf[0].name, inf[1].name);
1201 }
1202 else
1203 flush_output ();
1204
1205 if (free0)
1206 free (free0);
1207 if (free1)
1208 free (free1);
1209
1210 return val;
1211 }
1212
1213 /* Initialize status variables and flag variables used in libdiff,
1214 to permit repeated calls to diff_run. */
1215
1216 static void
initialize_main(argcp,argvp)1217 initialize_main (argcp, argvp)
1218 int *argcp;
1219 char ***argvp;
1220 {
1221 /* These variables really must be reset each time diff_run is called. */
1222 output_style = OUTPUT_NORMAL;
1223 context = -1;
1224 file_label[0] = NULL;
1225 file_label[1] = NULL;
1226 diff_program_name = (*argvp)[0];
1227 outfile = NULL;
1228
1229 /* Reset these also, just for safety's sake. (If one invocation turns
1230 on ignore_case_flag, it must be turned off before diff_run is called
1231 again. But it is possible to make many diffs before encountering
1232 such a problem. */
1233 recursive = 0;
1234 no_discards = 0;
1235 #if HAVE_SETMODE
1236 binary_I_O = 0;
1237 #endif
1238 no_diff_means_no_output = 0;
1239 always_text_flag = 0;
1240 horizon_lines = 0;
1241 ignore_space_change_flag = 0;
1242 ignore_all_space_flag = 0;
1243 ignore_blank_lines_flag = 0;
1244 ignore_some_line_changes = 0;
1245 ignore_some_changes = 0;
1246 ignore_case_flag = 0;
1247 function_regexp_list = NULL;
1248 ignore_regexp_list = NULL;
1249 no_details_flag = 0;
1250 print_file_same_flag = 0;
1251 tab_align_flag = 0;
1252 tab_expand_flag = 0;
1253 dir_start_file = NULL;
1254 entire_new_file_flag = 0;
1255 unidirectional_new_file_flag = 0;
1256 paginate_flag = 0;
1257 bzero (group_format, sizeof (group_format));
1258 bzero (line_format, sizeof (line_format));
1259 sdiff_help_sdiff = 0;
1260 sdiff_left_only = 0;
1261 sdiff_skip_common_lines = 0;
1262 sdiff_half_width = 0;
1263 sdiff_column2_offset = 0;
1264 switch_string = NULL;
1265 heuristic = 0;
1266 bzero (files, sizeof (files));
1267 }
1268