1 /* GNU DIFF entry routine.
2 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU DIFF.
5
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 */
17
18 /* GNU DIFF was written by Mike Haertel, David Hayes,
19 Richard Stallman, Len Tower, and Paul Eggert. */
20
21 #define GDIFF_MAIN
22 #include "diff.h"
23 #include <signal.h>
24 #include "getopt.h"
25 #include "fnmatch.h"
26
/* Default output line width; diff_run uses it as the initial value of
   `width', which -W/--width overrides for side-by-side output.
   May be predefined at build time.  */
#ifndef DEFAULT_WIDTH
#define DEFAULT_WIDTH 130
#endif

/* Smallest gutter allowed between the two half lines when diff_run
   computes the side-by-side column layout.  May be predefined at
   build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
#define GUTTER_WIDTH_MINIMUM 3
#endif

/* diff.c has a real initialize_main function.  Drop any macro of that
   name so the static function declared below is the one that is used.  */
#ifdef initialize_main
#undef initialize_main
#endif
39
/* Forward declarations of the file-local helpers.  PARAMS hides the
   parameter lists from compilers that lack prototypes; the definitions
   themselves use old-style parameter declarations.  */
static char const *filetype PARAMS((struct stat const *));
static char *option_list PARAMS((char **, int));
static int add_exclude_file PARAMS((char const *));
static int ck_atoi PARAMS((char const *, int *));
static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
static int specify_format PARAMS((char **, char *));
static void add_exclude PARAMS((char const *));
static void add_regexp PARAMS((struct regexp_list **, char const *));
static void specify_style PARAMS((enum output_style));
static int try_help PARAMS((char const *));
static void check_output PARAMS((FILE *));
static void usage PARAMS((void));
static void initialize_main PARAMS((int *, char ***));
53
/* Nonzero for -r: if comparing two directories,
   compare their common subdirectories recursively.
   Set while decoding options in diff_run; consulted by compare_files.  */

static int recursive;

/* For debugging: don't do discard_confusing_lines.
   Set by -d (--minimal) in diff_run.  */

int no_discards;

#if HAVE_SETMODE
/* I/O mode: nonzero only if using binary input/output.
   Set by --binary in diff_run.  */
static int binary_I_O;
#endif
67
/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.

   The result is obtained from xmalloc; the caller owns it and is
   expected to free it.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  int i;
  size_t length = 0;
  char *result;
  char *end;

  /* Each element contributes its own length plus one leading space.  */
  for (i = 0; i < count; i++)
    length += strlen (optionvec[i]) + 1;

  /* One extra byte for the terminating null.  */
  result = xmalloc (length + 1);
  end = result;

  /* Append through a moving end pointer, so that the part already built
     is never rescanned (repeated strcat made this quadratic).  */
  for (i = 0; i < count; i++)
    {
      size_t n = strlen (optionvec[i]);
      *end++ = ' ';
      memcpy (end, optionvec[i], n);
      end += n;
    }
  *end = 0;

  return result;
}
99
/* Convert STR, a string of decimal digits, to an integer and store the
   result in *OUT.  Return 0 on success.  If STR contains any character
   other than a decimal digit, return -1 and leave *OUT untouched.

   An empty STR is accepted and yields 0, as before.  Values too large
   for an int are not detected.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;
  for (p = str; *p; p++)
    if (*p < '0' || *p > '9')
      return -1;

  /* Convert the string that was just validated.  This used to convert
     the global `optarg', silently ignoring the STR argument.  */
  *out = atoi (str);
  return 0;
}
115
/* Table of excluded file name patterns: EXCLUDE holds EXCLUDE_COUNT
   patterns in a vector with room for EXCLUDE_ALLOC entries.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if file name F matches (per fnmatch, with no flags) any
   pattern recorded in the exclude table, 0 otherwise.  */

int
excluded_filename (f)
     char const *f;
{
  char const **pat = exclude;
  int remaining;

  for (remaining = exclude_count; remaining > 0; remaining--, pat++)
    {
      if (fnmatch (*pat, f, 0) == 0)
        return 1;
    }

  return 0;
}
131
132 static void
add_exclude(pattern)133 add_exclude (pattern)
134 char const *pattern;
135 {
136 if (exclude_alloc <= exclude_count)
137 exclude = (char const **)
138 (exclude_alloc == 0
139 ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
140 : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
141
142 exclude[exclude_count++] = pattern;
143 }
144
145 static int
add_exclude_file(name)146 add_exclude_file (name)
147 char const *name;
148 {
149 struct file_data f;
150 char *p, *q, *lim;
151
152 f.name = optarg;
153 f.desc = (strcmp (optarg, "-") == 0
154 ? STDIN_FILENO
155 : open (optarg, O_RDONLY, 0));
156 if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
157 return -1;
158
159 sip (&f, 1);
160 slurp (&f);
161
162 for (p = f.buffer, lim = p + f.buffered_chars; p < lim; p = q)
163 {
164 q = (char *) memchr (p, '\n', lim - p);
165 if (!q)
166 q = lim;
167 *q++ = 0;
168 add_exclude (p);
169 }
170
171 return close (f.desc);
172 }
173
/* Long option table handed to getopt_long by diff_run.

   The second field of each entry says whether the option takes an
   argument.  The fourth field is the value getopt_long returns for
   the option: either the equivalent short option letter, or one of
   the numbers 129- that tell the big switch in `diff_run' how to
   process options that have no short form.  */

static struct option const longopts[] =
{
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},    /* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},     /* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},               /* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},               /* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},
  {0, 0, 0, 0}
};
230
231 int
diff_run(argc,argv,out,callbacks_arg)232 diff_run (argc, argv, out, callbacks_arg)
233 int argc;
234 char *argv[];
235 char *out;
236 const struct diff_callbacks *callbacks_arg;
237 {
238 int val;
239 int c;
240 int prev = -1;
241 int width = DEFAULT_WIDTH;
242 int show_c_function = 0;
243 int optind_old;
244 int opened_file = 0;
245
246 callbacks = callbacks_arg;
247
248 /* Do our initializations. */
249 initialize_main (&argc, &argv);
250 optind_old = optind;
251 optind = 0;
252
253 /* Set the jump buffer, so that diff may abort execution without
254 terminating the process. */
255 val = setjmp (diff_abort_buf);
256 if (val != 0)
257 {
258 optind = optind_old;
259 if (opened_file)
260 fclose (outfile);
261 return val;
262 }
263
264 /* Decode the options. */
265 while ((c = getopt_long (argc, argv,
266 "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
267 longopts, 0)) != EOF)
268 {
269 switch (c)
270 {
271 /* All digits combine in decimal to specify the context-size. */
272 case '1':
273 case '2':
274 case '3':
275 case '4':
276 case '5':
277 case '6':
278 case '7':
279 case '8':
280 case '9':
281 case '0':
282 if (context == -1)
283 context = 0;
284 /* If a context length has already been specified,
285 more digits allowed only if they follow right after the others.
286 Reject two separate runs of digits, or digits after -C. */
287 else if (prev < '0' || prev > '9')
288 fatal ("context length specified twice");
289
290 context = context * 10 + c - '0';
291 break;
292
293 case 'a':
294 /* Treat all files as text files; never treat as binary. */
295 always_text_flag = 1;
296 break;
297
298 case 'b':
299 /* Ignore changes in amount of white space. */
300 ignore_space_change_flag = 1;
301 ignore_some_changes = 1;
302 ignore_some_line_changes = 1;
303 break;
304
305 case 'B':
306 /* Ignore changes affecting only blank lines. */
307 ignore_blank_lines_flag = 1;
308 ignore_some_changes = 1;
309 break;
310
311 case 'C': /* +context[=lines] */
312 case 'U': /* +unified[=lines] */
313 if (optarg)
314 {
315 if (context >= 0)
316 fatal ("context length specified twice");
317
318 if (ck_atoi (optarg, &context))
319 fatal ("invalid context length argument");
320 }
321
322 /* Falls through. */
323 case 'c':
324 /* Make context-style output. */
325 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
326 break;
327
328 case 'd':
329 /* Don't discard lines. This makes things slower (sometimes much
330 slower) but will find a guaranteed minimal set of changes. */
331 no_discards = 1;
332 break;
333
334 case 'D':
335 /* Make merged #ifdef output. */
336 specify_style (OUTPUT_IFDEF);
337 {
338 int i, err = 0;
339 static char const C_ifdef_group_formats[] =
340 "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
341 char *b = xmalloc (sizeof (C_ifdef_group_formats)
342 + 7 * strlen(optarg) - 14 /* 7*"%s" */
343 - 8 /* 5*"%%" + 3*"%c" */);
344 sprintf (b, C_ifdef_group_formats,
345 optarg, optarg, 0,
346 optarg, optarg, 0, 0,
347 optarg, optarg, optarg);
348 for (i = 0; i < 4; i++)
349 {
350 err |= specify_format (&group_format[i], b);
351 b += strlen (b) + 1;
352 }
353 if (err)
354 diff_error ("conflicting #ifdef formats", 0, 0);
355 }
356 break;
357
358 case 'e':
359 /* Make output that is a valid `ed' script. */
360 specify_style (OUTPUT_ED);
361 break;
362
363 case 'f':
364 /* Make output that looks vaguely like an `ed' script
365 but has changes in the order they appear in the file. */
366 specify_style (OUTPUT_FORWARD_ED);
367 break;
368
369 case 'F':
370 /* Show, for each set of changes, the previous line that
371 matches the specified regexp. Currently affects only
372 context-style output. */
373 add_regexp (&function_regexp_list, optarg);
374 break;
375
376 case 'h':
377 /* Split the files into chunks of around 1500 lines
378 for faster processing. Usually does not change the result.
379
380 This currently has no effect. */
381 break;
382
383 case 'H':
384 /* Turn on heuristics that speed processing of large files
385 with a small density of changes. */
386 heuristic = 1;
387 break;
388
389 case 'i':
390 /* Ignore changes in case. */
391 ignore_case_flag = 1;
392 ignore_some_changes = 1;
393 ignore_some_line_changes = 1;
394 break;
395
396 case 'I':
397 /* Ignore changes affecting only lines that match the
398 specified regexp. */
399 add_regexp (&ignore_regexp_list, optarg);
400 ignore_some_changes = 1;
401 break;
402
403 case 'l':
404 /* Pass the output through `pr' to paginate it. */
405 paginate_flag = 1;
406 #if !defined(SIGCHLD) && defined(SIGCLD)
407 #define SIGCHLD SIGCLD
408 #endif
409 #ifdef SIGCHLD
410 /* Pagination requires forking and waiting, and
411 System V fork+wait does not work if SIGCHLD is ignored. */
412 signal (SIGCHLD, SIG_DFL);
413 #endif
414 break;
415
416 case 'L':
417 /* Specify file labels for `-c' output headers. */
418 if (!file_label[0])
419 file_label[0] = optarg;
420 else if (!file_label[1])
421 file_label[1] = optarg;
422 else
423 fatal ("too many file label options");
424 break;
425
426 case 'n':
427 /* Output RCS-style diffs, like `-f' except that each command
428 specifies the number of lines affected. */
429 specify_style (OUTPUT_RCS);
430 break;
431
432 case 'N':
433 /* When comparing directories, if a file appears only in one
434 directory, treat it as present but empty in the other. */
435 entire_new_file_flag = 1;
436 break;
437
438 case 'p':
439 /* Make context-style output and show name of last C function. */
440 show_c_function = 1;
441 add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
442 break;
443
444 case 'P':
445 /* When comparing directories, if a file appears only in
446 the second directory of the two,
447 treat it as present but empty in the other. */
448 unidirectional_new_file_flag = 1;
449 break;
450
451 case 'q':
452 no_details_flag = 1;
453 break;
454
455 case 'r':
456 /* When comparing directories,
457 recursively compare any subdirectories found. */
458 recursive = 1;
459 break;
460
461 case 's':
462 /* Print a message if the files are the same. */
463 print_file_same_flag = 1;
464 break;
465
466 case 'S':
467 /* When comparing directories, start with the specified
468 file name. This is used for resuming an aborted comparison. */
469 dir_start_file = optarg;
470 break;
471
472 case 't':
473 /* Expand tabs to spaces in the output so that it preserves
474 the alignment of the input files. */
475 tab_expand_flag = 1;
476 break;
477
478 case 'T':
479 /* Use a tab in the output, rather than a space, before the
480 text of an input line, so as to keep the proper alignment
481 in the input line without changing the characters in it. */
482 tab_align_flag = 1;
483 break;
484
485 case 'u':
486 /* Output the context diff in unidiff format. */
487 specify_style (OUTPUT_UNIFIED);
488 break;
489
490 case 'v':
491 if (callbacks && callbacks->write_stdout)
492 {
493 (*callbacks->write_stdout) ("diff - GNU diffutils version ");
494 (*callbacks->write_stdout) (diff_version_string);
495 (*callbacks->write_stdout) ("\n");
496 }
497 else
498 printf ("diff - GNU diffutils version %s\n", diff_version_string);
499 return 0;
500
501 case 'w':
502 /* Ignore horizontal white space when comparing lines. */
503 ignore_all_space_flag = 1;
504 ignore_some_changes = 1;
505 ignore_some_line_changes = 1;
506 break;
507
508 case 'x':
509 add_exclude (optarg);
510 break;
511
512 case 'X':
513 if (add_exclude_file (optarg) != 0)
514 pfatal_with_name (optarg);
515 break;
516
517 case 'y':
518 /* Use side-by-side (sdiff-style) columnar output. */
519 specify_style (OUTPUT_SDIFF);
520 break;
521
522 case 'W':
523 /* Set the line width for OUTPUT_SDIFF. */
524 if (ck_atoi (optarg, &width) || width <= 0)
525 fatal ("column width must be a positive integer");
526 break;
527
528 case 129:
529 sdiff_left_only = 1;
530 break;
531
532 case 130:
533 sdiff_skip_common_lines = 1;
534 break;
535
536 case 131:
537 /* sdiff-style columns output. */
538 specify_style (OUTPUT_SDIFF);
539 sdiff_help_sdiff = 1;
540 break;
541
542 case 132:
543 case 133:
544 case 134:
545 specify_style (OUTPUT_IFDEF);
546 if (specify_format (&line_format[c - 132], optarg) != 0)
547 diff_error ("conflicting line format", 0, 0);
548 break;
549
550 case 135:
551 specify_style (OUTPUT_IFDEF);
552 {
553 int i, err = 0;
554 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
555 err |= specify_format (&line_format[i], optarg);
556 if (err)
557 diff_error ("conflicting line format", 0, 0);
558 }
559 break;
560
561 case 136:
562 case 137:
563 case 138:
564 case 139:
565 specify_style (OUTPUT_IFDEF);
566 if (specify_format (&group_format[c - 136], optarg) != 0)
567 diff_error ("conflicting group format", 0, 0);
568 break;
569
570 case 140:
571 if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
572 fatal ("horizon must be a nonnegative integer");
573 break;
574
575 case 141:
576 usage ();
577 if (! callbacks || ! callbacks->write_stdout)
578 check_output (stdout);
579 return 0;
580
581 case 142:
582 /* Use binary I/O when reading and writing data.
583 On Posix hosts, this has no effect. */
584 #if HAVE_SETMODE
585 binary_I_O = 1;
586 # if 0
587 /* Because this code is leftover from pre-library days,
588 there is no way to set stdout back to the default mode
589 when we are done. As it turns out, I think the only
590 parts of CVS that pass out == NULL, and thus cause diff
591 to write to stdout, are "cvs diff" and "cvs rdiff". So
592 I'm not going to worry about this too much yet. */
593 setmode (STDOUT_FILENO, O_BINARY);
594 # else
595 if (out == NULL)
596 error (0, 0, "warning: did not set stdout to binary mode");
597 # endif
598 #endif
599 break;
600
601 default:
602 return try_help (0);
603 }
604 prev = c;
605 }
606
607 if (argc - optind != 2)
608 return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
609
610 {
611 /*
612 * We maximize first the half line width, and then the gutter width,
613 * according to the following constraints:
614 * 1. Two half lines plus a gutter must fit in a line.
615 * 2. If the half line width is nonzero:
616 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
617 * b. If tabs are not expanded to spaces,
618 * a half line plus a gutter is an integral number of tabs,
619 * so that tabs in the right column line up.
620 */
621 int t = tab_expand_flag ? 1 : TAB_WIDTH;
622 int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t) * t;
623 sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
624 sdiff_column2_offset = sdiff_half_width ? off : width;
625 }
626
627 if (show_c_function && output_style != OUTPUT_UNIFIED)
628 specify_style (OUTPUT_CONTEXT);
629
630 if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
631 context = 0;
632 else if (context == -1)
633 /* Default amount of context for -c. */
634 context = 3;
635
636 if (output_style == OUTPUT_IFDEF)
637 {
638 /* Format arrays are char *, not char const *,
639 because integer formats are temporarily modified.
640 But it is safe to assign a constant like "%=" to a format array,
641 since "%=" does not format any integers. */
642 int i;
643 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
644 if (!line_format[i])
645 line_format[i] = "%l\n";
646 if (!group_format[OLD])
647 group_format[OLD]
648 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
649 if (!group_format[NEW])
650 group_format[NEW]
651 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
652 if (!group_format[UNCHANGED])
653 group_format[UNCHANGED] = "%=";
654 if (!group_format[CHANGED])
655 group_format[CHANGED] = concat (group_format[OLD],
656 group_format[NEW], "");
657 }
658
659 no_diff_means_no_output =
660 (output_style == OUTPUT_IFDEF ?
661 (!*group_format[UNCHANGED]
662 || (strcmp (group_format[UNCHANGED], "%=") == 0
663 && !*line_format[UNCHANGED]))
664 : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
665
666 switch_string = option_list (argv + 1, optind - 1);
667
668 if (callbacks && callbacks->write_output)
669 {
670 if (out != NULL)
671 {
672 diff_error ("write callback with output file", 0, 0);
673 return 2;
674 }
675 }
676 else
677 {
678 if (out == NULL)
679 outfile = stdout;
680 else
681 {
682 #if HAVE_SETMODE
683 /* A diff which is full of ^Z and such isn't going to work
684 very well in text mode. */
685 if (binary_I_O)
686 outfile = fopen (out, "wb");
687 else
688 #endif
689 outfile = fopen (out, "w");
690 if (outfile == NULL)
691 {
692 perror_with_name ("could not open output file");
693 return 2;
694 }
695 opened_file = 1;
696 }
697 }
698
699 val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
700
701 /* Print any messages that were saved up for last. */
702 print_message_queue ();
703
704 free (switch_string);
705
706 optind = optind_old;
707
708 if (! callbacks || ! callbacks->write_output)
709 check_output (outfile);
710
711 if (opened_file)
712 if (fclose (outfile) != 0)
713 perror_with_name ("close error on output file");
714
715 return val;
716 }
717
718 /* Add the compiled form of regexp PATTERN to REGLIST. */
719
720 static void
add_regexp(reglist,pattern)721 add_regexp (reglist, pattern)
722 struct regexp_list **reglist;
723 char const *pattern;
724 {
725 struct regexp_list *r;
726 char const *m;
727
728 r = (struct regexp_list *) xmalloc (sizeof (*r));
729 bzero (r, sizeof (*r));
730 r->buf.fastmap = xmalloc (256);
731 m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
732 if (m != 0)
733 diff_error ("%s: %s", pattern, m);
734
735 /* Add to the start of the list, since it's easier than the end. */
736 r->next = *reglist;
737 *reglist = r;
738 }
739
740 static int
try_help(reason)741 try_help (reason)
742 char const *reason;
743 {
744 if (reason)
745 diff_error ("%s", reason, 0);
746 diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
747 return 2;
748 }
749
/* Flush FILE and call fatal with "write error" if the stream already
   has its error indicator set or if the flush fails.  The flush is
   not attempted when the error indicator is already set.  */

static void
check_output (file)
     FILE *file;
{
  int failed = ferror (file) != 0;

  if (! failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
757
758 static char const * const option_help[] = {
759 "-i --ignore-case Consider upper- and lower-case to be the same.",
760 "-w --ignore-all-space Ignore all white space.",
761 "-b --ignore-space-change Ignore changes in the amount of white space.",
762 "-B --ignore-blank-lines Ignore changes whose lines are all blank.",
763 "-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE.",
764 #if HAVE_SETMODE
765 "--binary Read and write data in binary mode.",
766 #endif
767 "-a --text Treat all files as text.\n",
768 "-c -C NUM --context[=NUM] Output NUM (default 2) lines of copied context.",
769 "-u -U NUM --unified[=NUM] Output NUM (default 2) lines of unified context.",
770 " -NUM Use NUM context lines.",
771 " -L LABEL --label LABEL Use LABEL instead of file name.",
772 " -p --show-c-function Show which C function each change is in.",
773 " -F RE --show-function-line=RE Show the most recent line matching RE.",
774 "-q --brief Output only whether files differ.",
775 "-e --ed Output an ed script.",
776 "-n --rcs Output an RCS format diff.",
777 "-y --side-by-side Output in two columns.",
778 " -W NUM --width=NUM Output at most NUM (default 130) characters per line.",
779 " --left-column Output only the left column of common lines.",
780 " --suppress-common-lines Do not output common lines.",
781 "-DNAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs.",
782 "--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT.",
783 "--line-format=LFMT Similar, but format all input lines with LFMT.",
784 "--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT.",
785 " LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'.",
786 " GFMT may contain:",
787 " %< lines from FILE1",
788 " %> lines from FILE2",
789 " %= lines common to FILE1 and FILE2",
790 " %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER",
791 " LETTERs are as follows for new group, lower case for old group:",
792 " F first line number",
793 " L last line number",
794 " N number of lines = L-F+1",
795 " E F-1",
796 " M L+1",
797 " LFMT may contain:",
798 " %L contents of line",
799 " %l contents of line, excluding any trailing newline",
800 " %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number",
801 " Either GFMT or LFMT may contain:",
802 " %% %",
803 " %c'C' the single character C",
804 " %c'\\OOO' the character with octal code OOO\n",
805 "-l --paginate Pass the output through `pr' to paginate it.",
806 "-t --expand-tabs Expand tabs to spaces in output.",
807 "-T --initial-tab Make tabs line up by prepending a tab.\n",
808 "-r --recursive Recursively compare any subdirectories found.",
809 "-N --new-file Treat absent files as empty.",
810 "-P --unidirectional-new-file Treat absent first files as empty.",
811 "-s --report-identical-files Report when two files are the same.",
812 "-x PAT --exclude=PAT Exclude files that match PAT.",
813 "-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE.",
814 "-S FILE --starting-file=FILE Start with FILE when comparing directories.\n",
815 "--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.",
816 "-d --minimal Try hard to find a smaller set of changes.",
817 "-H --speed-large-files Assume large files and many scattered small changes.\n",
818 "-v --version Output version info.",
819 "--help Output this help.",
820 0
821 };
822
823 static void
usage()824 usage ()
825 {
826 char const * const *p;
827
828 if (callbacks && callbacks->write_stdout)
829 {
830 (*callbacks->write_stdout) ("Usage: ");
831 (*callbacks->write_stdout) (diff_program_name);
832 (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
833 for (p = option_help; *p; p++)
834 {
835 (*callbacks->write_stdout) (" ");
836 (*callbacks->write_stdout) (*p);
837 (*callbacks->write_stdout) ("\n");
838 }
839 (*callbacks->write_stdout)
840 ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
841 }
842 else
843 {
844 printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
845 for (p = option_help; *p; p++)
846 printf (" %s\n", *p);
847 printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
848 }
849 }
850
/* Set the format *VAR to VALUE.  Return nonzero if *VAR was already
   set to a string whose contents differ from VALUE, and 0 if it was
   unset or held an equal string.  *VAR is replaced in either case.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  int err = 0;

  if (*var)
    err = strcmp (*var, value);

  *var = value;
  return err;
}
860
861 static void
specify_style(style)862 specify_style (style)
863 enum output_style style;
864 {
865 if (output_style != OUTPUT_NORMAL
866 && output_style != style)
867 diff_error ("conflicting specifications of output style", 0, 0);
868 output_style = style;
869 }
870
/* Return a constant string describing the type of the file whose
   status is *ST, for use in diagnostics.  File types that the host
   does not define test macros for are not recognized; anything
   unrecognized is reported as "weird file".  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  if (S_ISREG (st->st_mode))
    {
      /* Posix.2 section 5.14.2 seems to suggest that we must read the file
         and guess whether it's C, Fortran, etc., but this is somewhat useless
         and doesn't reflect historical practice.  We're allowed to guess
         wrong, so we don't bother to read the file.  */
      return st->st_size == 0 ? "regular empty file" : "regular file";
    }

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
921
922 /* Compare two files (or dirs) with specified names
923 DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
924 (if DIR0 is 0, then the name is just NAME0, etc.)
925 This is self-contained; it opens the files and closes them.
926
927 Value is 0 if files are the same, 1 if different,
928 2 if there is a problem opening them. */
929
930 static int
compare_files(dir0,name0,dir1,name1,depth)931 compare_files (dir0, name0, dir1, name1, depth)
932 char const *dir0, *dir1;
933 char const *name0, *name1;
934 int depth;
935 {
936 struct file_data inf[2];
937 register int i;
938 int val;
939 int same_files;
940 int failed = 0;
941 char *free0 = 0, *free1 = 0;
942
943 /* If this is directory comparison, perhaps we have a file
944 that exists only in one of the directories.
945 If so, just print a message to that effect. */
946
947 if (! ((name0 != 0 && name1 != 0)
948 || (unidirectional_new_file_flag && name1 != 0)
949 || entire_new_file_flag))
950 {
951 char const *name = name0 == 0 ? name1 : name0;
952 char const *dir = name0 == 0 ? dir1 : dir0;
953 message ("Only in %s: %s\n", dir, name);
954 /* Return 1 so that diff_dirs will return 1 ("some files differ"). */
955 return 1;
956 }
957
958 bzero (inf, sizeof (inf));
959
960 /* Mark any nonexistent file with -1 in the desc field. */
961 /* Mark unopened files (e.g. directories) with -2. */
962
963 inf[0].desc = name0 == 0 ? -1 : -2;
964 inf[1].desc = name1 == 0 ? -1 : -2;
965
966 /* Now record the full name of each file, including nonexistent ones. */
967
968 if (name0 == 0)
969 name0 = name1;
970 if (name1 == 0)
971 name1 = name0;
972
973 inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
974 inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
975
976 /* Stat the files. Record whether they are directories. */
977
978 for (i = 0; i <= 1; i++)
979 {
980 if (inf[i].desc != -1)
981 {
982 int stat_result;
983
984 if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
985 {
986 inf[i].stat = inf[0].stat;
987 stat_result = 0;
988 }
989 else if (strcmp (inf[i].name, "-") == 0)
990 {
991 inf[i].desc = STDIN_FILENO;
992 stat_result = fstat (STDIN_FILENO, &inf[i].stat);
993 if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
994 {
995 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
996 if (pos == -1)
997 stat_result = -1;
998 else
999 {
1000 if (pos <= inf[i].stat.st_size)
1001 inf[i].stat.st_size -= pos;
1002 else
1003 inf[i].stat.st_size = 0;
1004 /* Posix.2 4.17.6.1.4 requires current time for stdin. */
1005 time (&inf[i].stat.st_mtime);
1006 }
1007 }
1008 }
1009 else
1010 stat_result = stat (inf[i].name, &inf[i].stat);
1011
1012 if (stat_result != 0)
1013 {
1014 perror_with_name (inf[i].name);
1015 failed = 1;
1016 }
1017 else
1018 {
1019 inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1020 if (inf[1 - i].desc == -1)
1021 {
1022 inf[1 - i].dir_p = inf[i].dir_p;
1023 inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1024 }
1025 }
1026 }
1027 }
1028
1029 if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1030 {
1031 /* If one is a directory, and it was specified in the command line,
1032 use the file in that dir with the other file's basename. */
1033
1034 int fnm_arg = inf[0].dir_p;
1035 int dir_arg = 1 - fnm_arg;
1036 char const *fnm = inf[fnm_arg].name;
1037 char const *dir = inf[dir_arg].name;
1038 char const *p = filename_lastdirchar (fnm);
1039 char const *filename = inf[dir_arg].name
1040 = dir_file_pathname (dir, p ? p + 1 : fnm);
1041
1042 if (strcmp (fnm, "-") == 0)
1043 fatal ("can't compare - to a directory");
1044
1045 if (stat (filename, &inf[dir_arg].stat) != 0)
1046 {
1047 perror_with_name (filename);
1048 failed = 1;
1049 }
1050 else
1051 inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1052 }
1053
1054 if (failed)
1055 {
1056
1057 /* If either file should exist but does not, return 2. */
1058
1059 val = 2;
1060
1061 }
1062 else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1063 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1064 && no_diff_means_no_output)
1065 {
1066 /* The two named files are actually the same physical file.
1067 We know they are identical without actually reading them. */
1068
1069 val = 0;
1070 }
1071 else if (inf[0].dir_p & inf[1].dir_p)
1072 {
1073 if (output_style == OUTPUT_IFDEF)
1074 fatal ("-D option not supported with directories");
1075
1076 /* If both are directories, compare the files in them. */
1077
1078 if (depth > 0 && !recursive)
1079 {
1080 /* But don't compare dir contents one level down
1081 unless -r was specified. */
1082 message ("Common subdirectories: %s and %s\n",
1083 inf[0].name, inf[1].name);
1084 val = 0;
1085 }
1086 else
1087 {
1088 val = diff_dirs (inf, compare_files, depth);
1089 }
1090
1091 }
1092 else if ((inf[0].dir_p | inf[1].dir_p)
1093 || (depth > 0
1094 && (! S_ISREG (inf[0].stat.st_mode)
1095 || ! S_ISREG (inf[1].stat.st_mode))))
1096 {
1097 /* Perhaps we have a subdirectory that exists only in one directory.
1098 If so, just print a message to that effect. */
1099
1100 if (inf[0].desc == -1 || inf[1].desc == -1)
1101 {
1102 if ((inf[0].dir_p | inf[1].dir_p)
1103 && recursive
1104 && (entire_new_file_flag
1105 || (unidirectional_new_file_flag && inf[0].desc == -1)))
1106 val = diff_dirs (inf, compare_files, depth);
1107 else
1108 {
1109 char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1110 /* See Posix.2 section 4.17.6.1.1 for this format. */
1111 message ("Only in %s: %s\n", dir, name0);
1112 val = 1;
1113 }
1114 }
1115 else
1116 {
1117 /* We have two files that are not to be compared. */
1118
1119 /* See Posix.2 section 4.17.6.1.1 for this format. */
1120 message5 ("File %s is a %s while file %s is a %s\n",
1121 inf[0].name, filetype (&inf[0].stat),
1122 inf[1].name, filetype (&inf[1].stat));
1123
1124 /* This is a difference. */
1125 val = 1;
1126 }
1127 }
1128 else if ((no_details_flag & ~ignore_some_changes)
1129 && inf[0].stat.st_size != inf[1].stat.st_size
1130 && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1131 && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1132 {
1133 message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1134 val = 1;
1135 }
1136 else
1137 {
1138 /* Both exist and neither is a directory. */
1139
1140 /* Open the files and record their descriptors. */
1141
1142 if (inf[0].desc == -2)
1143 if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1144 {
1145 perror_with_name (inf[0].name);
1146 failed = 1;
1147 }
1148 if (inf[1].desc == -2)
1149 {
1150 if (same_files)
1151 inf[1].desc = inf[0].desc;
1152 else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1153 {
1154 perror_with_name (inf[1].name);
1155 failed = 1;
1156 }
1157 }
1158
1159 #if HAVE_SETMODE
1160 if (binary_I_O)
1161 for (i = 0; i <= 1; i++)
1162 if (0 <= inf[i].desc)
1163 setmode (inf[i].desc, O_BINARY);
1164 #endif
1165
1166 /* Compare the files, if no error was found. */
1167
1168 val = failed ? 2 : diff_2_files (inf, depth);
1169
1170 /* Close the file descriptors. */
1171
1172 if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1173 {
1174 perror_with_name (inf[0].name);
1175 val = 2;
1176 }
1177 if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1178 && close (inf[1].desc) != 0)
1179 {
1180 perror_with_name (inf[1].name);
1181 val = 2;
1182 }
1183 }
1184
1185 /* Now the comparison has been done, if no error prevented it,
1186 and VAL is the value this function will return. */
1187
1188 if (val == 0 && !inf[0].dir_p)
1189 {
1190 if (print_file_same_flag)
1191 message ("Files %s and %s are identical\n",
1192 inf[0].name, inf[1].name);
1193 }
1194 else
1195 flush_output ();
1196
1197 if (free0)
1198 free (free0);
1199 if (free1)
1200 free (free1);
1201
1202 return val;
1203 }
1204
1205 /* Initialize status variables and flag variables used in libdiff,
1206 to permit repeated calls to diff_run. */
1207
1208 static void
initialize_main(argcp,argvp)1209 initialize_main (argcp, argvp)
1210 int *argcp;
1211 char ***argvp;
1212 {
1213 /* These variables really must be reset each time diff_run is called. */
1214 output_style = OUTPUT_NORMAL;
1215 context = -1;
1216 file_label[0] = NULL;
1217 file_label[1] = NULL;
1218 diff_program_name = (*argvp)[0];
1219 outfile = NULL;
1220
1221 /* Reset these also, just for safety's sake. (If one invocation turns
1222 on ignore_case_flag, it must be turned off before diff_run is called
1223 again. But it is possible to make many diffs before encountering
1224 such a problem. */
1225 recursive = 0;
1226 no_discards = 0;
1227 #if HAVE_SETMODE
1228 binary_I_O = 0;
1229 #endif
1230 no_diff_means_no_output = 0;
1231 always_text_flag = 0;
1232 horizon_lines = 0;
1233 ignore_space_change_flag = 0;
1234 ignore_all_space_flag = 0;
1235 ignore_blank_lines_flag = 0;
1236 ignore_some_line_changes = 0;
1237 ignore_some_changes = 0;
1238 ignore_case_flag = 0;
1239 function_regexp_list = NULL;
1240 ignore_regexp_list = NULL;
1241 no_details_flag = 0;
1242 print_file_same_flag = 0;
1243 tab_align_flag = 0;
1244 tab_expand_flag = 0;
1245 dir_start_file = NULL;
1246 entire_new_file_flag = 0;
1247 unidirectional_new_file_flag = 0;
1248 paginate_flag = 0;
1249 bzero (group_format, sizeof (group_format));
1250 bzero (line_format, sizeof (line_format));
1251 sdiff_help_sdiff = 0;
1252 sdiff_left_only = 0;
1253 sdiff_skip_common_lines = 0;
1254 sdiff_half_width = 0;
1255 sdiff_column2_offset = 0;
1256 switch_string = NULL;
1257 heuristic = 0;
1258 bzero (files, sizeof (files));
1259 }
1260