1 /* GNU DIFF entry routine.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 */
17 
18 /* GNU DIFF was written by Mike Haertel, David Hayes,
19    Richard Stallman, Len Tower, and Paul Eggert.  */
20 
21 #define GDIFF_MAIN
22 #include "diff.h"
23 #include <signal.h>
24 #include "error.h"
25 #include "getopt.h"
26 
27 #ifdef HAVE_FNMATCH
28 # include <fnmatch.h> /* This is supposed to be available on Posix systems */
29 #else /* HAVE_FNMATCH */
30 # include "fnmatch.h" /* Our substitute */
31 #endif /* HAVE_FNMATCH */
32 
33 #ifndef DEFAULT_WIDTH
34 #define DEFAULT_WIDTH 130
35 #endif
36 
37 #ifndef GUTTER_WIDTH_MINIMUM
38 #define GUTTER_WIDTH_MINIMUM 3
39 #endif
40 
41 /* diff.c has a real initialize_main function. */
42 #ifdef initialize_main
43 #undef initialize_main
44 #endif
45 
46 static char const *filetype PARAMS((struct stat const *));
47 static char *option_list PARAMS((char **, int));
48 static int add_exclude_file PARAMS((char const *));
49 static int ck_atoi PARAMS((char const *, int *));
50 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
51 static int specify_format PARAMS((char **, char *));
52 static void add_exclude PARAMS((char const *));
53 static void add_regexp PARAMS((struct regexp_list **, char const *));
54 static void specify_style PARAMS((enum output_style));
55 static int try_help PARAMS((char const *));
56 static void check_output PARAMS((FILE *));
57 static void usage PARAMS((void));
58 static void initialize_main PARAMS((int *, char ***));
59 
60 /* Nonzero for -r: if comparing two directories,
61    compare their common subdirectories recursively.  */
62 
63 static int recursive;
64 
65 /* For debugging: don't do discard_confusing_lines.  */
66 
67 int no_discards;
68 
69 #if HAVE_SETMODE
70 /* I/O mode: nonzero only if using binary input/output.  */
71 static int binary_I_O;
72 #endif
73 
/* Build a single string out of the command-line options diff was given.

   OPTIONVEC holds COUNT separate ARGV-elements.  Each element is
   preceded by one space in the result, so the string starts with a
   space and does not end with one.  With no elements the result is
   the empty string.

   The result is allocated with xmalloc; the caller owns it.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  char *result;
  char *tail;
  size_t total = 1;		/* Room for the terminating null.  */
  int k;

  /* One separator plus the text, for every element.  */
  for (k = 0; k < count; k++)
    total += 1 + strlen (optionvec[k]);

  result = xmalloc (total);
  tail = result;

  /* Append at a moving cursor.  */
  for (k = 0; k < count; k++)
    {
      *tail++ = ' ';
      strcpy (tail, optionvec[k]);
      tail += strlen (tail);
    }
  *tail = 0;

  return result;
}
105 
/* Convert STR, a string of decimal digits, to a nonnegative integer
   and store it in *OUT.

   Return 0 on success.  Return -1, leaving *OUT untouched, if STR
   contains anything other than the digits 0-9 or if its value does
   not fit in an int.  An empty STR is accepted and yields 0.

   The value is taken from STR itself, not from the global `optarg',
   so the function works for any caller.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  /* No larger than INT_MAX on any conforming implementation, so the
     accumulated value can always be stored in an int.  */
  unsigned int const limit = (unsigned int) -1 / 2;
  unsigned int value = 0;
  char const *p;

  for (p = str; *p; p++)
    {
      unsigned int digit;

      if (*p < '0' || *p > '9')
	return -1;
      digit = *p - '0';

      /* Reject the digit before it can push VALUE past LIMIT.  */
      if (value > (limit - digit) / 10)
	return -1;
      value = value * 10 + digit;
    }

  *out = (int) value;
  return 0;
}
121 
/* The table of excluded file name patterns: EXCLUDE points to
   EXCLUDE_ALLOC slots, of which the first EXCLUDE_COUNT are in use.  */

static char const **exclude;
static int exclude_alloc;
static int exclude_count;

/* Return 1 if file name F matches any recorded exclusion pattern
   according to fnmatch with no flags, and 0 otherwise.  */

int
excluded_filename (f)
     char const *f;
{
  int n = 0;

  while (n < exclude_count)
    {
      if (fnmatch (exclude[n], f, 0) == 0)
	return 1;
      n++;
    }

  return 0;
}
137 
138 static void
add_exclude(pattern)139 add_exclude (pattern)
140      char const *pattern;
141 {
142   if (exclude_alloc <= exclude_count)
143     exclude = (char const **)
144 	      (exclude_alloc == 0
145 	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
146 	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
147 
148   exclude[exclude_count++] = pattern;
149 }
150 
151 static int
add_exclude_file(name)152 add_exclude_file (name)
153      char const *name;
154 {
155   struct file_data f;
156   char *p, *q, *lim;
157 
158   f.name = optarg;
159   f.desc = (strcmp (optarg, "-") == 0
160 	    ? STDIN_FILENO
161 	    : open (optarg, O_RDONLY, 0));
162   if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
163     return -1;
164 
165   sip (&f, 1);
166   slurp (&f);
167 
168   for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
169     {
170       q = (char *) memchr (p, '\n', lim - p);
171       if (!q)
172 	q = lim;
173       *q++ = 0;
174       add_exclude (p);
175     }
176 
177   return close (f.desc);
178 }
179 
/* Long option table for getopt_long.

   An entry whose last field is a character behaves exactly like the
   short option of that letter.  The values from 129 upward have no
   short form; they select the matching numbered cases in the big
   switch in `diff_run'.  */

static struct option const longopts[] =
{
  /* name			has_arg		    flag  val */
  {"ignore-blank-lines",	no_argument,	    0, 'B'},
  {"context",			optional_argument,  0, 'C'},
  {"ifdef",			required_argument,  0, 'D'},
  {"show-function-line",	required_argument,  0, 'F'},
  {"speed-large-files",		no_argument,	    0, 'H'},
  {"ignore-matching-lines",	required_argument,  0, 'I'},
  {"label",			required_argument,  0, 'L'},
  /* An alias, no longer recommended */
  {"file-label",		required_argument,  0, 'L'},
  {"new-file",			no_argument,	    0, 'N'},
  /* An alias, no longer recommended */
  {"entire-new-file",		no_argument,	    0, 'N'},
  {"unidirectional-new-file",	no_argument,	    0, 'P'},
  {"starting-file",		required_argument,  0, 'S'},
  {"initial-tab",		no_argument,	    0, 'T'},
  {"width",			required_argument,  0, 'W'},
  {"text",			no_argument,	    0, 'a'},
  /* An alias, no longer recommended */
  {"ascii",			no_argument,	    0, 'a'},
  {"ignore-space-change",	no_argument,	    0, 'b'},
  {"minimal",			no_argument,	    0, 'd'},
  {"ed",			no_argument,	    0, 'e'},
  {"forward-ed",		no_argument,	    0, 'f'},
  {"ignore-case",		no_argument,	    0, 'i'},
  {"paginate",			no_argument,	    0, 'l'},
  /* An alias, no longer recommended */
  {"print",			no_argument,	    0, 'l'},
  {"rcs",			no_argument,	    0, 'n'},
  {"show-c-function",		no_argument,	    0, 'p'},
  {"brief",			no_argument,	    0, 'q'},
  {"recursive",			no_argument,	    0, 'r'},
  {"report-identical-files",	no_argument,	    0, 's'},
  {"expand-tabs",		no_argument,	    0, 't'},
  {"version",			no_argument,	    0, 'v'},
  {"ignore-all-space",		no_argument,	    0, 'w'},
  {"exclude",			required_argument,  0, 'x'},
  {"exclude-from",		required_argument,  0, 'X'},
  {"side-by-side",		no_argument,	    0, 'y'},
  {"unified",			optional_argument,  0, 'U'},

  /* Options without a short form.  */
  {"left-column",		no_argument,	    0, 129},
  {"suppress-common-lines",	no_argument,	    0, 130},
  {"sdiff-merge-assist",	no_argument,	    0, 131},
  {"old-line-format",		required_argument,  0, 132},
  {"new-line-format",		required_argument,  0, 133},
  {"unchanged-line-format",	required_argument,  0, 134},
  {"line-format",		required_argument,  0, 135},
  {"old-group-format",		required_argument,  0, 136},
  {"new-group-format",		required_argument,  0, 137},
  {"unchanged-group-format",	required_argument,  0, 138},
  {"changed-group-format",	required_argument,  0, 139},
  {"horizon-lines",		required_argument,  0, 140},
  {"help",			no_argument,	    0, 141},
  {"binary",			no_argument,	    0, 142},

  /* Terminator.  */
  {0, 0, 0, 0}
};
236 
237 
238 
239 int
diff_run(argc,argv,out,callbacks_arg)240 diff_run (argc, argv, out, callbacks_arg)
241      int argc;
242      char *argv[];
243      const char *out;
244      const struct diff_callbacks *callbacks_arg;
245 {
246   int val;
247   int c;
248   int prev = -1;
249   int width = DEFAULT_WIDTH;
250   int show_c_function = 0;
251   int optind_old;
252   int opened_file = 0;
253 
254   callbacks = callbacks_arg;
255 
256   /* Do our initializations.  */
257   initialize_main (&argc, &argv);
258   optind_old = optind;
259   optind = 0;
260 
261   /* Set the jump buffer, so that diff may abort execution without
262      terminating the process. */
263   val = setjmp (diff_abort_buf);
264   if (val != 0)
265     {
266       optind = optind_old;
267       if (opened_file)
268 	fclose (outfile);
269       return val;
270     }
271 
272   /* Decode the options.  */
273   while ((c = getopt_long (argc, argv,
274 			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
275 			   longopts, 0)) != EOF)
276     {
277       switch (c)
278 	{
279 	  /* All digits combine in decimal to specify the context-size.  */
280 	case '1':
281 	case '2':
282 	case '3':
283 	case '4':
284 	case '5':
285 	case '6':
286 	case '7':
287 	case '8':
288 	case '9':
289 	case '0':
290 	  if (context == -1)
291 	    context = 0;
292 	  /* If a context length has already been specified,
293 	     more digits allowed only if they follow right after the others.
294 	     Reject two separate runs of digits, or digits after -C.  */
295 	  else if (prev < '0' || prev > '9')
296 	    fatal ("context length specified twice");
297 
298 	  context = context * 10 + c - '0';
299 	  break;
300 
301 	case 'a':
302 	  /* Treat all files as text files; never treat as binary.  */
303 	  always_text_flag = 1;
304 	  break;
305 
306 	case 'b':
307 	  /* Ignore changes in amount of white space.  */
308 	  ignore_space_change_flag = 1;
309 	  ignore_some_changes = 1;
310 	  ignore_some_line_changes = 1;
311 	  break;
312 
313 	case 'B':
314 	  /* Ignore changes affecting only blank lines.  */
315 	  ignore_blank_lines_flag = 1;
316 	  ignore_some_changes = 1;
317 	  break;
318 
319 	case 'C':		/* +context[=lines] */
320 	case 'U':		/* +unified[=lines] */
321 	  if (optarg)
322 	    {
323 	      if (context >= 0)
324 		fatal ("context length specified twice");
325 
326 	      if (ck_atoi (optarg, &context))
327 		fatal ("invalid context length argument");
328 	    }
329 
330 	  /* Falls through.  */
331 	case 'c':
332 	  /* Make context-style output.  */
333 	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
334 	  break;
335 
336 	case 'd':
337 	  /* Don't discard lines.  This makes things slower (sometimes much
338 	     slower) but will find a guaranteed minimal set of changes.  */
339 	  no_discards = 1;
340 	  break;
341 
342 	case 'D':
343 	  /* Make merged #ifdef output.  */
344 	  specify_style (OUTPUT_IFDEF);
345 	  {
346 	    int i, err = 0;
347 	    static char const C_ifdef_group_formats[] =
348 	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
349 	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
350 			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
351 			       - 8 /* 5*"%%" + 3*"%c" */);
352 	    sprintf (b, C_ifdef_group_formats,
353 		     optarg, optarg, 0,
354 		     optarg, optarg, 0, 0,
355 		     optarg, optarg, optarg);
356 	    for (i = 0; i < 4; i++)
357 	      {
358 		err |= specify_format (&group_format[i], b);
359 		b += strlen (b) + 1;
360 	      }
361 	    if (err)
362 	      diff_error ("conflicting #ifdef formats", 0, 0);
363 	  }
364 	  break;
365 
366 	case 'e':
367 	  /* Make output that is a valid `ed' script.  */
368 	  specify_style (OUTPUT_ED);
369 	  break;
370 
371 	case 'f':
372 	  /* Make output that looks vaguely like an `ed' script
373 	     but has changes in the order they appear in the file.  */
374 	  specify_style (OUTPUT_FORWARD_ED);
375 	  break;
376 
377 	case 'F':
378 	  /* Show, for each set of changes, the previous line that
379 	     matches the specified regexp.  Currently affects only
380 	     context-style output.  */
381 	  add_regexp (&function_regexp_list, optarg);
382 	  break;
383 
384 	case 'h':
385 	  /* Split the files into chunks of around 1500 lines
386 	     for faster processing.  Usually does not change the result.
387 
388 	     This currently has no effect.  */
389 	  break;
390 
391 	case 'H':
392 	  /* Turn on heuristics that speed processing of large files
393 	     with a small density of changes.  */
394 	  heuristic = 1;
395 	  break;
396 
397 	case 'i':
398 	  /* Ignore changes in case.  */
399 	  ignore_case_flag = 1;
400 	  ignore_some_changes = 1;
401 	  ignore_some_line_changes = 1;
402 	  break;
403 
404 	case 'I':
405 	  /* Ignore changes affecting only lines that match the
406 	     specified regexp.  */
407 	  add_regexp (&ignore_regexp_list, optarg);
408 	  ignore_some_changes = 1;
409 	  break;
410 
411 	case 'l':
412 	  /* Pass the output through `pr' to paginate it.  */
413 	  paginate_flag = 1;
414 #if !defined(SIGCHLD) && defined(SIGCLD)
415 #define SIGCHLD SIGCLD
416 #endif
417 #ifdef SIGCHLD
418 	  /* Pagination requires forking and waiting, and
419 	     System V fork+wait does not work if SIGCHLD is ignored.  */
420 	  signal (SIGCHLD, SIG_DFL);
421 #endif
422 	  break;
423 
424 	case 'L':
425 	  /* Specify file labels for `-c' output headers.  */
426 	  if (!file_label[0])
427 	    file_label[0] = optarg;
428 	  else if (!file_label[1])
429 	    file_label[1] = optarg;
430 	  else
431 	    fatal ("too many file label options");
432 	  break;
433 
434 	case 'n':
435 	  /* Output RCS-style diffs, like `-f' except that each command
436 	     specifies the number of lines affected.  */
437 	  specify_style (OUTPUT_RCS);
438 	  break;
439 
440 	case 'N':
441 	  /* When comparing directories, if a file appears only in one
442 	     directory, treat it as present but empty in the other.  */
443 	  entire_new_file_flag = 1;
444 	  break;
445 
446 	case 'p':
447 	  /* Make context-style output and show name of last C function.  */
448 	  show_c_function = 1;
449 	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
450 	  break;
451 
452 	case 'P':
453 	  /* When comparing directories, if a file appears only in
454 	     the second directory of the two,
455 	     treat it as present but empty in the other.  */
456 	  unidirectional_new_file_flag = 1;
457 	  break;
458 
459 	case 'q':
460 	  no_details_flag = 1;
461 	  break;
462 
463 	case 'r':
464 	  /* When comparing directories,
465 	     recursively compare any subdirectories found.  */
466 	  recursive = 1;
467 	  break;
468 
469 	case 's':
470 	  /* Print a message if the files are the same.  */
471 	  print_file_same_flag = 1;
472 	  break;
473 
474 	case 'S':
475 	  /* When comparing directories, start with the specified
476 	     file name.  This is used for resuming an aborted comparison.  */
477 	  dir_start_file = optarg;
478 	  break;
479 
480 	case 't':
481 	  /* Expand tabs to spaces in the output so that it preserves
482 	     the alignment of the input files.  */
483 	  tab_expand_flag = 1;
484 	  break;
485 
486 	case 'T':
487 	  /* Use a tab in the output, rather than a space, before the
488 	     text of an input line, so as to keep the proper alignment
489 	     in the input line without changing the characters in it.  */
490 	  tab_align_flag = 1;
491 	  break;
492 
493 	case 'u':
494 	  /* Output the context diff in unidiff format.  */
495 	  specify_style (OUTPUT_UNIFIED);
496 	  break;
497 
498 	case 'v':
499 	  if (callbacks && callbacks->write_stdout)
500 	    {
501 	      (*callbacks->write_stdout) ("diff - GNU diffutils version ");
502 	      (*callbacks->write_stdout) (diff_version_string);
503 	      (*callbacks->write_stdout) ("\n");
504 	    }
505 	  else
506 	    printf ("diff - GNU diffutils version %s\n", diff_version_string);
507 	  return 0;
508 
509 	case 'w':
510 	  /* Ignore horizontal white space when comparing lines.  */
511 	  ignore_all_space_flag = 1;
512 	  ignore_some_changes = 1;
513 	  ignore_some_line_changes = 1;
514 	  break;
515 
516 	case 'x':
517 	  add_exclude (optarg);
518 	  break;
519 
520 	case 'X':
521 	  if (add_exclude_file (optarg) != 0)
522 	    pfatal_with_name (optarg);
523 	  break;
524 
525 	case 'y':
526 	  /* Use side-by-side (sdiff-style) columnar output. */
527 	  specify_style (OUTPUT_SDIFF);
528 	  break;
529 
530 	case 'W':
531 	  /* Set the line width for OUTPUT_SDIFF.  */
532 	  if (ck_atoi (optarg, &width) || width <= 0)
533 	    fatal ("column width must be a positive integer");
534 	  break;
535 
536 	case 129:
537 	  sdiff_left_only = 1;
538 	  break;
539 
540 	case 130:
541 	  sdiff_skip_common_lines = 1;
542 	  break;
543 
544 	case 131:
545 	  /* sdiff-style columns output. */
546 	  specify_style (OUTPUT_SDIFF);
547 	  sdiff_help_sdiff = 1;
548 	  break;
549 
550 	case 132:
551 	case 133:
552 	case 134:
553 	  specify_style (OUTPUT_IFDEF);
554 	  if (specify_format (&line_format[c - 132], optarg) != 0)
555 	    diff_error ("conflicting line format", 0, 0);
556 	  break;
557 
558 	case 135:
559 	  specify_style (OUTPUT_IFDEF);
560 	  {
561 	    int i, err = 0;
562 	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
563 	      err |= specify_format (&line_format[i], optarg);
564 	    if (err)
565 	      diff_error ("conflicting line format", 0, 0);
566 	  }
567 	  break;
568 
569 	case 136:
570 	case 137:
571 	case 138:
572 	case 139:
573 	  specify_style (OUTPUT_IFDEF);
574 	  if (specify_format (&group_format[c - 136], optarg) != 0)
575 	    diff_error ("conflicting group format", 0, 0);
576 	  break;
577 
578 	case 140:
579 	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
580 	    fatal ("horizon must be a nonnegative integer");
581 	  break;
582 
583 	case 141:
584 	  usage ();
585 	  if (! callbacks || ! callbacks->write_stdout)
586 	    check_output (stdout);
587 	  return 0;
588 
589 	case 142:
590 	  /* Use binary I/O when reading and writing data.
591 	     On Posix hosts, this has no effect.  */
592 #if HAVE_SETMODE
593 	  binary_I_O = 1;
594 #  if 0
595 	  /* Because this code is leftover from pre-library days,
596 	     there is no way to set stdout back to the default mode
597 	     when we are done.  As it turns out, I think the only
598 	     parts of CVS that pass out == NULL, and thus cause diff
599 	     to write to stdout, are "cvs diff" and "cvs rdiff".  So
600 	     I'm not going to worry about this too much yet.  */
601 	  setmode (STDOUT_FILENO, O_BINARY);
602 #  else
603 	  if (out == NULL)
604 	    error (0, 0, "warning: did not set stdout to binary mode");
605 #  endif
606 #endif
607 	  break;
608 
609 	default:
610 	  return try_help (0);
611 	}
612       prev = c;
613     }
614 
615   if (argc - optind != 2)
616     return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
617 
618   {
619     /*
620      *	We maximize first the half line width, and then the gutter width,
621      *	according to the following constraints:
622      *	1.  Two half lines plus a gutter must fit in a line.
623      *	2.  If the half line width is nonzero:
624      *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
625      *	    b.  If tabs are not expanded to spaces,
626      *		a half line plus a gutter is an integral number of tabs,
627      *		so that tabs in the right column line up.
628      */
629     int t = tab_expand_flag ? 1 : TAB_WIDTH;
630     int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
631     sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
632     sdiff_column2_offset = sdiff_half_width ? off : width;
633   }
634 
635   if (show_c_function && output_style != OUTPUT_UNIFIED)
636     specify_style (OUTPUT_CONTEXT);
637 
638   if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
639     context = 0;
640   else if (context == -1)
641     /* Default amount of context for -c.  */
642     context = 3;
643 
644   if (output_style == OUTPUT_IFDEF)
645     {
646       /* Format arrays are char *, not char const *,
647 	 because integer formats are temporarily modified.
648 	 But it is safe to assign a constant like "%=" to a format array,
649 	 since "%=" does not format any integers.  */
650       int i;
651       for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
652 	if (!line_format[i])
653 	  line_format[i] = "%l\n";
654       if (!group_format[OLD])
655 	group_format[OLD]
656 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
657       if (!group_format[NEW])
658 	group_format[NEW]
659 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
660       if (!group_format[UNCHANGED])
661 	group_format[UNCHANGED] = "%=";
662       if (!group_format[CHANGED])
663 	group_format[CHANGED] = concat (group_format[OLD],
664 					group_format[NEW], "");
665     }
666 
667   no_diff_means_no_output =
668     (output_style == OUTPUT_IFDEF ?
669       (!*group_format[UNCHANGED]
670        || (strcmp (group_format[UNCHANGED], "%=") == 0
671 	   && !*line_format[UNCHANGED]))
672      : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
673 
674   switch_string = option_list (argv + 1, optind - 1);
675 
676   if (callbacks && callbacks->write_output)
677     {
678       if (out != NULL)
679 	{
680 	  diff_error ("write callback with output file", 0, 0);
681 	  return 2;
682 	}
683     }
684   else
685     {
686       if (out == NULL)
687 	outfile = stdout;
688       else
689 	{
690 #if HAVE_SETMODE
691 	  /* A diff which is full of ^Z and such isn't going to work
692 	     very well in text mode.  */
693 	  if (binary_I_O)
694 	    outfile = fopen (out, "wb");
695 	  else
696 #endif
697 	    outfile = fopen (out, "w");
698 	  if (outfile == NULL)
699 	    {
700 	      perror_with_name ("could not open output file");
701 	      return 2;
702 	    }
703 	  opened_file = 1;
704 	}
705     }
706 
707   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
708 
709   /* Print any messages that were saved up for last.  */
710   print_message_queue ();
711 
712   free (switch_string);
713 
714   optind = optind_old;
715 
716   if (! callbacks || ! callbacks->write_output)
717     check_output (outfile);
718 
719   if (opened_file)
720     if (fclose (outfile) != 0)
721 	perror_with_name ("close error on output file");
722 
723   return val;
724 }
725 
726 /* Add the compiled form of regexp PATTERN to REGLIST.  */
727 
728 static void
add_regexp(reglist,pattern)729 add_regexp (reglist, pattern)
730      struct regexp_list **reglist;
731      char const *pattern;
732 {
733   struct regexp_list *r;
734   char const *m;
735 
736   r = (struct regexp_list *) xmalloc (sizeof (*r));
737   bzero (r, sizeof (*r));
738   r->buf.fastmap = xmalloc (256);
739   m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
740   if (m != 0)
741     diff_error ("%s: %s", pattern, m);
742 
743   /* Add to the start of the list, since it's easier than the end.  */
744   r->next = *reglist;
745   *reglist = r;
746 }
747 
748 static int
try_help(reason)749 try_help (reason)
750      char const *reason;
751 {
752   if (reason)
753     diff_error ("%s", reason, 0);
754   diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
755   return 2;
756 }
757 
/* Make sure everything written to FILE so far has gone out safely.
   Call fatal if the stream already has its error indicator set or if
   flushing it fails.  The flush is attempted only when no error is
   pending.  */

static void
check_output (file)
    FILE *file;
{
  int failed = ferror (file) != 0;

  if (!failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
765 
/* The option summary printed by `usage', one entry per output line,
   ending with a null pointer.  An entry that itself ends in a newline
   produces a blank line after it, separating groups of options.

   The context default shown for -c and -u is 3, matching the value
   diff_run uses when no length is given.  */

static char const * const option_help[] = {
"-i  --ignore-case  Consider upper- and lower-case to be the same.",
"-w  --ignore-all-space  Ignore all white space.",
"-b  --ignore-space-change  Ignore changes in the amount of white space.",
"-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
"-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
#if HAVE_SETMODE
"--binary  Read and write data in binary mode.",
#endif
"-a  --text  Treat all files as text.\n",
"-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.",
"-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.",
"  -NUM  Use NUM context lines.",
"  -L LABEL  --label LABEL  Use LABEL instead of file name.",
"  -p  --show-c-function  Show which C function each change is in.",
"  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
"-q  --brief  Output only whether files differ.",
"-e  --ed  Output an ed script.",
"-n  --rcs  Output an RCS format diff.",
"-y  --side-by-side  Output in two columns.",
"  -W NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
"  --left-column  Output only the left column of common lines.",
"  --suppress-common-lines  Do not output common lines.",
"-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
"--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
"--line-format=LFMT  Similar, but format all input lines with LFMT.",
"--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
"  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
"  GFMT may contain:",
"    %<  lines from FILE1",
"    %>  lines from FILE2",
"    %=  lines common to FILE1 and FILE2",
"    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
"      LETTERs are as follows for new group, lower case for old group:",
"        F  first line number",
"        L  last line number",
"        N  number of lines = L-F+1",
"        E  F-1",
"        M  L+1",
"  LFMT may contain:",
"    %L  contents of line",
"    %l  contents of line, excluding any trailing newline",
"    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
"  Either GFMT or LFMT may contain:",
"    %%  %",
"    %c'C'  the single character C",
"    %c'\\OOO'  the character with octal code OOO\n",
"-l  --paginate  Pass the output through `pr' to paginate it.",
"-t  --expand-tabs  Expand tabs to spaces in output.",
"-T  --initial-tab  Make tabs line up by prepending a tab.\n",
"-r  --recursive  Recursively compare any subdirectories found.",
"-N  --new-file  Treat absent files as empty.",
"-P  --unidirectional-new-file  Treat absent first files as empty.",
"-s  --report-identical-files  Report when two files are the same.",
"-x PAT  --exclude=PAT  Exclude files that match PAT.",
"-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
"-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
"--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
"-d  --minimal  Try hard to find a smaller set of changes.",
"-H  --speed-large-files  Assume large files and many scattered small changes.\n",
"-v  --version  Output version info.",
"--help  Output this help.",
0
};
830 
831 static void
usage()832 usage ()
833 {
834   char const * const *p;
835 
836   if (callbacks && callbacks->write_stdout)
837     {
838       (*callbacks->write_stdout) ("Usage: ");
839       (*callbacks->write_stdout) (diff_program_name);
840       (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
841       for (p = option_help;  *p;  p++)
842 	{
843 	  (*callbacks->write_stdout) ("  ");
844 	  (*callbacks->write_stdout) (*p);
845 	  (*callbacks->write_stdout) ("\n");
846 	}
847       (*callbacks->write_stdout)
848 	("\nIf FILE1 or FILE2 is `-', read standard input.\n");
849     }
850   else
851     {
852       printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
853       for (p = option_help;  *p;  p++)
854 	printf ("  %s\n", *p);
855       printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
856     }
857 }
858 
/* Set the format *VAR to VALUE.  Only the pointer is stored.

   Return 0 if *VAR was unset or already held text equal to VALUE.
   Otherwise return the nonzero result of comparing the old text with
   VALUE, which signals a conflict.  *VAR is replaced in every case.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  char *previous = *var;

  *var = value;

  if (previous == 0)
    return 0;
  return strcmp (previous, value);
}
868 
869 static void
specify_style(style)870 specify_style (style)
871      enum output_style style;
872 {
873   if (output_style != OUTPUT_NORMAL
874       && output_style != style)
875     diff_error ("conflicting specifications of output style", 0, 0);
876   output_style = style;
877 }
878 
/* Return a constant string describing the kind of file recorded in
   the status buffer ST.  File kinds whose test macros the host does
   not define are skipped; anything unrecognized is a "weird file".  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  /* Posix.2 section 5.14.2 seems to suggest that we must read a
     regular file and guess whether it's C, Fortran, etc., but this is
     somewhat useless and doesn't reflect historical practice.  We're
     allowed to guess wrong, so we don't bother to read the file.  */
  if (S_ISREG (st->st_mode))
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
929 
930 /* Compare two files (or dirs) with specified names
931    DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
932    (if DIR0 is 0, then the name is just NAME0, etc.)
933    This is self-contained; it opens the files and closes them.
934 
935    Value is 0 if files are the same, 1 if different,
936    2 if there is a problem opening them.  */
937 
938 static int
compare_files(dir0,name0,dir1,name1,depth)939 compare_files (dir0, name0, dir1, name1, depth)
940      char const *dir0, *dir1;
941      char const *name0, *name1;
942      int depth;
943 {
944   struct file_data inf[2];
945   register int i;
946   int val;
947   int same_files;
948   int failed = 0;
949   char *free0 = 0, *free1 = 0;
950 
951   /* If this is directory comparison, perhaps we have a file
952      that exists only in one of the directories.
953      If so, just print a message to that effect.  */
954 
955   if (! ((name0 != 0 && name1 != 0)
956 	 || (unidirectional_new_file_flag && name1 != 0)
957 	 || entire_new_file_flag))
958     {
959       char const *name = name0 == 0 ? name1 : name0;
960       char const *dir = name0 == 0 ? dir1 : dir0;
961       message ("Only in %s: %s\n", dir, name);
962       /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
963       return 1;
964     }
965 
966   bzero (inf, sizeof (inf));
967 
968   /* Mark any nonexistent file with -1 in the desc field.  */
969   /* Mark unopened files (e.g. directories) with -2. */
970 
971   inf[0].desc = name0 == 0 ? -1 : -2;
972   inf[1].desc = name1 == 0 ? -1 : -2;
973 
974   /* Now record the full name of each file, including nonexistent ones.  */
975 
976   if (name0 == 0)
977     name0 = name1;
978   if (name1 == 0)
979     name1 = name0;
980 
981   inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
982   inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
983 
984   /* Stat the files.  Record whether they are directories.  */
985 
986   for (i = 0; i <= 1; i++)
987     {
988       if (inf[i].desc != -1)
989 	{
990 	  int stat_result;
991 
992 	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
993 	    {
994 	      inf[i].stat = inf[0].stat;
995 	      stat_result = 0;
996 	    }
997 	  else if (strcmp (inf[i].name, "-") == 0)
998 	    {
999 	      inf[i].desc = STDIN_FILENO;
1000 	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
1001 	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
1002 		{
1003 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1004 		  if (pos == -1)
1005 		    stat_result = -1;
1006 		  else
1007 		    {
1008 		      if (pos <= inf[i].stat.st_size)
1009 			inf[i].stat.st_size -= pos;
1010 		      else
1011 			inf[i].stat.st_size = 0;
1012 		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
1013 		      time (&inf[i].stat.st_mtime);
1014 		    }
1015 		}
1016 	    }
1017 	  else
1018 	    stat_result = stat (inf[i].name, &inf[i].stat);
1019 
1020 	  if (stat_result != 0)
1021 	    {
1022 	      perror_with_name (inf[i].name);
1023 	      failed = 1;
1024 	    }
1025 	  else
1026 	    {
1027 	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1028 	      if (inf[1 - i].desc == -1)
1029 		{
1030 		  inf[1 - i].dir_p = inf[i].dir_p;
1031 		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1032 		}
1033 	    }
1034 	}
1035     }
1036 
1037   if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1038     {
1039       /* If one is a directory, and it was specified in the command line,
1040 	 use the file in that dir with the other file's basename.  */
1041 
1042       int fnm_arg = inf[0].dir_p;
1043       int dir_arg = 1 - fnm_arg;
1044       char const *fnm = inf[fnm_arg].name;
1045       char const *dir = inf[dir_arg].name;
1046       char const *p = filename_lastdirchar (fnm);
1047       char const *filename = inf[dir_arg].name
1048 	= dir_file_pathname (dir, p ? p + 1 : fnm);
1049 
1050       if (strcmp (fnm, "-") == 0)
1051 	fatal ("can't compare - to a directory");
1052 
1053       if (stat (filename, &inf[dir_arg].stat) != 0)
1054 	{
1055 	  perror_with_name (filename);
1056 	  failed = 1;
1057 	}
1058       else
1059 	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1060     }
1061 
1062   if (failed)
1063     {
1064 
1065       /* If either file should exist but does not, return 2.  */
1066 
1067       val = 2;
1068 
1069     }
1070   else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1071 			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1072 	   && no_diff_means_no_output)
1073     {
1074       /* The two named files are actually the same physical file.
1075 	 We know they are identical without actually reading them.  */
1076 
1077       val = 0;
1078     }
1079   else if (inf[0].dir_p & inf[1].dir_p)
1080     {
1081       if (output_style == OUTPUT_IFDEF)
1082 	fatal ("-D option not supported with directories");
1083 
1084       /* If both are directories, compare the files in them.  */
1085 
1086       if (depth > 0 && !recursive)
1087 	{
1088 	  /* But don't compare dir contents one level down
1089 	     unless -r was specified.  */
1090 	  message ("Common subdirectories: %s and %s\n",
1091 		   inf[0].name, inf[1].name);
1092 	  val = 0;
1093 	}
1094       else
1095 	{
1096 	  val = diff_dirs (inf, compare_files, depth);
1097 	}
1098 
1099     }
1100   else if ((inf[0].dir_p | inf[1].dir_p)
1101 	   || (depth > 0
1102 	       && (! S_ISREG (inf[0].stat.st_mode)
1103 		   || ! S_ISREG (inf[1].stat.st_mode))))
1104     {
1105       /* Perhaps we have a subdirectory that exists only in one directory.
1106 	 If so, just print a message to that effect.  */
1107 
1108       if (inf[0].desc == -1 || inf[1].desc == -1)
1109 	{
1110 	  if ((inf[0].dir_p | inf[1].dir_p)
1111 	      && recursive
1112 	      && (entire_new_file_flag
1113 		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1114 	    val = diff_dirs (inf, compare_files, depth);
1115 	  else
1116 	    {
1117 	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1118 	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1119 	      message ("Only in %s: %s\n", dir, name0);
1120 	      val = 1;
1121 	    }
1122 	}
1123       else
1124 	{
1125 	  /* We have two files that are not to be compared.  */
1126 
1127 	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1128 	  message5 ("File %s is a %s while file %s is a %s\n",
1129 		    inf[0].name, filetype (&inf[0].stat),
1130 		    inf[1].name, filetype (&inf[1].stat));
1131 
1132 	  /* This is a difference.  */
1133 	  val = 1;
1134 	}
1135     }
1136   else if ((no_details_flag & ~ignore_some_changes)
1137 	   && inf[0].stat.st_size != inf[1].stat.st_size
1138 	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1139 	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1140     {
1141       message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1142       val = 1;
1143     }
1144   else
1145     {
1146       /* Both exist and neither is a directory.  */
1147 
1148       /* Open the files and record their descriptors.  */
1149 
1150       if (inf[0].desc == -2)
1151 	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1152 	  {
1153 	    perror_with_name (inf[0].name);
1154 	    failed = 1;
1155 	  }
1156       if (inf[1].desc == -2)
1157 	{
1158 	  if (same_files)
1159 	    inf[1].desc = inf[0].desc;
1160 	  else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1161 	    {
1162 	      perror_with_name (inf[1].name);
1163 	      failed = 1;
1164 	    }
1165 	}
1166 
1167 #if HAVE_SETMODE
1168       if (binary_I_O)
1169 	for (i = 0; i <= 1; i++)
1170 	  if (0 <= inf[i].desc)
1171 	    setmode (inf[i].desc, O_BINARY);
1172 #endif
1173 
1174       /* Compare the files, if no error was found.  */
1175 
1176       val = failed ? 2 : diff_2_files (inf, depth);
1177 
1178       /* Close the file descriptors.  */
1179 
1180       if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1181 	{
1182 	  perror_with_name (inf[0].name);
1183 	  val = 2;
1184 	}
1185       if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1186 	  && close (inf[1].desc) != 0)
1187 	{
1188 	  perror_with_name (inf[1].name);
1189 	  val = 2;
1190 	}
1191     }
1192 
1193   /* Now the comparison has been done, if no error prevented it,
1194      and VAL is the value this function will return.  */
1195 
1196   if (val == 0 && !inf[0].dir_p)
1197     {
1198       if (print_file_same_flag)
1199 	message ("Files %s and %s are identical\n",
1200 		 inf[0].name, inf[1].name);
1201     }
1202   else
1203     flush_output ();
1204 
1205   if (free0)
1206     free (free0);
1207   if (free1)
1208     free (free1);
1209 
1210   return val;
1211 }
1212 
1213 /* Initialize status variables and flag variables used in libdiff,
1214    to permit repeated calls to diff_run. */
1215 
1216 static void
initialize_main(argcp,argvp)1217 initialize_main (argcp, argvp)
1218     int *argcp;
1219     char ***argvp;
1220 {
1221   /* These variables really must be reset each time diff_run is called. */
1222   output_style = OUTPUT_NORMAL;
1223   context = -1;
1224   file_label[0] = NULL;
1225   file_label[1] = NULL;
1226   diff_program_name = (*argvp)[0];
1227   outfile = NULL;
1228 
1229   /* Reset these also, just for safety's sake. (If one invocation turns
1230      on ignore_case_flag, it must be turned off before diff_run is called
1231      again.  But it is possible to make many diffs before encountering
1232      such a problem. */
1233   recursive = 0;
1234   no_discards = 0;
1235 #if HAVE_SETMODE
1236   binary_I_O = 0;
1237 #endif
1238   no_diff_means_no_output = 0;
1239   always_text_flag = 0;
1240   horizon_lines = 0;
1241   ignore_space_change_flag = 0;
1242   ignore_all_space_flag = 0;
1243   ignore_blank_lines_flag = 0;
1244   ignore_some_line_changes = 0;
1245   ignore_some_changes = 0;
1246   ignore_case_flag = 0;
1247   function_regexp_list = NULL;
1248   ignore_regexp_list = NULL;
1249   no_details_flag = 0;
1250   print_file_same_flag = 0;
1251   tab_align_flag = 0;
1252   tab_expand_flag = 0;
1253   dir_start_file = NULL;
1254   entire_new_file_flag = 0;
1255   unidirectional_new_file_flag = 0;
1256   paginate_flag = 0;
1257   bzero (group_format, sizeof (group_format));
1258   bzero (line_format, sizeof (line_format));
1259   sdiff_help_sdiff = 0;
1260   sdiff_left_only = 0;
1261   sdiff_skip_common_lines = 0;
1262   sdiff_half_width = 0;
1263   sdiff_column2_offset = 0;
1264   switch_string = NULL;
1265   heuristic = 0;
1266   bzero (files, sizeof (files));
1267 }
1268