xref: /openbsd/gnu/usr.bin/cvs/diff/util.c (revision f2dfb0a4)
1 /* Support routines for GNU DIFF.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 #include "diff.h"
21 
/* Deferred one-line messages.

   When -l is specified, messages are not printed right away: message5
   appends a `struct msg' to a singly linked list, and
   print_message_queue prints the list later.  */

struct msg
{
  struct msg *next;		/* Following queued message, or 0.  */
  char const *format;		/* printf-style format string.  */
  char const *arg1, *arg2;	/* String arguments for FORMAT.  */
  char const *arg3, *arg4;	/* Further arguments; may be 0.  */
};

/* First queued message, or 0 while the queue is empty.  */

static struct msg *msg_chain;

/* Where the next appended message gets linked in: the address of the
   last node's `next' field, or of MSG_CHAIN while the queue is empty.  */

static struct msg **msg_chain_end = &msg_chain;
42 
43 /* Use when a system call returns non-zero status.
44    TEXT should normally be the file name.  */
45 
46 void
47 perror_with_name (text)
48      char const *text;
49 {
50   int e = errno;
51   fprintf (stderr, "%s: ", diff_program_name);
52   errno = e;
53   perror (text);
54 }
55 
/* Like perror_with_name, but for failures that are fatal: first print
   any queued messages, then the error report, then abort the diff
   via DIFF_ABORT with status 2.  */

void
pfatal_with_name (text)
     char const *text;
{
  /* Printing the queue may change errno; keep the caller's value.  */
  int saved_errno = errno;

  print_message_queue ();

  errno = saved_errno;
  perror_with_name (text);
  DIFF_ABORT (2);
}
69 
70 /* Print an error message from the format-string FORMAT
71    with args ARG1 and ARG2.  */
72 
73 void
74 diff_error (format, arg, arg1)
75      char const *format, *arg, *arg1;
76 {
77   fprintf (stderr, "%s: ", diff_program_name);
78   fprintf (stderr, format, arg, arg1);
79   fprintf (stderr, "\n");
80 }
81 
/* Print any queued messages, then an error message consisting of the
   string M, then abort the diff via DIFF_ABORT with status 2.  */

void
fatal (m)
     char const *m;
{
  /* The "%s" format consumes only M; the second argument is unused.  */
  char const *no_arg = 0;

  print_message_queue ();
  diff_error ("%s", m, no_arg);
  DIFF_ABORT (2);
}
92 
/* Two-argument convenience form of message5: like printf, except that
   if -l is in effect the message is saved and printed later.
   This is used for things like "binary files differ" and "Only in ...".  */

void
message (format, arg1, arg2)
     char const *format, *arg1, *arg2;
{
  /* The third and fourth message5 arguments are not used here.  */
  char const *no_arg = 0;

  message5 (format, arg1, arg2, no_arg, no_arg);
}
102 
103 void
104 message5 (format, arg1, arg2, arg3, arg4)
105      char const *format, *arg1, *arg2, *arg3, *arg4;
106 {
107   if (paginate_flag)
108     {
109       struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
110       new->format = format;
111       new->arg1 = concat (arg1, "", "");
112       new->arg2 = concat (arg2, "", "");
113       new->arg3 = arg3 ? concat (arg3, "", "") : 0;
114       new->arg4 = arg4 ? concat (arg4, "", "") : 0;
115       new->next = 0;
116       *msg_chain_end = new;
117       msg_chain_end = &new->next;
118     }
119   else
120     {
121       if (sdiff_help_sdiff)
122 	putc (' ', outfile);
123       fprintf (outfile, format, arg1, arg2, arg3, arg4);
124     }
125 }
126 
127 /* Output all the messages that were saved up by calls to `message'.  */
128 
129 void
130 print_message_queue ()
131 {
132   struct msg *m;
133 
134   for (m = msg_chain; m; m = m->next)
135     fprintf (outfile, m->format, m->arg1, m->arg2, m->arg3, m->arg4);
136 }
137 
/* State shared by setup_output, begin_output and finish_output.

   Usually OUTFILE is just stdout.  But when -l was specified,
   begin_output forks off a `pr' and makes OUTFILE a pipe to it;
   `pr' then outputs to our stdout.  */

/* Names of the two files being compared and the directory recursion
   depth, as recorded by the latest call to setup_output.  */
static char const *current_name0;
static char const *current_name1;
static int current_depth;

/* Nonzero between a begin_output and the matching finish_output.  */
static int output_in_progress = 0;

/* Call before outputting the results of comparing files NAME0 and NAME1.
   This only records NAME0, NAME1 and DEPTH for begin_output to use;
   the strings are not copied.  */

void
setup_output (name0, name1, depth)
     char const *name0, *name1;
     int depth;
{
  current_depth = depth;
  current_name1 = name1;
  current_name0 = name0;
}
160 
161 #if HAVE_FORK && defined (PR_PROGRAM)
162 static pid_t pr_pid;
163 #endif
164 
165 void
166 begin_output ()
167 {
168   char *name;
169 
170   if (output_in_progress)
171     return;
172   output_in_progress = 1;
173 
174   /* Construct the header of this piece of diff.  */
175   name = xmalloc (strlen (current_name0) + strlen (current_name1)
176 		  + strlen (switch_string) + 7);
177   /* Posix.2 section 4.17.6.1.1 specifies this format.  But there is a
178      bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
179      it says that we must print only the last component of the pathnames.
180      This requirement is silly and does not match historical practice.  */
181   sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
182 
183   if (paginate_flag)
184     {
185       /* Make OUTFILE a pipe to a subsidiary `pr'.  */
186 
187 #ifdef PR_PROGRAM
188 
189 # if HAVE_FORK
190       int pipes[2];
191 
192       if (pipe (pipes) != 0)
193 	pfatal_with_name ("pipe");
194 
195       fflush (stdout);
196 
197       pr_pid = vfork ();
198       if (pr_pid < 0)
199 	pfatal_with_name ("vfork");
200 
201       if (pr_pid == 0)
202 	{
203 	  close (pipes[1]);
204 	  if (pipes[0] != STDIN_FILENO)
205 	    {
206 	      if (dup2 (pipes[0], STDIN_FILENO) < 0)
207 		pfatal_with_name ("dup2");
208 	      close (pipes[0]);
209 	    }
210 
211 	  execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, 0);
212 	  pfatal_with_name (PR_PROGRAM);
213 	}
214       else
215 	{
216 	  close (pipes[0]);
217 	  outfile = fdopen (pipes[1], "w");
218 	  if (!outfile)
219 	    pfatal_with_name ("fdopen");
220 	}
221 # else /* ! HAVE_FORK */
222       char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
223       char *p;
224       char const *a = name;
225       sprintf (command, "%s -f -h ", PR_PROGRAM);
226       p = command + strlen (command);
227       SYSTEM_QUOTE_ARG (p, a);
228       *p = 0;
229       outfile = popen (command, "w");
230       if (!outfile)
231 	pfatal_with_name (command);
232       free (command);
233 # endif /* ! HAVE_FORK */
234 #else
235       fatal ("This port does not support the --paginate option to diff.");
236 #endif
237     }
238   else
239     {
240 
241       /* If -l was not specified, output the diff straight to `stdout'.  */
242 
243       /* If handling multiple files (because scanning a directory),
244 	 print which files the following output is about.  */
245       if (current_depth > 0)
246 	fprintf (outfile, "%s\n", name);
247     }
248 
249   free (name);
250 
251   /* A special header is needed at the beginning of context output.  */
252   switch (output_style)
253     {
254     case OUTPUT_CONTEXT:
255       print_context_header (files, 0);
256       break;
257 
258     case OUTPUT_UNIFIED:
259       print_context_header (files, 1);
260       break;
261 
262     default:
263       break;
264     }
265 }
266 
267 /* Call after the end of output of diffs for one file.
268    If -l was given, close OUTFILE and get rid of the `pr' subfork.  */
269 
270 void
271 finish_output ()
272 {
273   if (paginate_flag && outfile != 0 && outfile != stdout)
274     {
275 #ifdef PR_PROGRAM
276       int wstatus;
277       if (ferror (outfile))
278 	fatal ("write error");
279 # if ! HAVE_FORK
280       wstatus = pclose (outfile);
281 # else /* HAVE_FORK */
282       if (fclose (outfile) != 0)
283 	pfatal_with_name ("write error");
284       if (waitpid (pr_pid, &wstatus, 0) < 0)
285 	pfatal_with_name ("waitpid");
286 # endif /* HAVE_FORK */
287       if (wstatus != 0)
288 	fatal ("subsidiary pr failed");
289 #else
290       fatal ("internal error in finish_output");
291 #endif
292     }
293 
294   output_in_progress = 0;
295 }
296 
297 /* Compare two lines (typically one from each input file)
298    according to the command line options.
299    For efficiency, this is invoked only when the lines do not match exactly
300    but an option like -i might cause us to ignore the difference.
301    Return nonzero if the lines differ.  */
302 
303 int
304 line_cmp (s1, s2)
305      char const *s1, *s2;
306 {
307   register unsigned char const *t1 = (unsigned char const *) s1;
308   register unsigned char const *t2 = (unsigned char const *) s2;
309 
310   while (1)
311     {
312       register unsigned char c1 = *t1++;
313       register unsigned char c2 = *t2++;
314 
315       /* Test for exact char equality first, since it's a common case.  */
316       if (c1 != c2)
317 	{
318 	  /* Ignore horizontal white space if -b or -w is specified.  */
319 
320 	  if (ignore_all_space_flag)
321 	    {
322 	      /* For -w, just skip past any white space.  */
323 	      while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
324 	      while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
325 	    }
326 	  else if (ignore_space_change_flag)
327 	    {
328 	      /* For -b, advance past any sequence of white space in line 1
329 		 and consider it just one Space, or nothing at all
330 		 if it is at the end of the line.  */
331 	      if (ISSPACE (c1))
332 		{
333 		  while (c1 != '\n')
334 		    {
335 		      c1 = *t1++;
336 		      if (! ISSPACE (c1))
337 			{
338 			  --t1;
339 			  c1 = ' ';
340 			  break;
341 			}
342 		    }
343 		}
344 
345 	      /* Likewise for line 2.  */
346 	      if (ISSPACE (c2))
347 		{
348 		  while (c2 != '\n')
349 		    {
350 		      c2 = *t2++;
351 		      if (! ISSPACE (c2))
352 			{
353 			  --t2;
354 			  c2 = ' ';
355 			  break;
356 			}
357 		    }
358 		}
359 
360 	      if (c1 != c2)
361 		{
362 		  /* If we went too far when doing the simple test
363 		     for equality, go back to the first non-white-space
364 		     character in both sides and try again.  */
365 		  if (c2 == ' ' && c1 != '\n'
366 		      && (unsigned char const *) s1 + 1 < t1
367 		      && ISSPACE(t1[-2]))
368 		    {
369 		      --t1;
370 		      continue;
371 		    }
372 		  if (c1 == ' ' && c2 != '\n'
373 		      && (unsigned char const *) s2 + 1 < t2
374 		      && ISSPACE(t2[-2]))
375 		    {
376 		      --t2;
377 		      continue;
378 		    }
379 		}
380 	    }
381 
382 	  /* Lowercase all letters if -i is specified.  */
383 
384 	  if (ignore_case_flag)
385 	    {
386 	      if (ISUPPER (c1))
387 		c1 = tolower (c1);
388 	      if (ISUPPER (c2))
389 		c2 = tolower (c2);
390 	    }
391 
392 	  if (c1 != c2)
393 	    break;
394 	}
395       if (c1 == '\n')
396 	return 0;
397     }
398 
399   return (1);
400 }
401 
/* Hunk-boundary function with the signature print_script expects of
   HUNKFUN.  It performs no grouping: the link it returns as the end
   of the hunk is START itself.  */

struct change *
find_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
411 
/* Same as find_change as far as this code shows: return START itself
   as the last link of the hunk.  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
418 
419 /* Divide SCRIPT into pieces by calling HUNKFUN and
420    print each piece with PRINTFUN.
421    Both functions take one arg, an edit script.
422 
423    HUNKFUN is called with the tail of the script
424    and returns the last link that belongs together with the start
425    of the tail.
426 
427    PRINTFUN takes a subscript which belongs together (with a null
428    link at the end) and prints it.  */
429 
430 void
431 print_script (script, hunkfun, printfun)
432      struct change *script;
433      struct change * (*hunkfun) PARAMS((struct change *));
434      void (*printfun) PARAMS((struct change *));
435 {
436   struct change *next = script;
437 
438   while (next)
439     {
440       struct change *this, *end;
441 
442       /* Find a set of changes that belong together.  */
443       this = next;
444       end = (*hunkfun) (next);
445 
446       /* Disconnect them from the rest of the changes,
447 	 making them a hunk, and remember the rest for next iteration.  */
448       next = end->link;
449       end->link = 0;
450 #ifdef DEBUG
451       debug_script (this);
452 #endif
453 
454       /* Print this hunk.  */
455       (*printfun) (this);
456 
457       /* Reconnect the script so it will all be freed properly.  */
458       end->link = next;
459     }
460 }
461 
462 /* Print the text of a single line LINE,
463    flagging it with the characters in LINE_FLAG (which say whether
464    the line is inserted, deleted, changed, etc.).  */
465 
466 void
467 print_1_line (line_flag, line)
468      char const *line_flag;
469      char const * const *line;
470 {
471   char const *text = line[0], *limit = line[1]; /* Help the compiler.  */
472   FILE *out = outfile; /* Help the compiler some more.  */
473   char const *flag_format = 0;
474 
475   /* If -T was specified, use a Tab between the line-flag and the text.
476      Otherwise use a Space (as Unix diff does).
477      Print neither space nor tab if line-flags are empty.  */
478 
479   if (line_flag && *line_flag)
480     {
481       flag_format = tab_align_flag ? "%s\t" : "%s ";
482       fprintf (out, flag_format, line_flag);
483     }
484 
485   output_1_line (text, limit, flag_format, line_flag);
486 
487   if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
488     fprintf (out, "\n\\ No newline at end of file\n");
489 }
490 
491 /* Output a line from TEXT up to LIMIT.  Without -t, output verbatim.
492    With -t, expand white space characters to spaces, and if FLAG_FORMAT
493    is nonzero, output it with argument LINE_FLAG after every
494    internal carriage return, so that tab stops continue to line up.  */
495 
496 void
497 output_1_line (text, limit, flag_format, line_flag)
498      char const *text, *limit, *flag_format, *line_flag;
499 {
500   if (!tab_expand_flag)
501     fwrite (text, sizeof (char), limit - text, outfile);
502   else
503     {
504       register FILE *out = outfile;
505       register unsigned char c;
506       register char const *t = text;
507       register unsigned column = 0;
508 
509       while (t < limit)
510 	switch ((c = *t++))
511 	  {
512 	  case '\t':
513 	    {
514 	      unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
515 	      column += spaces;
516 	      do
517 		putc (' ', out);
518 	      while (--spaces);
519 	    }
520 	    break;
521 
522 	  case '\r':
523 	    putc (c, out);
524 	    if (flag_format && t < limit && *t != '\n')
525 	      fprintf (out, flag_format, line_flag);
526 	    column = 0;
527 	    break;
528 
529 	  case '\b':
530 	    if (column == 0)
531 	      continue;
532 	    column--;
533 	    putc (c, out);
534 	    break;
535 
536 	  default:
537 	    if (ISPRINT (c))
538 	      column++;
539 	    putc (c, out);
540 	    break;
541 	  }
542     }
543 }
544 
/* Return the letter describing a change: 'd' if INSERTS is zero,
   'a' if INSERTS is nonzero and DELETES is zero, and 'c' if both
   are nonzero.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  if (inserts && deletes)
    return 'c';

  return inserts ? 'a' : 'd';
}
556 
557 /* Translate an internal line number (an index into diff's table of lines)
558    into an actual line number in the input file.
559    The internal line number is LNUM.  FILE points to the data on the file.
560 
561    Internal line numbers count from 0 starting after the prefix.
562    Actual line numbers count from 1 within the entire file.  */
563 
564 int
565 translate_line_number (file, lnum)
566      struct file_data const *file;
567      int lnum;
568 {
569   return lnum + file->prefix_lines + 1;
570 }
571 
/* Translate the internal line number range A..B of FILE into actual
   line numbers, stored in *APTR and *BPTR.  Each end is computed by
   translating the neighbouring line just outside the range (A - 1,
   B + 1) and stepping one line back inside.  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data const *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first = translate_line_number (file, a - 1);
  int after_last;

  *aptr = before_first + 1;

  after_last = translate_line_number (file, b + 1);
  *bptr = after_last - 1;
}
581 
582 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
583    If the two numbers are identical, print just one number.
584 
585    Args A and B are internal line numbers.
586    We print the translated (real) line numbers.  */
587 
588 void
589 print_number_range (sepchar, file, a, b)
590      int sepchar;
591      struct file_data *file;
592      int a, b;
593 {
594   int trans_a, trans_b;
595   translate_range (file, a, b, &trans_a, &trans_b);
596 
597   /* Note: we can have B < A in the case of a range of no lines.
598      In this case, we should print the line number before the range,
599      which is B.  */
600   if (trans_b > trans_a)
601     fprintf (outfile, "%d%c%d", trans_a, sepchar, trans_b);
602   else
603     fprintf (outfile, "%d", trans_b);
604 }
605 
606 /* Look at a hunk of edit script and report the range of lines in each file
607    that it applies to.  HUNK is the start of the hunk, which is a chain
608    of `struct change'.  The first and last line numbers of file 0 are stored in
609    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
610    Note that these are internal line numbers that count from 0.
611 
612    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
613 
614    Also set *DELETES nonzero if any lines of file 0 are deleted
615    and set *INSERTS nonzero if any lines of file 1 are inserted.
616    If only ignorable lines are inserted or deleted, both are
617    set to 0.  */
618 
619 void
620 analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
621      struct change *hunk;
622      int *first0, *last0, *first1, *last1;
623      int *deletes, *inserts;
624 {
625   int l0, l1, show_from, show_to;
626   int i;
627   int trivial = ignore_blank_lines_flag || ignore_regexp_list;
628   struct change *next;
629 
630   show_from = show_to = 0;
631 
632   *first0 = hunk->line0;
633   *first1 = hunk->line1;
634 
635   next = hunk;
636   do
637     {
638       l0 = next->line0 + next->deleted - 1;
639       l1 = next->line1 + next->inserted - 1;
640       show_from += next->deleted;
641       show_to += next->inserted;
642 
643       for (i = next->line0; i <= l0 && trivial; i++)
644 	if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
645 	  {
646 	    struct regexp_list *r;
647 	    char const *line = files[0].linbuf[i];
648 	    int len = files[0].linbuf[i + 1] - line;
649 
650 	    for (r = ignore_regexp_list; r; r = r->next)
651 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
652 		break;	/* Found a match.  Ignore this line.  */
653 	    /* If we got all the way through the regexp list without
654 	       finding a match, then it's nontrivial.  */
655 	    if (!r)
656 	      trivial = 0;
657 	  }
658 
659       for (i = next->line1; i <= l1 && trivial; i++)
660 	if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
661 	  {
662 	    struct regexp_list *r;
663 	    char const *line = files[1].linbuf[i];
664 	    int len = files[1].linbuf[i + 1] - line;
665 
666 	    for (r = ignore_regexp_list; r; r = r->next)
667 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
668 		break;	/* Found a match.  Ignore this line.  */
669 	    /* If we got all the way through the regexp list without
670 	       finding a match, then it's nontrivial.  */
671 	    if (!r)
672 	      trivial = 0;
673 	  }
674     }
675   while ((next = next->link) != 0);
676 
677   *last0 = l0;
678   *last1 = l1;
679 
680   /* If all inserted or deleted lines are ignorable,
681      tell the caller to ignore this hunk.  */
682 
683   if (trivial)
684     show_from = show_to = 0;
685 
686   *deletes = show_from;
687   *inserts = show_to;
688 }
689 
/* Return a newly allocated string (obtained from xmalloc) holding
   S1, S2 and S3 joined end to end.  None of the three may be null.  */

char *
concat (s1, s2, s3)
     char const *s1, *s2, *s3;
{
  size_t n1 = strlen (s1);
  size_t n2 = strlen (s2);
  size_t n3 = strlen (s3);
  char *result = xmalloc (n1 + n2 + n3 + 1);

  /* Each copy starts where the previous one put its terminating NUL.  */
  strcpy (result, s1);
  strcpy (result + n1, s2);
  strcpy (result + n1 + n2, s3);

  return result;
}
701 
/* Yield the newly malloc'd pathname of the file in DIR whose filename
   is FILE.  A "/" is put between DIR and FILE unless
   filename_lastdirchar reports a character that is the very last
   character of DIR.  */

char *
dir_file_pathname (dir, file)
     char const *dir, *file;
{
  char const *last = filename_lastdirchar (dir);
  char const *separator;

  if (last && !last[1])
    separator = "";
  else
    separator = "/";

  return concat (dir, separator, file);
}
712 
713 void
714 debug_script (sp)
715      struct change *sp;
716 {
717   fflush (stdout);
718   for (; sp; sp = sp->link)
719     fprintf (stderr, "%3d %3d delete %d insert %d\n",
720 	     sp->line0, sp->line1, sp->deleted, sp->inserted);
721   fflush (stderr);
722 }
723