xref: /386bsd/usr/src/usr.bin/diff/util.c (revision a2142627)
1 /* Support routines for GNU DIFF.
2    Copyright (C) 1988, 1989, 1992 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 #include "diff.h"
21 
22 /* Use when a system call returns non-zero status.
23    TEXT should normally be the file name.  */
24 
25 void
perror_with_name(text)26 perror_with_name (text)
27      char *text;
28 {
29   int e = errno;
30   fprintf (stderr, "%s: ", program);
31   errno = e;
32   perror (text);
33 }
34 
/* Report a failed system call that cannot be recovered from.
   Any messages still waiting in the message queue are printed first,
   then the same diagnostic as `perror_with_name' is written for TEXT,
   and the program exits with status 2.  */

void
pfatal_with_name (text)
     char *text;
{
  /* Printing the queue may disturb errno; hold on to the value that
     belongs to the failure being reported.  */
  int saved_errno = errno;

  print_message_queue ();

  errno = saved_errno;
  perror_with_name (text);
  exit (2);
}
48 
49 /* Print an error message from the format-string FORMAT
50    with args ARG1 and ARG2.  */
51 
52 void
error(format,arg,arg1)53 error (format, arg, arg1)
54      char *format;
55      char *arg;
56      char *arg1;
57 {
58   fprintf (stderr, "%s: ", program);
59   fprintf (stderr, format, arg, arg1);
60   fprintf (stderr, "\n");
61 }
62 
/* Print any queued messages, then an error message consisting of the
   string M, then exit with status 2.  */

void
fatal (m)
     char *m;
{
  print_message_queue ();

  /* `error' is defined K&R-style with two `char *' arguments, so the
     unused second one must be passed as a real null pointer: a bare 0
     would be passed as an int, which need not have the same size or
     representation as a pointer.  */
  error ("%s", m, (char *) 0);
  exit (2);
}
73 
74 /* Like printf, except if -l in effect then save the message and print later.
75    This is used for things like "binary files differ" and "Only in ...".  */
76 
77 void
message(format,arg1,arg2)78 message (format, arg1, arg2)
79      char *format, *arg1, *arg2;
80 {
81   if (paginate_flag)
82     {
83       struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
84       if (msg_chain_end == 0)
85 	msg_chain = msg_chain_end = new;
86       else
87 	{
88 	  msg_chain_end->next = new;
89 	  msg_chain_end = new;
90 	}
91       new->format = format;
92       new->arg1 = concat (arg1, "", "");
93       new->arg2 = concat (arg2, "", "");
94       new->next = 0;
95     }
96   else
97     {
98       if (sdiff_help_sdiff)
99 	putchar (' ');
100       printf (format, arg1, arg2);
101     }
102 }
103 
104 /* Output all the messages that were saved up by calls to `message'.  */
105 
106 void
print_message_queue()107 print_message_queue ()
108 {
109   struct msg *m;
110 
111   for (m = msg_chain; m; m = m->next)
112     printf (m->format, m->arg1, m->arg2);
113 }
114 
115 /* Call before outputting the results of comparing files NAME0 and NAME1
116    to set up OUTFILE, the stdio stream for the output to go to.
117 
118    Usually, OUTFILE is just stdout.  But when -l was specified
119    we fork off a `pr' and make OUTFILE a pipe to it.
120    `pr' then outputs to our stdout.  */
121 
122 static char *current_name0;
123 static char *current_name1;
124 static int current_depth;
125 
126 void
setup_output(name0,name1,depth)127 setup_output (name0, name1, depth)
128      char *name0, *name1;
129      int depth;
130 {
131   current_name0 = name0;
132   current_name1 = name1;
133   current_depth = depth;
134   outfile = 0;
135 }
136 
137 void
begin_output()138 begin_output ()
139 {
140   char *name;
141 
142   if (outfile != 0)
143     return;
144 
145   /* Construct the header of this piece of diff.  */
146   name = (char *) xmalloc (strlen (current_name0) + strlen (current_name1)
147 			   + strlen (switch_string) + 15);
148 
149   strcpy (name, "diff");
150   strcat (name, switch_string);
151   strcat (name, " ");
152   strcat (name, current_name0);
153   strcat (name, " ");
154   strcat (name, current_name1);
155 
156   if (paginate_flag)
157     {
158       int pipes[2];
159       int desc;
160 
161       /* For a `pr' and make OUTFILE a pipe to it.  */
162       if (pipe (pipes) < 0)
163 	pfatal_with_name ("pipe");
164 
165       fflush (stdout);
166 
167       desc = vfork ();
168       if (desc < 0)
169 	pfatal_with_name ("vfork");
170 
171       if (desc == 0)
172 	{
173 	  close (pipes[1]);
174 	  if (pipes[0] != fileno (stdin))
175 	    {
176 	      if (dup2 (pipes[0], fileno (stdin)) < 0)
177 		pfatal_with_name ("dup2");
178 	      close (pipes[0]);
179 	    }
180 
181 	  if (execl (PR_FILE_NAME, PR_FILE_NAME, "-f", "-h", name, 0) < 0)
182 	    pfatal_with_name (PR_FILE_NAME);
183 	}
184       else
185 	{
186 	  close (pipes[0]);
187 	  outfile = fdopen (pipes[1], "w");
188 	}
189     }
190   else
191     {
192 
193       /* If -l was not specified, output the diff straight to `stdout'.  */
194 
195       outfile = stdout;
196 
197       /* If handling multiple files (because scanning a directory),
198 	 print which files the following output is about.  */
199       if (current_depth > 0)
200 	printf ("%s\n", name);
201     }
202 
203   free (name);
204 
205   /* A special header is needed at the beginning of context output.  */
206   switch (output_style)
207     {
208     case OUTPUT_CONTEXT:
209       print_context_header (files, 0);
210       break;
211 
212     case OUTPUT_UNIFIED:
213       print_context_header (files, 1);
214       break;
215 
216     default:
217       break;
218     }
219 }
220 
221 /* Call after the end of output of diffs for one file.
222    Close OUTFILE and get rid of the `pr' subfork.  */
223 
224 void
finish_output()225 finish_output ()
226 {
227   if (outfile != 0 && outfile != stdout)
228     {
229       fclose (outfile);
230       wait (0);
231     }
232 
233   outfile = 0;
234 }
235 
236 /* Compare two lines (typically one from each input file)
237    according to the command line options.
238    Return 1 if the lines differ, like `bcmp'.  */
239 
240 int
line_cmp(s1,len1,s2,len2)241 line_cmp (s1, len1, s2, len2)
242      const char *s1, *s2;
243      int len1, len2;
244 {
245   register const unsigned char *t1, *t2;
246   register unsigned char end_char = line_end_char;
247 
248   /* Check first for exact identity.
249      If that is true, return 0 immediately.
250      This detects the common case of exact identity
251      faster than complete comparison would.  */
252 
253   if (len1 == len2 && bcmp (s1, s2, len1) == 0)
254     return 0;
255 
256   /* Not exactly identical, but perhaps they match anyway
257      when case or whitespace is ignored.  */
258 
259   if (ignore_case_flag || ignore_space_change_flag || ignore_all_space_flag)
260     {
261       t1 = (const unsigned char *) s1;
262       t2 = (const unsigned char *) s2;
263 
264       while (1)
265 	{
266 	  register unsigned char c1 = *t1++;
267 	  register unsigned char c2 = *t2++;
268 
269 	  /* Ignore horizontal whitespace if -b or -w is specified.  */
270 
271 	  if (ignore_all_space_flag)
272 	    {
273 	      /* For -w, just skip past any white space.  */
274 	      while (Is_space (c1)) c1 = *t1++;
275 	      while (Is_space (c2)) c2 = *t2++;
276 	    }
277 	  else if (ignore_space_change_flag)
278 	    {
279 	      /* For -b, advance past any sequence of whitespace in line 1
280 		 and consider it just one Space, or nothing at all
281 		 if it is at the end of the line.  */
282 	      if (c1 == ' ' || c1 == '\t')
283 		{
284 		  while (1)
285 		    {
286 		      c1 = *t1++;
287 		      if (c1 == end_char)
288 			break;
289 		      if (c1 != ' ' && c1 != '\t')
290 			{
291 			  --t1;
292 			  c1 = ' ';
293 			  break;
294 			}
295 		    }
296 		}
297 
298 	      /* Likewise for line 2.  */
299 	      if (c2 == ' ' || c2 == '\t')
300 		{
301 		  while (1)
302 		    {
303 		      c2 = *t2++;
304 		      if (c2 == end_char)
305 			break;
306 		      if (c2 != ' ' && c2 != '\t')
307 			{
308 			  --t2;
309 			  c2 = ' ';
310 			  break;
311 			}
312 		    }
313 		}
314 	    }
315 
316 	  /* Upcase all letters if -i is specified.  */
317 
318 	  if (ignore_case_flag)
319 	    {
320 	      if (islower (c1))
321 		c1 = toupper (c1);
322 	      if (islower (c2))
323 		c2 = toupper (c2);
324 	    }
325 
326 	  if (c1 != c2)
327 	    break;
328 	  if (c1 == end_char)
329 	    return 0;
330 	}
331     }
332 
333   return (1);
334 }
335 
/* Hunk-finding function for use with `print_script'.
   Return the last link that belongs in the same hunk as START.
   This implementation returns START itself, so every change in the
   script becomes a hunk of its own.  */

struct change *
find_change (start)
     struct change *start;
{
  return start;
}
345 
/* Counterpart of `find_change' with the same contract: return the last
   link that belongs in the same hunk as START.  This implementation
   also returns START unchanged.  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  return start;
}
352 
353 /* Divide SCRIPT into pieces by calling HUNKFUN and
354    print each piece with PRINTFUN.
355    Both functions take one arg, an edit script.
356 
357    HUNKFUN is called with the tail of the script
358    and returns the last link that belongs together with the start
359    of the tail.
360 
361    PRINTFUN takes a subscript which belongs together (with a null
362    link at the end) and prints it.  */
363 
364 void
print_script(script,hunkfun,printfun)365 print_script (script, hunkfun, printfun)
366      struct change *script;
367      struct change * (*hunkfun) ();
368      void (*printfun) ();
369 {
370   struct change *next = script;
371 
372   while (next)
373     {
374       struct change *this, *end;
375 
376       /* Find a set of changes that belong together.  */
377       this = next;
378       end = (*hunkfun) (next);
379 
380       /* Disconnect them from the rest of the changes,
381 	 making them a hunk, and remember the rest for next iteration.  */
382       next = end->link;
383       end->link = NULL;
384 #ifdef DEBUG
385       debug_script (this);
386 #endif
387 
388       /* Print this hunk.  */
389       (*printfun) (this);
390 
391       /* Reconnect the script so it will all be freed properly.  */
392       end->link = next;
393     }
394 }
395 
396 /* Print the text of a single line LINE,
397    flagging it with the characters in LINE_FLAG (which say whether
398    the line is inserted, deleted, changed, etc.).  */
399 
400 void
print_1_line(line_flag,line)401 print_1_line (line_flag, line)
402      const char *line_flag;
403      const char * const *line;
404 {
405   const char *text = line[0], *limit = line[1]; /* Help the compiler.  */
406   FILE *out = outfile; /* Help the compiler some more.  */
407   const char *flag_format = 0;
408 
409   /* If -T was specified, use a Tab between the line-flag and the text.
410      Otherwise use a Space (as Unix diff does).
411      Print neither space nor tab if line-flags are empty.  */
412 
413   if (line_flag != NULL && line_flag[0] != 0)
414     {
415       flag_format = tab_align_flag ? "%s\t" : "%s ";
416       fprintf (out, flag_format, line_flag);
417     }
418 
419   output_1_line (text, limit, flag_format, line_flag);
420 
421   if ((line_flag == NULL || line_flag[0] != 0) && limit[-1] != '\n'
422       && line_end_char == '\n')
423     fprintf (out, "\n\\ No newline at end of file\n");
424 }
425 
426 /* Output a line from TEXT up to LIMIT.  Without -t, output verbatim.
427    With -t, expand white space characters to spaces, and if FLAG_FORMAT
428    is nonzero, output it with argument LINE_FLAG after every
429    internal carriage return, so that tab stops continue to line up.  */
430 
431 void
output_1_line(text,limit,flag_format,line_flag)432 output_1_line (text, limit, flag_format, line_flag)
433      const char *text, *limit, *flag_format, *line_flag;
434 {
435   if (!tab_expand_flag)
436     fwrite (text, sizeof (char), limit - text, outfile);
437   else
438     {
439       register FILE *out = outfile;
440       register char c;
441       register const char *t = text;
442       register unsigned column = 0;
443 
444       while (t < limit)
445 	switch ((c = *t++))
446 	  {
447 	  case '\t':
448 	    {
449 	      unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
450 	      column += spaces;
451 	      do
452 		putc (' ', out);
453 	      while (--spaces);
454 	    }
455 	    break;
456 
457 	  case '\r':
458 	    putc (c, out);
459 	    if (flag_format && t < limit && *t != '\n')
460 	      fprintf (out, flag_format, line_flag);
461 	    column = 0;
462 	    break;
463 
464 	  case '\b':
465 	    if (column == 0)
466 	      continue;
467 	    column--;
468 	    putc (c, out);
469 	    break;
470 
471 	  default:
472 	    if (textchar[(unsigned char) c])
473 	      column++;
474 	    /* fall into */
475 	  case '\f':
476 	  case '\v':
477 	    putc (c, out);
478 	    break;
479 	  }
480     }
481 }
482 
/* Return the command letter for a change that inserts INSERTS lines
   and deletes DELETES lines: 'd' when nothing is inserted, 'a' when
   something is inserted and nothing is deleted, and 'c' when both
   counts are nonzero.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  if (inserts && deletes)
    return 'c';

  return inserts ? 'a' : 'd';
}
494 
495 /* Translate an internal line number (an index into diff's table of lines)
496    into an actual line number in the input file.
497    The internal line number is LNUM.  FILE points to the data on the file.
498 
499    Internal line numbers count from 0 starting after the prefix.
500    Actual line numbers count from 1 within the entire file.  */
501 
502 int
translate_line_number(file,lnum)503 translate_line_number (file, lnum)
504      struct file_data *file;
505      int lnum;
506 {
507   return lnum + file->prefix_lines + 1;
508 }
509 
/* Convert the internal line numbers A and B for file FILE to actual
   line numbers, storing them in *APTR and *BPTR.

   Each end is obtained by translating its outside neighbour (A - 1
   and B + 1) with `translate_line_number' and then stepping one line
   back towards the range.  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first = translate_line_number (file, a - 1);

  *aptr = before_first + 1;

  {
    int after_last = translate_line_number (file, b + 1);

    *bptr = after_last - 1;
  }
}
519 
520 /* Print a pair of line numbers with SEPCHAR, translated for file FILE.
521    If the two numbers are identical, print just one number.
522 
523    Args A and B are internal line numbers.
524    We print the translated (real) line numbers.  */
525 
526 void
print_number_range(sepchar,file,a,b)527 print_number_range (sepchar, file, a, b)
528      char sepchar;
529      struct file_data *file;
530      int a, b;
531 {
532   int trans_a, trans_b;
533   translate_range (file, a, b, &trans_a, &trans_b);
534 
535   /* Note: we can have B < A in the case of a range of no lines.
536      In this case, we should print the line number before the range,
537      which is B.  */
538   if (trans_b > trans_a)
539     fprintf (outfile, "%d%c%d", trans_a, sepchar, trans_b);
540   else
541     fprintf (outfile, "%d", trans_b);
542 }
543 
544 /* Look at a hunk of edit script and report the range of lines in each file
545    that it applies to.  HUNK is the start of the hunk, which is a chain
546    of `struct change'.  The first and last line numbers of file 0 are stored in
547    *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
548    Note that these are internal line numbers that count from 0.
549 
550    If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
551 
552    Also set *DELETES nonzero if any lines of file 0 are deleted
553    and set *INSERTS nonzero if any lines of file 1 are inserted.
554    If only ignorable lines are inserted or deleted, both are
555    set to 0.  */
556 
557 void
analyze_hunk(hunk,first0,last0,first1,last1,deletes,inserts)558 analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
559      struct change *hunk;
560      int *first0, *last0, *first1, *last1;
561      int *deletes, *inserts;
562 {
563   int f0, l0, f1, l1, show_from, show_to;
564   int i;
565   int nontrivial = !(ignore_blank_lines_flag || ignore_regexp_list);
566   struct change *next;
567 
568   show_from = show_to = 0;
569 
570   f0 = hunk->line0;
571   f1 = hunk->line1;
572 
573   for (next = hunk; next; next = next->link)
574     {
575       l0 = next->line0 + next->deleted - 1;
576       l1 = next->line1 + next->inserted - 1;
577       show_from += next->deleted;
578       show_to += next->inserted;
579 
580       for (i = next->line0; i <= l0 && ! nontrivial; i++)
581 	if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
582 	  {
583 	    struct regexp_list *r;
584 	    const char *line = files[0].linbuf[i];
585 	    int len = files[0].linbuf[i + 1] - line;
586 
587 	    for (r = ignore_regexp_list; r; r = r->next)
588 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
589 		break;	/* Found a match.  Ignore this line.  */
590 	    /* If we got all the way through the regexp list without
591 	       finding a match, then it's nontrivial.  */
592 	    if (r == NULL)
593 	      nontrivial = 1;
594 	  }
595 
596       for (i = next->line1; i <= l1 && ! nontrivial; i++)
597 	if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
598 	  {
599 	    struct regexp_list *r;
600 	    const char *line = files[1].linbuf[i];
601 	    int len = files[1].linbuf[i + 1] - line;
602 
603 	    for (r = ignore_regexp_list; r; r = r->next)
604 	      if (0 <= re_search (&r->buf, line, len, 0, len, 0))
605 		break;	/* Found a match.  Ignore this line.  */
606 	    /* If we got all the way through the regexp list without
607 	       finding a match, then it's nontrivial.  */
608 	    if (r == NULL)
609 	      nontrivial = 1;
610 	  }
611     }
612 
613   *first0 = f0;
614   *last0 = l0;
615   *first1 = f1;
616   *last1 = l1;
617 
618   /* If all inserted or deleted lines are ignorable,
619      tell the caller to ignore this hunk.  */
620 
621   if (!nontrivial)
622     show_from = show_to = 0;
623 
624   *deletes = show_from;
625   *inserts = show_to;
626 }
627 
628 /* malloc a block of memory, with fatal error message if we can't do it. */
629 
630 VOID *
xmalloc(size)631 xmalloc (size)
632      unsigned size;
633 {
634   register VOID *value;
635 
636   if (size == 0)
637     size = 1;
638 
639   value = (VOID *) malloc (size);
640 
641   if (!value)
642     fatal ("virtual memory exhausted");
643   return value;
644 }
645 
646 /* realloc a block of memory, with fatal error message if we can't do it. */
647 
648 VOID *
xrealloc(old,size)649 xrealloc (old, size)
650      VOID *old;
651      unsigned int size;
652 {
653   register VOID *value;
654 
655   if (size == 0)
656     size = 1;
657 
658   value = (VOID *) realloc (old, size);
659 
660   if (!value)
661     fatal ("virtual memory exhausted");
662   return value;
663 }
664 
/* Return a newly allocated string holding S1, S2 and S3 joined end to
   end.  The storage comes from `xmalloc'.  None of the three
   arguments may be null.  */

char *
concat (s1, s2, s3)
     char *s1, *s2, *s3;
{
  int n1 = strlen (s1);
  int n2 = strlen (s2);
  int n3 = strlen (s3);
  char *new = (char *) xmalloc (n1 + n2 + n3 + 1);
  char *p = new;

  /* Copy each piece to where the previous one ended; the last strcpy
     supplies the terminating null.  */
  strcpy (p, s1);
  p += n1;
  strcpy (p, s2);
  p += n2;
  strcpy (p, s3);

  return new;
}
678 
679 void
debug_script(sp)680 debug_script (sp)
681      struct change *sp;
682 {
683   fflush (stdout);
684   for (; sp; sp = sp->link)
685     fprintf (stderr, "%3d %3d delete %d insert %d\n",
686 	     sp->line0, sp->line1, sp->deleted, sp->inserted);
687   fflush (stderr);
688 }
689