1 /* Support routines for GNU DIFF.
2 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU DIFF.
5
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 */
17
18 #include "diff.h"
19
20 #if __STDC__
21 #include <stdarg.h>
22 #else
23 #include <varargs.h>
24 #endif
25
26 #ifndef strerror
27 extern char *strerror ();
28 #endif
29
/* One-line messages are queued up, to be printed at the end, when -l is
   specified.  Each queued message is recorded in one `struct msg':
   a printf-style FORMAT plus up to four string arguments.  */

struct msg
{
  struct msg *next;	/* Following message in the chain, or null.  */
  const char *format;	/* Format string later given to printf_output.  */
  const char *arg1;	/* First string argument.  */
  const char *arg2;	/* Second string argument.  */
  const char *arg3;	/* Third string argument; may be null.  */
  const char *arg4;	/* Fourth string argument; may be null.  */
};
42
/* Head of the chain of queued messages.  */

static struct msg *msg_chain = 0;

/* Tail of the chain of queued messages: the place where the next
   message's address is to be stored.  This is the NEXT field of the
   last queued message, or MSG_CHAIN itself while the chain is empty.  */

static struct msg **msg_chain_end = &msg_chain;
50
51 /* Use when a system call returns non-zero status.
52 TEXT should normally be the file name. */
53
54 void
perror_with_name(text)55 perror_with_name (text)
56 char const *text;
57 {
58 int e = errno;
59
60 if (callbacks && callbacks->error)
61 (*callbacks->error) ("%s: %s", text, strerror (e));
62 else
63 {
64 fprintf (stderr, "%s: ", diff_program_name);
65 errno = e;
66 perror (text);
67 }
68 }
69
70 /* Use when a system call returns non-zero status and that is fatal. */
71
72 void
pfatal_with_name(text)73 pfatal_with_name (text)
74 char const *text;
75 {
76 int e = errno;
77 print_message_queue ();
78 if (callbacks && callbacks->error)
79 (*callbacks->error) ("%s: %s", text, strerror (e));
80 else
81 {
82 fprintf (stderr, "%s: ", diff_program_name);
83 errno = e;
84 perror (text);
85 }
86 DIFF_ABORT (2);
87 }
88
89 /* Print an error message from the format-string FORMAT
90 with args ARG1 and ARG2. */
91
92 void
diff_error(format,arg,arg1)93 diff_error (format, arg, arg1)
94 char const *format, *arg, *arg1;
95 {
96 if (callbacks && callbacks->error)
97 (*callbacks->error) (format, arg, arg1);
98 else
99 {
100 fprintf (stderr, "%s: ", diff_program_name);
101 fprintf (stderr, format, arg, arg1);
102 fprintf (stderr, "\n");
103 }
104 }
105
/* Print any queued messages, then an error message containing the
   string M, then invoke DIFF_ABORT (2).  */

void
fatal (m)
     char const *m;
{
  /* Queued messages come out first so that they precede the error.  */
  print_message_queue ();

  /* M is passed as data for "%s", never as a format string.  */
  diff_error ("%s", m, 0);

  DIFF_ABORT (2);
}
116
/* Like printf, except if -l is in effect then save the message and
   print it later.  This is used for things like "binary files differ"
   and "Only in ...".

   This is the two-argument form of message5.  */

void
message (format, arg1, arg2)
     char const *format, *arg1, *arg2;
{
  /* The third and fourth string arguments are passed as null.  */
  message5 (format, arg1, arg2, 0, 0);
}
126
127 void
message5(format,arg1,arg2,arg3,arg4)128 message5 (format, arg1, arg2, arg3, arg4)
129 char const *format, *arg1, *arg2, *arg3, *arg4;
130 {
131 if (paginate_flag)
132 {
133 struct msg *new = (struct msg *) xmalloc (sizeof (struct msg));
134 new->format = format;
135 new->arg1 = concat (arg1, "", "");
136 new->arg2 = concat (arg2, "", "");
137 new->arg3 = arg3 ? concat (arg3, "", "") : 0;
138 new->arg4 = arg4 ? concat (arg4, "", "") : 0;
139 new->next = 0;
140 *msg_chain_end = new;
141 msg_chain_end = &new->next;
142 }
143 else
144 {
145 if (sdiff_help_sdiff)
146 write_output (" ", 1);
147 printf_output (format, arg1, arg2, arg3, arg4);
148 }
149 }
150
151 /* Output all the messages that were saved up by calls to `message'. */
152
153 void
print_message_queue()154 print_message_queue ()
155 {
156 struct msg *m;
157
158 for (m = msg_chain; m; m = m->next)
159 printf_output (m->format, m->arg1, m->arg2, m->arg3, m->arg4);
160 }
161
/* Call before outputting the results of comparing files NAME0 and NAME1.

   setup_output only records its arguments.  The output itself is
   started later by begin_output, which uses the recorded names to
   build a header and, when -l was specified, forks off a `pr' and
   makes OUTFILE a pipe to it; `pr' then outputs to our stdout.  */

/* Names of the two files being compared, as given to setup_output.  */
static char const *current_name0;
static char const *current_name1;

/* DEPTH as given to setup_output.  begin_output prints a header line
   naming the files when this is positive.  */
static int current_depth;

/* Nonzero between a call to begin_output and the matching call to
   finish_output.  */
static int output_in_progress = 0;

void
setup_output (name0, name1, depth)
     char const *name0, *name1;
     int depth;
{
  /* The name pointers are stored as given; the strings are not copied.  */
  current_depth = depth;
  current_name1 = name1;
  current_name0 = name0;
}
184
185 #if HAVE_FORK && defined (PR_PROGRAM)
186 static pid_t pr_pid;
187 #endif
188
189 void
begin_output()190 begin_output ()
191 {
192 char *name;
193
194 if (output_in_progress)
195 return;
196 output_in_progress = 1;
197
198 /* Construct the header of this piece of diff. */
199 name = xmalloc (strlen (current_name0) + strlen (current_name1)
200 + strlen (switch_string) + 7);
201 /* Posix.2 section 4.17.6.1.1 specifies this format. But there is a
202 bug in the first printing (IEEE Std 1003.2-1992 p 251 l 3304):
203 it says that we must print only the last component of the pathnames.
204 This requirement is silly and does not match historical practice. */
205 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
206
207 if (paginate_flag && callbacks && callbacks->write_output)
208 fatal ("can't paginate when using library callbacks");
209
210 if (paginate_flag)
211 {
212 /* Make OUTFILE a pipe to a subsidiary `pr'. */
213
214 #ifdef PR_PROGRAM
215
216 # if HAVE_FORK
217 int pipes[2];
218
219 if (pipe (pipes) != 0)
220 pfatal_with_name ("pipe");
221
222 fflush (stdout);
223
224 pr_pid = vfork ();
225 if (pr_pid < 0)
226 pfatal_with_name ("vfork");
227
228 if (pr_pid == 0)
229 {
230 close (pipes[1]);
231 if (pipes[0] != STDIN_FILENO)
232 {
233 if (dup2 (pipes[0], STDIN_FILENO) < 0)
234 pfatal_with_name ("dup2");
235 close (pipes[0]);
236 }
237
238 execl (PR_PROGRAM, PR_PROGRAM, "-f", "-h", name, NULL);
239 pfatal_with_name (PR_PROGRAM);
240 }
241 else
242 {
243 close (pipes[0]);
244 outfile = fdopen (pipes[1], "w");
245 if (!outfile)
246 pfatal_with_name ("fdopen");
247 }
248 # else /* ! HAVE_FORK */
249 char *command = xmalloc (4 * strlen (name) + strlen (PR_PROGRAM) + 10);
250 char *p;
251 char const *a = name;
252 sprintf (command, "%s -f -h ", PR_PROGRAM);
253 p = command + strlen (command);
254 SYSTEM_QUOTE_ARG (p, a);
255 *p = 0;
256 outfile = popen (command, "w");
257 if (!outfile)
258 pfatal_with_name (command);
259 free (command);
260 # endif /* ! HAVE_FORK */
261 #else
262 fatal ("This port does not support the --paginate option to diff.");
263 #endif
264 }
265 else
266 {
267
268 /* If -l was not specified, output the diff straight to `stdout'. */
269
270 /* If handling multiple files (because scanning a directory),
271 print which files the following output is about. */
272 if (current_depth > 0)
273 printf_output ("%s\n", name);
274 }
275
276 free (name);
277
278 /* A special header is needed at the beginning of context output. */
279 switch (output_style)
280 {
281 case OUTPUT_CONTEXT:
282 print_context_header (files, 0);
283 break;
284
285 case OUTPUT_UNIFIED:
286 print_context_header (files, 1);
287 break;
288
289 default:
290 break;
291 }
292 }
293
294 /* Call after the end of output of diffs for one file.
295 If -l was given, close OUTFILE and get rid of the `pr' subfork. */
296
297 void
finish_output()298 finish_output ()
299 {
300 if (paginate_flag && outfile != 0 && outfile != stdout)
301 {
302 #ifdef PR_PROGRAM
303 int wstatus, w;
304 if (ferror (outfile))
305 fatal ("write error");
306 # if ! HAVE_FORK
307 wstatus = pclose (outfile);
308 # else /* HAVE_FORK */
309 if (fclose (outfile) != 0)
310 pfatal_with_name ("write error");
311 while ((w = waitpid (pr_pid, &wstatus, 0)) < 0 && errno == EINTR)
312 ;
313 if (w < 0)
314 pfatal_with_name ("waitpid");
315 # endif /* HAVE_FORK */
316 if (wstatus != 0)
317 fatal ("subsidiary pr failed");
318 #else
319 fatal ("internal error in finish_output");
320 #endif
321 }
322
323 output_in_progress = 0;
324 }
325
326 /* Write something to the output file. */
327
328 void
write_output(text,len)329 write_output (text, len)
330 char const *text;
331 size_t len;
332 {
333 if (callbacks && callbacks->write_output)
334 (*callbacks->write_output) (text, len);
335 else if (len == 1)
336 putc (*text, outfile);
337 else
338 fwrite (text, sizeof (char), len, outfile);
339 }
340
341 /* Printf something to the output file. */
342
343 #if __STDC__
344 #define VA_START(args, lastarg) va_start(args, lastarg)
345 #else /* ! __STDC__ */
346 #define VA_START(args, lastarg) va_start(args)
347 #endif /* __STDC__ */
348
349 void
350 #if __STDC__
printf_output(const char * format,...)351 printf_output (const char *format, ...)
352 #else
353 printf_output (format, va_alist)
354 char const *format;
355 va_dcl
356 #endif
357 {
358 va_list args;
359
360 VA_START (args, format);
361 if (callbacks && callbacks->write_output)
362 {
363 /* We implement our own limited printf-like functionality (%s, %d,
364 and %c only). Callers who want something fancier can use
365 sprintf. */
366 const char *p = format;
367 char *q;
368 char *str;
369 int num;
370 int ch;
371 char buf[100];
372
373 while ((q = strchr (p, '%')) != NULL)
374 {
375 static const char msg[] =
376 "\ninternal error: bad % in printf_output\n";
377 (*callbacks->write_output) (p, q - p);
378
379 switch (q[1])
380 {
381 case 's':
382 str = va_arg (args, char *);
383 (*callbacks->write_output) (str, strlen (str));
384 break;
385 case 'd':
386 num = va_arg (args, int);
387 sprintf (buf, "%d", num);
388 (*callbacks->write_output) (buf, strlen (buf));
389 break;
390 case 'c':
391 ch = va_arg (args, int);
392 buf[0] = ch;
393 (*callbacks->write_output) (buf, 1);
394 break;
395 default:
396 (*callbacks->write_output) (msg, sizeof (msg) - 1);
397 /* Don't just keep going, because q + 1 might point to the
398 terminating '\0'. */
399 goto out;
400 }
401 p = q + 2;
402 }
403 (*callbacks->write_output) (p, strlen (p));
404 }
405 else
406 vfprintf (outfile, format, args);
407 out:
408 va_end (args);
409 }
410
411 /* Flush the output file. */
412
413 void
flush_output()414 flush_output ()
415 {
416 if (callbacks && callbacks->flush_output)
417 (*callbacks->flush_output) ();
418 else
419 fflush (outfile);
420 }
421
422 /* Compare two lines (typically one from each input file)
423 according to the command line options.
424 For efficiency, this is invoked only when the lines do not match exactly
425 but an option like -i might cause us to ignore the difference.
426 Return nonzero if the lines differ. */
427
428 int
line_cmp(s1,s2)429 line_cmp (s1, s2)
430 char const *s1, *s2;
431 {
432 register unsigned char const *t1 = (unsigned char const *) s1;
433 register unsigned char const *t2 = (unsigned char const *) s2;
434
435 while (1)
436 {
437 register unsigned char c1 = *t1++;
438 register unsigned char c2 = *t2++;
439
440 /* Test for exact char equality first, since it's a common case. */
441 if (c1 != c2)
442 {
443 /* Ignore horizontal white space if -b or -w is specified. */
444
445 if (ignore_all_space_flag)
446 {
447 /* For -w, just skip past any white space. */
448 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
449 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
450 }
451 else if (ignore_space_change_flag)
452 {
453 /* For -b, advance past any sequence of white space in line 1
454 and consider it just one Space, or nothing at all
455 if it is at the end of the line. */
456 if (ISSPACE (c1))
457 {
458 while (c1 != '\n')
459 {
460 c1 = *t1++;
461 if (! ISSPACE (c1))
462 {
463 --t1;
464 c1 = ' ';
465 break;
466 }
467 }
468 }
469
470 /* Likewise for line 2. */
471 if (ISSPACE (c2))
472 {
473 while (c2 != '\n')
474 {
475 c2 = *t2++;
476 if (! ISSPACE (c2))
477 {
478 --t2;
479 c2 = ' ';
480 break;
481 }
482 }
483 }
484
485 if (c1 != c2)
486 {
487 /* If we went too far when doing the simple test
488 for equality, go back to the first non-white-space
489 character in both sides and try again. */
490 if (c2 == ' ' && c1 != '\n'
491 && (unsigned char const *) s1 + 1 < t1
492 && ISSPACE(t1[-2]))
493 {
494 --t1;
495 continue;
496 }
497 if (c1 == ' ' && c2 != '\n'
498 && (unsigned char const *) s2 + 1 < t2
499 && ISSPACE(t2[-2]))
500 {
501 --t2;
502 continue;
503 }
504 }
505 }
506
507 /* Lowercase all letters if -i is specified. */
508
509 if (ignore_case_flag)
510 {
511 if (ISUPPER (c1))
512 c1 = tolower (c1);
513 if (ISUPPER (c2))
514 c2 = tolower (c2);
515 }
516
517 if (c1 != c2)
518 break;
519 }
520 if (c1 == '\n')
521 return 0;
522 }
523
524 return (1);
525 }
526
/* Find the consecutive changes at the start of the script START.
   Return the last link that belongs with them.

   This implementation returns START itself, so START is reported as
   the last link of its own group.  */

struct change *
find_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
536
/* Counterpart of find_change with the same interface.
   This implementation likewise returns START itself.  */

struct change *
find_reverse_change (start)
     struct change *start;
{
  struct change *last_link = start;

  return last_link;
}
543
544 /* Divide SCRIPT into pieces by calling HUNKFUN and
545 print each piece with PRINTFUN.
546 Both functions take one arg, an edit script.
547
548 HUNKFUN is called with the tail of the script
549 and returns the last link that belongs together with the start
550 of the tail.
551
552 PRINTFUN takes a subscript which belongs together (with a null
553 link at the end) and prints it. */
554
555 void
print_script(script,hunkfun,printfun)556 print_script (script, hunkfun, printfun)
557 struct change *script;
558 struct change * (*hunkfun) PARAMS((struct change *));
559 void (*printfun) PARAMS((struct change *));
560 {
561 struct change *next = script;
562
563 while (next)
564 {
565 struct change *this, *end;
566
567 /* Find a set of changes that belong together. */
568 this = next;
569 end = (*hunkfun) (next);
570
571 /* Disconnect them from the rest of the changes,
572 making them a hunk, and remember the rest for next iteration. */
573 next = end->link;
574 end->link = 0;
575 #ifdef DEBUG
576 debug_script (this);
577 #endif
578
579 /* Print this hunk. */
580 (*printfun) (this);
581
582 /* Reconnect the script so it will all be freed properly. */
583 end->link = next;
584 }
585 }
586
587 /* Print the text of a single line LINE,
588 flagging it with the characters in LINE_FLAG (which say whether
589 the line is inserted, deleted, changed, etc.). */
590
591 void
print_1_line(line_flag,line)592 print_1_line (line_flag, line)
593 char const *line_flag;
594 char const * const *line;
595 {
596 char const *text = line[0], *limit = line[1]; /* Help the compiler. */
597 char const *flag_format = 0;
598
599 /* If -T was specified, use a Tab between the line-flag and the text.
600 Otherwise use a Space (as Unix diff does).
601 Print neither space nor tab if line-flags are empty. */
602
603 if (line_flag && *line_flag)
604 {
605 flag_format = tab_align_flag ? "%s\t" : "%s ";
606 printf_output (flag_format, line_flag);
607 }
608
609 output_1_line (text, limit, flag_format, line_flag);
610
611 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
612 printf_output ("\n\\ No newline at end of file\n");
613 }
614
615 /* Output a line from TEXT up to LIMIT. Without -t, output verbatim.
616 With -t, expand white space characters to spaces, and if FLAG_FORMAT
617 is nonzero, output it with argument LINE_FLAG after every
618 internal carriage return, so that tab stops continue to line up. */
619
620 void
output_1_line(text,limit,flag_format,line_flag)621 output_1_line (text, limit, flag_format, line_flag)
622 char const *text, *limit, *flag_format, *line_flag;
623 {
624 if (!tab_expand_flag)
625 write_output (text, limit - text);
626 else
627 {
628 register unsigned char c;
629 register char const *t = text;
630 register unsigned column = 0;
631 /* CC is used to avoid taking the address of the register
632 variable C. */
633 char cc;
634
635 while (t < limit)
636 switch ((c = *t++))
637 {
638 case '\t':
639 {
640 unsigned spaces = TAB_WIDTH - column % TAB_WIDTH;
641 column += spaces;
642 do
643 write_output (" ", 1);
644 while (--spaces);
645 }
646 break;
647
648 case '\r':
649 write_output ("\r", 1);
650 if (flag_format && t < limit && *t != '\n')
651 printf_output (flag_format, line_flag);
652 column = 0;
653 break;
654
655 case '\b':
656 if (column == 0)
657 continue;
658 column--;
659 write_output ("\b", 1);
660 break;
661
662 default:
663 if (ISPRINT (c))
664 column++;
665 cc = c;
666 write_output (&cc, 1);
667 break;
668 }
669 }
670 }
671
/* Return the letter describing a change that has INSERTS inserted
   and DELETES deleted lines: 'd' when nothing is inserted, 'a' when
   something is inserted but nothing is deleted, and 'c' otherwise.  */

int
change_letter (inserts, deletes)
     int inserts, deletes;
{
  if (inserts && deletes)
    return 'c';

  return inserts ? 'a' : 'd';
}
683
684 /* Translate an internal line number (an index into diff's table of lines)
685 into an actual line number in the input file.
686 The internal line number is LNUM. FILE points to the data on the file.
687
688 Internal line numbers count from 0 starting after the prefix.
689 Actual line numbers count from 1 within the entire file. */
690
691 int
translate_line_number(file,lnum)692 translate_line_number (file, lnum)
693 struct file_data const *file;
694 int lnum;
695 {
696 return lnum + file->prefix_lines + 1;
697 }
698
/* Translate the range of internal line numbers A through B of FILE,
   storing the actual line numbers in *APTR and *BPTR.

   Each end is obtained by translating the line just outside the range
   (A - 1, B + 1) and then stepping back inside.  */

void
translate_range (file, a, b, aptr, bptr)
     struct file_data const *file;
     int a, b;
     int *aptr, *bptr;
{
  int before_first = translate_line_number (file, a - 1);
  int after_last = translate_line_number (file, b + 1);

  *aptr = before_first + 1;
  *bptr = after_last - 1;
}
708
/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
   If the two numbers are identical, print just one number.

   Args A and B are internal line numbers.
   We print the translated (real) line numbers.  */

void
print_number_range (sepchar, file, a, b)
     int sepchar;
     struct file_data *file;
     int a, b;
{
  int first, last;

  translate_range (file, a, b, &first, &last);

  /* Note: we can have B < A in the case of a range of no lines.
     In this case, we should print the line number before the range,
     which is B.  */
  if (last <= first)
    {
      printf_output ("%d", last);
      return;
    }

  printf_output ("%d%c%d", first, sepchar, last);
}
732
733 /* Look at a hunk of edit script and report the range of lines in each file
734 that it applies to. HUNK is the start of the hunk, which is a chain
735 of `struct change'. The first and last line numbers of file 0 are stored in
736 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
737 Note that these are internal line numbers that count from 0.
738
739 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
740
741 Also set *DELETES nonzero if any lines of file 0 are deleted
742 and set *INSERTS nonzero if any lines of file 1 are inserted.
743 If only ignorable lines are inserted or deleted, both are
744 set to 0. */
745
746 void
analyze_hunk(hunk,first0,last0,first1,last1,deletes,inserts)747 analyze_hunk (hunk, first0, last0, first1, last1, deletes, inserts)
748 struct change *hunk;
749 int *first0, *last0, *first1, *last1;
750 int *deletes, *inserts;
751 {
752 int l0, l1, show_from, show_to;
753 int i;
754 int trivial = ignore_blank_lines_flag || ignore_regexp_list;
755 struct change *next;
756
757 show_from = show_to = 0;
758
759 *first0 = hunk->line0;
760 *first1 = hunk->line1;
761
762 next = hunk;
763 do
764 {
765 l0 = next->line0 + next->deleted - 1;
766 l1 = next->line1 + next->inserted - 1;
767 show_from += next->deleted;
768 show_to += next->inserted;
769
770 for (i = next->line0; i <= l0 && trivial; i++)
771 if (!ignore_blank_lines_flag || files[0].linbuf[i][0] != '\n')
772 {
773 struct regexp_list *r;
774 char const *line = files[0].linbuf[i];
775 int len = files[0].linbuf[i + 1] - line;
776
777 for (r = ignore_regexp_list; r; r = r->next)
778 if (0 <= re_search (&r->buf, line, len, 0, len, 0))
779 break; /* Found a match. Ignore this line. */
780 /* If we got all the way through the regexp list without
781 finding a match, then it's nontrivial. */
782 if (!r)
783 trivial = 0;
784 }
785
786 for (i = next->line1; i <= l1 && trivial; i++)
787 if (!ignore_blank_lines_flag || files[1].linbuf[i][0] != '\n')
788 {
789 struct regexp_list *r;
790 char const *line = files[1].linbuf[i];
791 int len = files[1].linbuf[i + 1] - line;
792
793 for (r = ignore_regexp_list; r; r = r->next)
794 if (0 <= re_search (&r->buf, line, len, 0, len, 0))
795 break; /* Found a match. Ignore this line. */
796 /* If we got all the way through the regexp list without
797 finding a match, then it's nontrivial. */
798 if (!r)
799 trivial = 0;
800 }
801 }
802 while ((next = next->link) != 0);
803
804 *last0 = l0;
805 *last1 = l1;
806
807 /* If all inserted or deleted lines are ignorable,
808 tell the caller to ignore this hunk. */
809
810 if (trivial)
811 show_from = show_to = 0;
812
813 *deletes = show_from;
814 *inserts = show_to;
815 }
816
/* Concatenate the three strings S1, S2 and S3, returning the result
   in a newly allocated string obtained from xmalloc.  */

char *
concat (s1, s2, s3)
     char const *s1, *s2, *s3;
{
  size_t total = strlen (s1);
  char *joined;

  total += strlen (s2);
  total += strlen (s3);

  /* One extra byte for the terminating NUL.  */
  joined = xmalloc (total + 1);
  sprintf (joined, "%s%s%s", s1, s2, s3);
  return joined;
}
828
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.  */

char *
dir_file_pathname (dir, file)
     char const *dir, *file;
{
  /* Presumably the last directory separator within DIR, or null if
     there is none -- see filename_lastdirchar.  */
  char const *last = filename_lastdirchar (dir);

  /* No "/" is inserted when LAST is the final character of DIR.  */
  char const *separator = (last && !last[1]) ? "" : "/";

  return concat (dir, separator, file);
}
839
840 void
debug_script(sp)841 debug_script (sp)
842 struct change *sp;
843 {
844 fflush (stdout);
845 for (; sp; sp = sp->link)
846 fprintf (stderr, "%3d %3d delete %d insert %d\n",
847 sp->line0, sp->line1, sp->deleted, sp->inserted);
848 fflush (stderr);
849 }
850