1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
2
3 /*
4 * Written by Raymond Lai <ray@cyth.net>.
5 * Public domain.
6 */
7
8 #include <sys/param.h>
9 #include <sys/queue.h>
10 #include <sys/stat.h>
11 #include <sys/wait.h>
12
13 #include <ctype.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <getopt.h>
18 #include <limits.h>
19 #include <paths.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26
27 #include "extern.h"
28
/* Default diff program; main() replaces it when --diff-program is given. */
static char diff_path[] = "/usr/bin/diff";

/* Total output width used when -w is not given. */
#define WIDTH		126
/*
 * Smallest width accepted by -w: each column must be at least one
 * character wide, plus three characters between the columns
 * (space, [<|>], space).
 */
#define WIDTH_MIN	5

/*
 * Loop bound used by istextfile(): at most MAX_CHECK + 1 leading
 * characters of a file are inspected for NUL bytes.
 */
#define MAX_CHECK	768
/*
 * One queued output row: the text of each column and the divider
 * character printed between them.  Either column may be NULL.
 */
struct diffline {
	STAILQ_ENTRY(diffline)	 diffentries;	/* queue linkage */
	char			*left;		/* left column text */
	char			 div;		/* divider character */
	char			*right;		/* right column text */
};
48
49 static void astrcat(char **, const char *);
50 static void enqueue(char *, char, char *);
51 static char *mktmpcpy(const char *);
52 static int istextfile(FILE *);
53 static int bindiff(FILE *, char *, FILE *, char *);
54 static void freediff(struct diffline *);
55 static void int_usage(void);
56 static int parsecmd(FILE *, FILE *, FILE *);
57 static void printa(FILE *, size_t);
58 static void printc(FILE *, size_t, FILE *, size_t);
59 static void printcol(const char *, size_t *, const size_t);
60 static void printd(FILE *, size_t);
61 static void println(const char *, const char, const char *);
62 static void processq(void);
63 static void prompt(const char *, const char *);
64 static void usage(void) __dead2;
65 static char *xfgets(FILE *);
66
/* Queue of diff lines waiting to be printed by processq(). */
static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);

static size_t	 line_width;	/* width of a line (two columns and divider) */
static size_t	 width;		/* width of each column */
static size_t	 file1ln;	/* current line number in file1 */
static size_t	 file2ln;	/* current line number in file2 */
static bool	 Iflag;		/* -I: ignore sets matching regexp */
static bool	 lflag;		/* -l: print only left column for identical lines */
static bool	 sflag;		/* -s: skip identical lines */
static bool	 tflag;		/* -t: expand tabs */
static int	 tabsize = 8;	/* tab size, changed by --tabsize */
FILE		*outfp;		/* file to save changes to (-o) */
const char	*tmpdir;	/* TMPDIR or /tmp */
78
/*
 * getopt_long() return values for options without a short form.  They
 * start above CHAR_MAX so they can never collide with an option letter.
 */
enum {
	HELP_OPT		= CHAR_MAX + 1,
	NORMAL_OPT		= CHAR_MAX + 2,
	FCASE_SENSITIVE_OPT	= CHAR_MAX + 3,
	FCASE_IGNORE_OPT	= CHAR_MAX + 4,
	STRIPCR_OPT		= CHAR_MAX + 5,
	TSIZE_OPT		= CHAR_MAX + 6,
	DIFFPROG_OPT		= CHAR_MAX + 7,
};
88
89 static struct option longopts[] = {
90 /* options only processed in sdiff */
91 { "suppress-common-lines", no_argument, NULL, 's' },
92 { "width", required_argument, NULL, 'w' },
93
94 { "output", required_argument, NULL, 'o' },
95 { "diff-program", required_argument, NULL, DIFFPROG_OPT },
96
97 /* Options processed by diff. */
98 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT },
99 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT },
100 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT },
101 { "tabsize", required_argument, NULL, TSIZE_OPT },
102 { "help", no_argument, NULL, HELP_OPT },
103 { "text", no_argument, NULL, 'a' },
104 { "ignore-blank-lines", no_argument, NULL, 'B' },
105 { "ignore-space-change", no_argument, NULL, 'b' },
106 { "minimal", no_argument, NULL, 'd' },
107 { "ignore-tab-expansion", no_argument, NULL, 'E' },
108 { "ignore-matching-lines", required_argument, NULL, 'I' },
109 { "ignore-case", no_argument, NULL, 'i' },
110 { "left-column", no_argument, NULL, 'l' },
111 { "expand-tabs", no_argument, NULL, 't' },
112 { "speed-large-files", no_argument, NULL, 'H' },
113 { "ignore-all-space", no_argument, NULL, 'W' },
114
115 { NULL, 0, NULL, '\0'}
116 };
117
/*
 * Text printed by --help, one array element per output line.  The list
 * is terminated by NULL.  Long option names shown here must match the
 * names registered in longopts[].
 */
static const char *help_msg[] = {
	"usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	"-l, --left-column: only print the left column for identical lines.",
	"-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
	"-s, --suppress-common-lines: skip identical lines.",
	"-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
	"",
	"Options passed to diff(1) are:",
	"\t-a, --text: treat file1 and file2 as text files.",
	/* Was "--ignore-trailing-cr", which is not a registered option. */
	"\t-b, --ignore-space-change: ignore trailing blank spaces.",
	"\t-d, --minimal: minimize diff size.",
	"\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
	"\t-i, --ignore-case: do a case-insensitive comparison.",
	"\t-t, --expand-tabs: expand tabs to spaces.",
	/* Was "--ignore-all-spaces"; the registered name has no final 's'. */
	"\t-W, --ignore-all-space: ignore all spaces.",
	"\t--speed-large-files: assume large file with scattered changes.",
	"\t--strip-trailing-cr: strip trailing carriage return.",
	"\t--ignore-file-name-case: ignore case of file names.",
	"\t--no-ignore-file-name-case: do not ignore file name case",
	"\t--tabsize NUM: change size of tabs (default 8.)",

	NULL,
};
141
142 /*
143 * Create temporary file if source_file is not a regular file.
144 * Returns temporary file name if one was malloced, NULL if unnecessary.
145 */
146 static char *
mktmpcpy(const char * source_file)147 mktmpcpy(const char *source_file)
148 {
149 struct stat sb;
150 ssize_t rcount;
151 int ifd, ofd;
152 u_char buf[BUFSIZ];
153 char *target_file;
154
155 /* Open input and output. */
156 ifd = open(source_file, O_RDONLY, 0);
157 /* File was opened successfully. */
158 if (ifd != -1) {
159 if (fstat(ifd, &sb) == -1)
160 err(2, "error getting file status from %s", source_file);
161
162 /* Regular file. */
163 if (S_ISREG(sb.st_mode)) {
164 close(ifd);
165 return (NULL);
166 }
167 } else {
168 /* If ``-'' does not exist the user meant stdin. */
169 if (errno == ENOENT && strcmp(source_file, "-") == 0)
170 ifd = STDIN_FILENO;
171 else
172 err(2, "error opening %s", source_file);
173 }
174
175 /* Not a regular file, so copy input into temporary file. */
176 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
177 err(2, "asprintf");
178 if ((ofd = mkstemp(target_file)) == -1) {
179 warn("error opening %s", target_file);
180 goto FAIL;
181 }
182 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
183 rcount != 0) {
184 ssize_t wcount;
185
186 wcount = write(ofd, buf, (size_t)rcount);
187 if (-1 == wcount || rcount != wcount) {
188 warn("error writing to %s", target_file);
189 goto FAIL;
190 }
191 }
192 if (rcount == -1) {
193 warn("error reading from %s", source_file);
194 goto FAIL;
195 }
196
197 close(ifd);
198 close(ofd);
199
200 return (target_file);
201
202 FAIL:
203 unlink(target_file);
204 exit(2);
205 }
206
207 int
main(int argc,char ** argv)208 main(int argc, char **argv)
209 {
210 FILE *diffpipe, *file1, *file2;
211 size_t diffargc = 0, flagc = 0, wval = WIDTH;
212 int ch, fd[2], i, ret, status;
213 pid_t pid;
214 const char *errstr, *outfile = NULL;
215 char **diffargv, *diffprog = diff_path, *flagv;
216 char *filename1, *filename2, *tmp1, *tmp2, *s1, *s2;
217 char I_arg[] = "-I";
218 char speed_lf[] = "--speed-large-files";
219
220 /*
221 * Process diff flags.
222 */
223 /*
224 * Allocate memory for diff arguments and NULL.
225 * Each flag has at most one argument, so doubling argc gives an
226 * upper limit of how many diff args can be passed. argv[0],
227 * file1, and file2 won't have arguments so doubling them will
228 * waste some memory; however we need an extra space for the
229 * NULL at the end, so it sort of works out.
230 */
231 if ((diffargv = calloc(argc, sizeof(char *) * 2)) == NULL)
232 err(2, NULL);
233
234 /* Add first argument, the program name. */
235 diffargv[diffargc++] = diffprog;
236
237 /* create a dynamic string for merging single-character options */
238 if ((flagv = malloc(flagc + 2)) == NULL)
239 err(2, NULL);
240 flagv[flagc] = '-';
241 flagv[flagc + 1] = '\0';
242 diffargv[diffargc++] = flagv;
243
244 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
245 longopts, NULL)) != -1) {
246 switch (ch) {
247 /* only compatible --long-name-form with diff */
248 case FCASE_IGNORE_OPT:
249 case FCASE_SENSITIVE_OPT:
250 case STRIPCR_OPT:
251 case 'S':
252 break;
253 /* combine no-arg single switches */
254 case 'a':
255 case 'B':
256 case 'b':
257 case 'd':
258 case 'E':
259 case 'i':
260 case 'W':
261 flagc++;
262 flagv = realloc(flagv, flagc + 2);
263 /*
264 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
265 */
266 flagv[flagc] = ch == 'W' ? 'w' : ch;
267 flagv[flagc + 1] = '\0';
268 break;
269 case 'H':
270 diffargv[diffargc++] = speed_lf;
271 break;
272 case DIFFPROG_OPT:
273 diffargv[0] = diffprog = optarg;
274 break;
275 case 'I':
276 Iflag = true;
277 diffargv[diffargc++] = I_arg;
278 diffargv[diffargc++] = optarg;
279 break;
280 case 'l':
281 lflag = true;
282 break;
283 case 'o':
284 outfile = optarg;
285 break;
286 case 's':
287 sflag = true;
288 break;
289 case 't':
290 tflag = true;
291 break;
292 case 'w':
293 wval = strtonum(optarg, WIDTH_MIN,
294 INT_MAX, &errstr);
295 if (errstr)
296 errx(2, "width is %s: %s", errstr, optarg);
297 break;
298 case HELP_OPT:
299 for (i = 0; help_msg[i] != NULL; i++)
300 printf("%s\n", help_msg[i]);
301 exit(0);
302 break;
303 case TSIZE_OPT:
304 tabsize = strtonum(optarg, 1, INT_MAX, &errstr);
305 if (errstr)
306 errx(2, "tabsize is %s: %s", errstr, optarg);
307 break;
308 default:
309 usage();
310 break;
311 }
312 }
313
314 /* no single-character options were used */
315 if (flagc == 0) {
316 memmove(diffargv + 1, diffargv + 2,
317 sizeof(char *) * (diffargc - 2));
318 diffargc--;
319 free(flagv);
320 }
321
322 argc -= optind;
323 argv += optind;
324
325 if (argc != 2)
326 usage();
327
328 if (outfile && (outfp = fopen(outfile, "w")) == NULL)
329 err(2, "could not open: %s", optarg);
330
331 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
332 tmpdir = _PATH_TMP;
333
334 filename1 = argv[0];
335 filename2 = argv[1];
336
337 /*
338 * Create temporary files for diff and sdiff to share if file1
339 * or file2 are not regular files. This allows sdiff and diff
340 * to read the same inputs if one or both inputs are stdin.
341 *
342 * If any temporary files were created, their names would be
343 * saved in tmp1 or tmp2. tmp1 should never equal tmp2.
344 */
345 tmp1 = tmp2 = NULL;
346 /* file1 and file2 are the same, so copy to same temp file. */
347 if (strcmp(filename1, filename2) == 0) {
348 if ((tmp1 = mktmpcpy(filename1)))
349 filename1 = filename2 = tmp1;
350 /* Copy file1 and file2 into separate temp files. */
351 } else {
352 if ((tmp1 = mktmpcpy(filename1)))
353 filename1 = tmp1;
354 if ((tmp2 = mktmpcpy(filename2)))
355 filename2 = tmp2;
356 }
357
358 if ((file1 = fopen(filename1, "r")) == NULL)
359 err(2, "could not open %s", filename1);
360 if ((file2 = fopen(filename2, "r")) == NULL)
361 err(2, "could not open %s", filename2);
362 if (!istextfile(file1) || !istextfile(file2)) {
363 ret = bindiff(file1, filename1, file2, filename2);
364 goto done;
365 }
366
367 diffargv[diffargc++] = filename1;
368 diffargv[diffargc++] = filename2;
369 /* Add NULL to end of array to indicate end of array. */
370 diffargv[diffargc++] = NULL;
371
372 /* Subtract column divider and divide by two. */
373 width = (wval - 3) / 2;
374 /* Make sure line_width can fit in size_t. */
375 if (width > (SIZE_MAX - 3) / 2)
376 errx(2, "width is too large: %zu", width);
377 line_width = width * 2 + 3;
378
379 if (pipe(fd))
380 err(2, "pipe");
381
382 if ((pid = fork()) < 0)
383 err(1, "fork()");
384 if (pid == 0) {
385 /* child */
386 /* We don't read from the pipe. */
387 close(fd[0]);
388 if (dup2(fd[1], STDOUT_FILENO) != STDOUT_FILENO)
389 _exit(2);
390 /* Free unused descriptor. */
391 close(fd[1]);
392 execvp(diffprog, diffargv);
393 _exit(2);
394 }
395
396 /* parent */
397 /* We don't write to the pipe. */
398 close(fd[1]);
399
400 /* Open pipe to diff command. */
401 if ((diffpipe = fdopen(fd[0], "r")) == NULL)
402 err(2, "could not open diff pipe");
403
404 /* Line numbers start at one. */
405 file1ln = file2ln = 1;
406
407 /* Read and parse diff output. */
408 while (parsecmd(diffpipe, file1, file2) != EOF)
409 ;
410 fclose(diffpipe);
411
412 /* Wait for diff to exit. */
413 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
414 WEXITSTATUS(status) >= 2)
415 errx(2, "diff exited abnormally");
416 ret = WEXITSTATUS(status);
417
418 /* No more diffs, so enqueue common lines. */
419 if (lflag)
420 while ((s1 = xfgets(file1)))
421 enqueue(s1, ' ', NULL);
422 else
423 for (;;) {
424 s1 = xfgets(file1);
425 s2 = xfgets(file2);
426 if (s1 || s2)
427 enqueue(s1, ' ', s2);
428 else
429 break;
430 }
431 fclose(file1);
432 fclose(file2);
433 /* Process unmodified lines. */
434 processq();
435
436 done:
437 /* Delete and free unneeded temporary files. */
438 if (tmp1 != NULL) {
439 if (unlink(tmp1) != 0)
440 warn("failed to delete %s", tmp1);
441 free(tmp1);
442 }
443 if (tmp2 != NULL) {
444 if (unlink(tmp2) != 0)
445 warn("failed to delete %s", tmp2);
446 free(tmp2);
447 }
448
449 /* Return diff exit status. */
450 free(diffargv);
451 if (flagc > 0)
452 free(flagv);
453 return (ret);
454 }
455
/*
 * Byte-for-byte comparison used when sdiff detects a binary file as
 * input.
 *
 * Reads f1 and f2 in lock step until they disagree or either one ends.
 * Returns 0 if both streams ended together without a mismatch, 1 (after
 * printing "Binary files ... differ") if they differ, and 2 (after a
 * warning naming fn1 or fn2) if a read error occurred.
 */
static int
bindiff(FILE *f1, char *fn1, FILE *f2, char *fn2)
{
	int left, right;

	/* Hold both stream locks so the unlocked getc variant is safe. */
	flockfile(f1);
	flockfile(f2);
	for (;;) {
		left = getc_unlocked(f1);
		right = getc_unlocked(f2);
		if (left == EOF || right == EOF || left != right)
			break;
	}
	funlockfile(f2);
	funlockfile(f1);

	/* EOF may also mean a read error; report that before comparing. */
	if (ferror(f1)) {
		warn("%s", fn1);
		return (2);
	}
	if (ferror(f2)) {
		warn("%s", fn2);
		return (2);
	}

	/* Identical only if both streams ran out at the same time. */
	if (left == EOF && right == EOF)
		return (0);

	printf("Binary files %s and %s differ\n", fn1, fn2);
	return (1);
}
486
487 /*
488 * Checks whether a file appears to be a text file.
489 */
490 static int
istextfile(FILE * f)491 istextfile(FILE *f)
492 {
493 int ch, i;
494
495 if (f == NULL)
496 return (1);
497 rewind(f);
498 for (i = 0; i <= MAX_CHECK; i++) {
499 ch = fgetc(f);
500 if (ch == '\0') {
501 rewind(f);
502 return (0);
503 }
504 if (ch == EOF)
505 break;
506 }
507 rewind(f);
508 return (1);
509 }
510
511 /*
512 * Prints an individual column (left or right), taking into account
513 * that tabs are variable-width. Takes a string, the current column
514 * the cursor is on the screen, and the maximum value of the column.
515 * The column value is updated as we go along.
516 */
517 static void
printcol(const char * s,size_t * col,const size_t col_max)518 printcol(const char *s, size_t *col, const size_t col_max)
519 {
520
521 for (; *s && *col < col_max; ++s) {
522 size_t new_col;
523
524 switch (*s) {
525 case '\t':
526 /*
527 * If rounding to next multiple of eight causes
528 * an integer overflow, just return.
529 */
530 if (*col > SIZE_MAX - tabsize)
531 return;
532
533 /* Round to next multiple of eight. */
534 new_col = (*col / tabsize + 1) * tabsize;
535
536 /*
537 * If printing the tab goes past the column
538 * width, don't print it and just quit.
539 */
540 if (new_col > col_max)
541 return;
542
543 if (tflag) {
544 do {
545 putchar(' ');
546 } while (++*col < new_col);
547 } else {
548 putchar(*s);
549 *col = new_col;
550 }
551 break;
552 default:
553 ++*col;
554 putchar(*s);
555 }
556 }
557 }
558
559 /*
560 * Prompts user to either choose between two strings or edit one, both,
561 * or neither.
562 */
563 static void
prompt(const char * s1,const char * s2)564 prompt(const char *s1, const char *s2)
565 {
566 char *cmd;
567
568 /* Print command prompt. */
569 putchar('%');
570
571 /* Get user input. */
572 for (; (cmd = xfgets(stdin)); free(cmd)) {
573 const char *p;
574
575 /* Skip leading whitespace. */
576 for (p = cmd; isspace((unsigned char)*p); ++p)
577 ;
578 switch (*p) {
579 case 'e':
580 /* Skip `e'. */
581 ++p;
582 if (eparse(p, s1, s2) == -1)
583 goto USAGE;
584 break;
585 case 'l':
586 case '1':
587 /* Choose left column as-is. */
588 if (s1 != NULL)
589 fprintf(outfp, "%s\n", s1);
590 /* End of command parsing. */
591 break;
592 case 'q':
593 goto QUIT;
594 case 'r':
595 case '2':
596 /* Choose right column as-is. */
597 if (s2 != NULL)
598 fprintf(outfp, "%s\n", s2);
599 /* End of command parsing. */
600 break;
601 case 's':
602 sflag = true;
603 goto PROMPT;
604 case 'v':
605 sflag = false;
606 /* FALLTHROUGH */
607 default:
608 /* Interactive usage help. */
609 USAGE:
610 int_usage();
611 PROMPT:
612 putchar('%');
613
614 /* Prompt user again. */
615 continue;
616 }
617 free(cmd);
618 return;
619 }
620
621 /*
622 * If there was no error, we received an EOF from stdin, so we
623 * should quit.
624 */
625 QUIT:
626 fclose(outfp);
627 exit(0);
628 }
629
630 /*
631 * Takes two strings, separated by a column divider. NULL strings are
632 * treated as empty columns. If the divider is the ` ' character, the
633 * second column is not printed (-l flag). In this case, the second
634 * string must be NULL. When the second column is NULL, the divider
635 * does not print the trailing space following the divider character.
636 *
637 * Takes into account that tabs can take multiple columns.
638 */
639 static void
println(const char * s1,const char divider,const char * s2)640 println(const char *s1, const char divider, const char *s2)
641 {
642 size_t col;
643
644 /* Print first column. Skips if s1 == NULL. */
645 col = 0;
646 if (s1) {
647 /* Skip angle bracket and space. */
648 printcol(s1, &col, width);
649
650 }
651
652 /* Otherwise, we pad this column up to width. */
653 for (; col < width; ++col)
654 putchar(' ');
655
656 /* Only print left column. */
657 if (divider == ' ' && !s2) {
658 printf(" (\n");
659 return;
660 }
661
662 /*
663 * Print column divider. If there is no second column, we don't
664 * need to add the space for padding.
665 */
666 if (!s2) {
667 printf(" %c\n", divider);
668 return;
669 }
670 printf(" %c ", divider);
671 col += 3;
672
673 /* Skip angle bracket and space. */
674 printcol(s2, &col, line_width);
675
676 putchar('\n');
677 }
678
/*
 * Reads a line from file and returns it as a string with the trailing
 * newline, if any, removed.  If EOF is reached, NULL is returned.  The
 * returned string must be freed afterwards.
 *
 * The stream's error and EOF indicators are cleared before reading.  A
 * read error is fatal: a diagnostic is printed and the program exits
 * with status 2.
 */
static char *
xfgets(FILE *file)
{
	size_t linecap;
	ssize_t l;
	char *s;

	clearerr(file);
	linecap = 0;
	s = NULL;

	if ((l = getline(&s, &linecap, file)) == -1) {
		if (ferror(file))
			err(2, "error reading file");
		/*
		 * getline() may have allocated a buffer even though it
		 * read nothing; release it so EOF does not leak memory.
		 */
		free(s);
		return (NULL);
	}

	/* Strip the line terminator; the last line may not have one. */
	if (l > 0 && s[l - 1] == '\n')
		s[l - 1] = '\0';

	return (s);
}
705
706 /*
707 * Parse ed commands from diffpipe and print lines from file1 (lines
708 * to change or delete) or file2 (lines to add or change).
709 * Returns EOF or 0.
710 */
711 static int
parsecmd(FILE * diffpipe,FILE * file1,FILE * file2)712 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
713 {
714 size_t file1start, file1end, file2start, file2end, n;
715 /* ed command line and pointer to characters in line */
716 char *line, *p, *q;
717 const char *errstr;
718 char c, cmd;
719
720 /* Read ed command. */
721 if (!(line = xfgets(diffpipe)))
722 return (EOF);
723
724 p = line;
725 /* Go to character after line number. */
726 while (isdigit((unsigned char)*p))
727 ++p;
728 c = *p;
729 *p++ = 0;
730 file1start = strtonum(line, 0, INT_MAX, &errstr);
731 if (errstr)
732 errx(2, "file1 start is %s: %s", errstr, line);
733
734 /* A range is specified for file1. */
735 if (c == ',') {
736 q = p;
737 /* Go to character after file2end. */
738 while (isdigit((unsigned char)*p))
739 ++p;
740 c = *p;
741 *p++ = 0;
742 file1end = strtonum(q, 0, INT_MAX, &errstr);
743 if (errstr)
744 errx(2, "file1 end is %s: %s", errstr, line);
745 if (file1start > file1end)
746 errx(2, "invalid line range in file1: %s", line);
747 } else
748 file1end = file1start;
749
750 cmd = c;
751 /* Check that cmd is valid. */
752 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
753 errx(2, "ed command not recognized: %c: %s", cmd, line);
754
755 q = p;
756 /* Go to character after line number. */
757 while (isdigit((unsigned char)*p))
758 ++p;
759 c = *p;
760 *p++ = 0;
761 file2start = strtonum(q, 0, INT_MAX, &errstr);
762 if (errstr)
763 errx(2, "file2 start is %s: %s", errstr, line);
764
765 /*
766 * There should either be a comma signifying a second line
767 * number or the line should just end here.
768 */
769 if (c != ',' && c != '\0')
770 errx(2, "invalid line range in file2: %c: %s", c, line);
771
772 if (c == ',') {
773
774 file2end = strtonum(p, 0, INT_MAX, &errstr);
775 if (errstr)
776 errx(2, "file2 end is %s: %s", errstr, line);
777 if (file2start >= file2end)
778 errx(2, "invalid line range in file2: %s", line);
779 } else
780 file2end = file2start;
781
782 /* Appends happen _after_ stated line. */
783 if (cmd == 'a') {
784 if (file1start != file1end)
785 errx(2, "append cannot have a file1 range: %s",
786 line);
787 if (file1start == SIZE_MAX)
788 errx(2, "file1 line range too high: %s", line);
789 file1start = ++file1end;
790 }
791 /*
792 * I'm not sure what the deal is with the line numbers for
793 * deletes, though.
794 */
795 else if (cmd == 'd') {
796 if (file2start != file2end)
797 errx(2, "delete cannot have a file2 range: %s",
798 line);
799 if (file2start == SIZE_MAX)
800 errx(2, "file2 line range too high: %s", line);
801 file2start = ++file2end;
802 }
803
804 /*
805 * Continue reading file1 and file2 until we reach line numbers
806 * specified by diff. Should only happen with -I flag.
807 */
808 for (; file1ln < file1start && file2ln < file2start;
809 ++file1ln, ++file2ln) {
810 char *s1, *s2;
811
812 if (!(s1 = xfgets(file1)))
813 errx(2, "file1 shorter than expected");
814 if (!(s2 = xfgets(file2)))
815 errx(2, "file2 shorter than expected");
816
817 /* If the -l flag was specified, print only left column. */
818 if (lflag) {
819 free(s2);
820 /*
821 * XXX - If -l and -I are both specified, all
822 * unchanged or ignored lines are shown with a
823 * `(' divider. This matches GNU sdiff, but I
824 * believe it is a bug. Just check out:
825 * gsdiff -l -I '^$' samefile samefile.
826 */
827 if (Iflag)
828 enqueue(s1, '(', NULL);
829 else
830 enqueue(s1, ' ', NULL);
831 } else
832 enqueue(s1, ' ', s2);
833 }
834 /* Ignore deleted lines. */
835 for (; file1ln < file1start; ++file1ln) {
836 char *s;
837
838 if (!(s = xfgets(file1)))
839 errx(2, "file1 shorter than expected");
840
841 enqueue(s, '(', NULL);
842 }
843 /* Ignore added lines. */
844 for (; file2ln < file2start; ++file2ln) {
845 char *s;
846
847 if (!(s = xfgets(file2)))
848 errx(2, "file2 shorter than expected");
849
850 /* If -l flag was given, don't print right column. */
851 if (lflag)
852 free(s);
853 else
854 enqueue(NULL, ')', s);
855 }
856
857 /* Process unmodified or skipped lines. */
858 processq();
859
860 switch (cmd) {
861 case 'a':
862 printa(file2, file2end);
863 n = file2end - file2start + 1;
864 break;
865 case 'c':
866 printc(file1, file1end, file2, file2end);
867 n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
868 break;
869 case 'd':
870 printd(file1, file1end);
871 n = file1end - file1start + 1;
872 break;
873 default:
874 errx(2, "invalid diff command: %c: %s", cmd, line);
875 }
876 free(line);
877
878 /* Skip to next ed line. */
879 while (n--) {
880 if (!(line = xfgets(diffpipe)))
881 errx(2, "diff ended early");
882 free(line);
883 }
884
885 return (0);
886 }
887
888 /*
889 * Queues up a diff line.
890 */
891 static void
enqueue(char * left,char divider,char * right)892 enqueue(char *left, char divider, char *right)
893 {
894 struct diffline *diffp;
895
896 if (!(diffp = malloc(sizeof(struct diffline))))
897 err(2, "enqueue");
898 diffp->left = left;
899 diffp->div = divider;
900 diffp->right = right;
901 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
902 }
903
904 /*
905 * Free a diffline structure and its elements.
906 */
907 static void
freediff(struct diffline * diffp)908 freediff(struct diffline *diffp)
909 {
910
911 free(diffp->left);
912 free(diffp->right);
913 free(diffp);
914 }
915
916 /*
917 * Append second string into first. Repeated appends to the same string
918 * are cached, making this an O(n) function, where n = strlen(append).
919 */
920 static void
astrcat(char ** s,const char * append)921 astrcat(char **s, const char *append)
922 {
923 /* Length of string in previous run. */
924 static size_t offset = 0;
925 size_t newsiz;
926 /*
927 * String from previous run. Compared to *s to see if we are
928 * dealing with the same string. If so, we can use offset.
929 */
930 static const char *oldstr = NULL;
931 char *newstr;
932
933 /*
934 * First string is NULL, so just copy append.
935 */
936 if (!*s) {
937 if (!(*s = strdup(append)))
938 err(2, "astrcat");
939
940 /* Keep track of string. */
941 offset = strlen(*s);
942 oldstr = *s;
943
944 return;
945 }
946
947 /*
948 * *s is a string so concatenate.
949 */
950
951 /* Did we process the same string in the last run? */
952 /*
953 * If this is a different string from the one we just processed
954 * cache new string.
955 */
956 if (oldstr != *s) {
957 offset = strlen(*s);
958 oldstr = *s;
959 }
960
961 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */
962 newsiz = offset + 1 + strlen(append) + 1;
963
964 /* Resize *s to fit new string. */
965 newstr = realloc(*s, newsiz);
966 if (newstr == NULL)
967 err(2, "astrcat");
968 *s = newstr;
969
970 /* *s + offset should be end of string. */
971 /* Concatenate. */
972 strlcpy(*s + offset, "\n", newsiz - offset);
973 strlcat(*s + offset, append, newsiz - offset);
974
975 /* New string length should be exactly newsiz - 1 characters. */
976 /* Store generated string's values. */
977 offset = newsiz - 1;
978 oldstr = *s;
979 }
980
981 /*
982 * Process diff set queue, printing, prompting, and saving each diff
983 * line stored in queue.
984 */
985 static void
processq(void)986 processq(void)
987 {
988 struct diffline *diffp;
989 char divc, *left, *right;
990
991 /* Don't process empty queue. */
992 if (STAILQ_EMPTY(&diffhead))
993 return;
994
995 /* Remember the divider. */
996 divc = STAILQ_FIRST(&diffhead)->div;
997
998 left = NULL;
999 right = NULL;
1000 /*
1001 * Go through set of diffs, concatenating each line in left or
1002 * right column into two long strings, `left' and `right'.
1003 */
1004 STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1005 /*
1006 * Print changed lines if -s was given,
1007 * print all lines if -s was not given.
1008 */
1009 if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1010 diffp->div == '>')
1011 println(diffp->left, diffp->div, diffp->right);
1012
1013 /* Append new lines to diff set. */
1014 if (diffp->left)
1015 astrcat(&left, diffp->left);
1016 if (diffp->right)
1017 astrcat(&right, diffp->right);
1018 }
1019
1020 /* Empty queue and free each diff line and its elements. */
1021 while (!STAILQ_EMPTY(&diffhead)) {
1022 diffp = STAILQ_FIRST(&diffhead);
1023 STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1024 freediff(diffp);
1025 }
1026
1027 /* Write to outfp, prompting user if lines are different. */
1028 if (outfp)
1029 switch (divc) {
1030 case ' ': case '(': case ')':
1031 fprintf(outfp, "%s\n", left);
1032 break;
1033 case '|': case '<': case '>':
1034 prompt(left, right);
1035 break;
1036 default:
1037 errx(2, "invalid divider: %c", divc);
1038 }
1039
1040 /* Free left and right. */
1041 free(left);
1042 free(right);
1043 }
1044
1045 /*
1046 * Print lines following an (a)ppend command.
1047 */
1048 static void
printa(FILE * file,size_t line2)1049 printa(FILE *file, size_t line2)
1050 {
1051 char *line;
1052
1053 for (; file2ln <= line2; ++file2ln) {
1054 if (!(line = xfgets(file)))
1055 errx(2, "append ended early");
1056 enqueue(NULL, '>', line);
1057 }
1058 processq();
1059 }
1060
1061 /*
1062 * Print lines following a (c)hange command, from file1ln to file1end
1063 * and from file2ln to file2end.
1064 */
1065 static void
printc(FILE * file1,size_t file1end,FILE * file2,size_t file2end)1066 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1067 {
1068 struct fileline {
1069 STAILQ_ENTRY(fileline) fileentries;
1070 char *line;
1071 };
1072 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1073
1074 /* Read lines to be deleted. */
1075 for (; file1ln <= file1end; ++file1ln) {
1076 struct fileline *linep;
1077 char *line1;
1078
1079 /* Read lines from both. */
1080 if (!(line1 = xfgets(file1)))
1081 errx(2, "error reading file1 in delete in change");
1082
1083 /* Add to delete queue. */
1084 if (!(linep = malloc(sizeof(struct fileline))))
1085 err(2, "printc");
1086 linep->line = line1;
1087 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1088 }
1089
1090 /* Process changed lines.. */
1091 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1092 ++file2ln) {
1093 struct fileline *del;
1094 char *add;
1095
1096 /* Get add line. */
1097 if (!(add = xfgets(file2)))
1098 errx(2, "error reading add in change");
1099
1100 del = STAILQ_FIRST(&delqhead);
1101 enqueue(del->line, '|', add);
1102 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1103 /*
1104 * Free fileline structure but not its elements since
1105 * they are queued up.
1106 */
1107 free(del);
1108 }
1109 processq();
1110
1111 /* Process remaining lines to add. */
1112 for (; file2ln <= file2end; ++file2ln) {
1113 char *add;
1114
1115 /* Get add line. */
1116 if (!(add = xfgets(file2)))
1117 errx(2, "error reading add in change");
1118
1119 enqueue(NULL, '>', add);
1120 }
1121 processq();
1122
1123 /* Process remaining lines to delete. */
1124 while (!STAILQ_EMPTY(&delqhead)) {
1125 struct fileline *filep;
1126
1127 filep = STAILQ_FIRST(&delqhead);
1128 enqueue(filep->line, '<', NULL);
1129 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1130 free(filep);
1131 }
1132 processq();
1133 }
1134
1135 /*
1136 * Print deleted lines from file, from file1ln to file1end.
1137 */
1138 static void
printd(FILE * file1,size_t file1end)1139 printd(FILE *file1, size_t file1end)
1140 {
1141 char *line1;
1142
1143 /* Print out lines file1ln to line2. */
1144 for (; file1ln <= file1end; ++file1ln) {
1145 if (!(line1 = xfgets(file1)))
1146 errx(2, "file1 ended early in delete");
1147 enqueue(line1, '<', NULL);
1148 }
1149 processq();
1150 }
1151
/*
 * Interactive mode usage: prints the list of commands understood at
 * the `%' prompt to standard output.
 */
static void
int_usage(void)
{
	static const char text[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l | 1:\tchoose left diff\n"
	    "r | 2:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* Same bytes as puts(): the text followed by one newline. */
	fputs(text, stdout);
	putchar('\n');
}
1169
/*
 * Prints the command-line synopsis to standard error and exits with
 * status 2.  Does not return.
 */
static void
usage(void)
{

	fputs("usage: sdiff [-abdilstHW] [-I regexp] [-o outfile] [-w width] file1"
	    " file2\n", stderr);
	exit(2);
}
1179