xref: /netbsd/usr.bin/sdiff/sdiff.c (revision 6c4b13ed)
1 /*	$NetBSD: sdiff.c,v 1.3 2021/08/27 17:38:57 rillig Exp $	*/
2 /*	$OpenBSD: sdiff.c,v 1.20 2006/09/19 05:52:23 otto Exp $ */
3 
4 /*
5  * Written by Raymond Lai <ray@cyth.net>.
6  * Public domain.
7  */
8 
9 #include <sys/param.h>
10 #include <sys/queue.h>
11 #include <sys/stat.h>
12 #include <sys/types.h>
13 #include <sys/wait.h>
14 
15 #include <ctype.h>
16 #include <err.h>
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <getopt.h>
20 #include <limits.h>
21 #include <paths.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 #include <util.h>
27 
28 #include "common.h"
29 #include "extern.h"
30 
/* Default total output width in columns; replaced by the -w argument. */
#define WIDTH 130
/*
 * Each column must be at least one character wide, plus three
 * characters between the columns (space, [<|>], space).
 */
#define WIDTH_MIN 5
37 
/*
 * A single diff line: one row of side-by-side output.  Entries are
 * created by enqueue() and released by freediff() once processq() has
 * handled them.
 */
struct diffline {
	SIMPLEQ_ENTRY(diffline) diffentries;	/* linkage in diffhead */
	char	*left;		/* left column text or NULL; freed by freediff() */
	char	 div;		/* divider character printed between the columns */
	char	*right;		/* right column text or NULL; freed by freediff() */
};
45 
/* Forward declarations for the file-local helpers defined below. */
static void astrcat(char **, const char *);
static void enqueue(char *, char, char *);
static char *mktmpcpy(const char *);
static void freediff(struct diffline *);
static void int_usage(void);
static int parsecmd(FILE *, FILE *, FILE *);
static void printa(FILE *, size_t);
static void printc(FILE *, size_t, FILE *, size_t);
static void printcol(const char *, size_t *, const size_t);
static void printd(FILE *, size_t);
static void println(const char *, const char, const char *);
static void processq(void);
static void prompt(const char *, const char *);
__dead static void usage(void);
static char *xfgets(FILE *);
61 
/* Queue of diff lines awaiting output; filled by enqueue(), drained by processq(). */
SIMPLEQ_HEAD(, diffline) diffhead = SIMPLEQ_HEAD_INITIALIZER(diffhead);
size_t	 line_width;	/* width of a line (two columns and divider) */
size_t	 width;		/* width of each column */
size_t	 file1ln, file2ln;	/* line number of file1 and file2 */
int	 Iflag = 0;	/* -I given: diff ignores sets matching a regexp */
int	 lflag;		/* print only left column for identical lines */
int	 sflag;		/* skip identical lines */
FILE	*outfile;	/* file to save changes to (-o); NULL when not given */
const char *tmpdir;	/* $TMPDIR, or _PATH_TMP when that is unset */
71 
/*
 * Long option table for getopt_long().  Every entry maps a GNU-style
 * long name onto the single-character code handled in main()'s option
 * switch.  -F (--diff-program) and -S (--strip-trailing-cr) are only
 * reachable through their long names because main()'s short option
 * string does not list them.
 */
static struct option longopts[] = {
	{ "text",			no_argument,		NULL,	'a' },
	{ "ignore-blank-lines",		no_argument,		NULL,	'B' },
	{ "ignore-space-change",	no_argument,		NULL,	'b' },
	{ "minimal",			no_argument,		NULL,	'd' },
	{ "ignore-tab-expansion",	no_argument,		NULL,	'E' },
	{ "diff-program",		required_argument,	NULL,	'F' },
	{ "speed-large-files",		no_argument,		NULL,	'H' },
	{ "ignore-matching-lines",	required_argument,	NULL,	'I' },
	/* -i is accepted as a short option, so offer its long spelling too. */
	{ "ignore-case",		no_argument,		NULL,	'i' },
	{ "left-column",		no_argument,		NULL,	'l' },
	{ "output",			required_argument,	NULL,	'o' },
	{ "strip-trailing-cr",		no_argument,		NULL,	'S' },
	{ "suppress-common-lines",	no_argument,		NULL,	's' },
	{ "expand-tabs",		no_argument,		NULL,	't' },
	{ "ignore-all-space",		no_argument,		NULL,	'W' },
	{ "width",			required_argument,	NULL,	'w' },
	{ NULL,				0,			NULL,	 0  }
};
90 
91 /*
92  * Create temporary file if source_file is not a regular file.
93  * Returns temporary file name if one was malloced, NULL if unnecessary.
94  */
95 static char *
mktmpcpy(const char * source_file)96 mktmpcpy(const char *source_file)
97 {
98 	struct stat sb;
99 	ssize_t rcount;
100 	int ifd, ofd;
101 	u_char buf[BUFSIZ];
102 	char *target_file;
103 
104 	/* Open input and output. */
105 	ifd = open(source_file, O_RDONLY, 0);
106 	/* File was opened successfully. */
107 	if (ifd != -1) {
108 		if (fstat(ifd, &sb) == -1)
109 			err(2, "error getting file status from %s", source_file);
110 
111 		/* Regular file. */
112 		if (S_ISREG(sb.st_mode))
113 			return (NULL);
114 	} else {
115 		/* If ``-'' does not exist the user meant stdin. */
116 		if (errno == ENOENT && strcmp(source_file, "-") == 0)
117 			ifd = STDIN_FILENO;
118 		else
119 			err(2, "error opening %s", source_file);
120 	}
121 
122 	/* Not a regular file, so copy input into temporary file. */
123 	if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
124 		err(2, "asprintf");
125 	if ((ofd = mkstemp(target_file)) == -1) {
126 		warn("error opening %s", target_file);
127 		goto FAIL;
128 	}
129 	while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
130 	    rcount != 0) {
131 		ssize_t wcount;
132 
133 		wcount = write(ofd, buf, (size_t)rcount);
134 		if (-1 == wcount || rcount != wcount) {
135 			warn("error writing to %s", target_file);
136 			goto FAIL;
137 		}
138 	}
139 	if (rcount == -1) {
140 		warn("error reading from %s", source_file);
141 		goto FAIL;
142 	}
143 
144 	close(ifd);
145 	close(ofd);
146 
147 	return (target_file);
148 
149 FAIL:
150 	unlink(target_file);
151 	exit(2);
152 }
153 
154 int
main(int argc,char ** argv)155 main(int argc, char **argv)
156 {
157 	FILE *diffpipe, *file1, *file2;
158 	size_t diffargc = 0, wflag = WIDTH;
159 	int ch, fd[2], status;
160 	pid_t pid;
161 	char **diffargv, *diffprog = "diff", *filename1, *filename2,
162 	    *tmp1, *tmp2, *s1, *s2;
163 
164 	/*
165 	 * Process diff flags.
166 	 */
167 	/*
168 	 * Allocate memory for diff arguments and NULL.
169 	 * Each flag has at most one argument, so doubling argc gives an
170 	 * upper limit of how many diff args can be passed.  argv[0],
171 	 * file1, and file2 won't have arguments so doubling them will
172 	 * waste some memory; however we need an extra space for the
173 	 * NULL at the end, so it sort of works out.
174 	 */
175 	if (!(diffargv = malloc(sizeof(char **) * argc * 2)))
176 		err(2, "main");
177 
178 	/* Add first argument, the program name. */
179 	diffargv[diffargc++] = diffprog;
180 
181 	while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
182 	    longopts, NULL)) != -1) {
183 		const char *errstr;
184 
185 		switch (ch) {
186 		case 'a':
187 			diffargv[diffargc++] = "-a";
188 			break;
189 		case 'B':
190 			diffargv[diffargc++] = "-B";
191 			break;
192 		case 'b':
193 			diffargv[diffargc++] = "-b";
194 			break;
195 		case 'd':
196 			diffargv[diffargc++] = "-d";
197 			break;
198 		case 'E':
199 			diffargv[diffargc++] = "-E";
200 			break;
201 		case 'F':
202 			diffargv[0] = diffprog = optarg;
203 			break;
204 		case 'H':
205 			diffargv[diffargc++] = "-H";
206 			break;
207 		case 'I':
208 			Iflag = 1;
209 			diffargv[diffargc++] = "-I";
210 			diffargv[diffargc++] = optarg;
211 			break;
212 		case 'i':
213 			diffargv[diffargc++] = "-i";
214 			break;
215 		case 'l':
216 			lflag = 1;
217 			break;
218 		case 'o':
219 			if ((outfile = fopen(optarg, "w")) == NULL)
220 				err(2, "could not open: %s", optarg);
221 			break;
222 		case 'S':
223 			diffargv[diffargc++] = "--strip-trailing-cr";
224 			break;
225 		case 's':
226 			sflag = 1;
227 			break;
228 		case 't':
229 			diffargv[diffargc++] = "-t";
230 			break;
231 		case 'W':
232 			diffargv[diffargc++] = "-w";
233 			break;
234 		case 'w':
235 			wflag = strtonum(optarg, WIDTH_MIN,
236 			    INT_MAX, &errstr);
237 			if (errstr)
238 				errx(2, "width is %s: %s", errstr, optarg);
239 			break;
240 		default:
241 			usage();
242 		}
243 
244 	}
245 	argc -= optind;
246 	argv += optind;
247 
248 	if (argc != 2)
249 		usage();
250 
251 	if ((tmpdir = getenv("TMPDIR")) == NULL)
252 		tmpdir = _PATH_TMP;
253 
254 	filename1 = argv[0];
255 	filename2 = argv[1];
256 
257 	/*
258 	 * Create temporary files for diff and sdiff to share if file1
259 	 * or file2 are not regular files.  This allows sdiff and diff
260 	 * to read the same inputs if one or both inputs are stdin.
261 	 *
262 	 * If any temporary files were created, their names would be
263 	 * saved in tmp1 or tmp2.  tmp1 should never equal tmp2.
264 	 */
265 	tmp1 = tmp2 = NULL;
266 	/* file1 and file2 are the same, so copy to same temp file. */
267 	if (strcmp(filename1, filename2) == 0) {
268 		if ((tmp1 = mktmpcpy(filename1)))
269 			filename1 = filename2 = tmp1;
270 	/* Copy file1 and file2 into separate temp files. */
271 	} else {
272 		if ((tmp1 = mktmpcpy(filename1)))
273 			filename1 = tmp1;
274 		if ((tmp2 = mktmpcpy(filename2)))
275 			filename2 = tmp2;
276 	}
277 
278 	diffargv[diffargc++] = filename1;
279 	diffargv[diffargc++] = filename2;
280 	/* Add NULL to end of array to indicate end of array. */
281 	diffargv[diffargc++] = NULL;
282 
283 	/* Subtract column divider and divide by two. */
284 	width = (wflag - 3) / 2;
285 	/* Make sure line_width can fit in size_t. */
286 	if (width > (SIZE_T_MAX - 3) / 2)
287 		errx(2, "width is too large: %zu", width);
288 	line_width = width * 2 + 3;
289 
290 	if (pipe(fd))
291 		err(2, "pipe");
292 
293 	switch(pid = fork()) {
294 	case 0:
295 		/* child */
296 		/* We don't read from the pipe. */
297 		close(fd[0]);
298 		if (dup2(fd[1], STDOUT_FILENO) == -1)
299 			err(2, "child could not duplicate descriptor");
300 		/* Free unused descriptor. */
301 		close(fd[1]);
302 
303 		execvp(diffprog, diffargv);
304 		err(2, "could not execute diff: %s", diffprog);
305 	case -1:
306 		err(2, "could not fork");
307 	}
308 
309 	/* parent */
310 	/* We don't write to the pipe. */
311 	close(fd[1]);
312 
313 	/* Open pipe to diff command. */
314 	if ((diffpipe = fdopen(fd[0], "r")) == NULL)
315 		err(2, "could not open diff pipe");
316 	if ((file1 = fopen(filename1, "r")) == NULL)
317 		err(2, "could not open %s", filename1);
318 	if ((file2 = fopen(filename2, "r")) == NULL)
319 		err(2, "could not open %s", filename2);
320 
321 	/* Line numbers start at one. */
322 	file1ln = file2ln = 1;
323 
324 	/* Read and parse diff output. */
325 	while (parsecmd(diffpipe, file1, file2) != EOF)
326 		;
327 	fclose(diffpipe);
328 
329 	/* Wait for diff to exit. */
330 	if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
331 	    WEXITSTATUS(status) >= 2)
332 		err(2, "diff exited abnormally");
333 
334 	/* Delete and free unneeded temporary files. */
335 	if (tmp1)
336 		if (unlink(tmp1))
337 			warn("error deleting %s", tmp1);
338 	if (tmp2)
339 		if (unlink(tmp2))
340 			warn("error deleting %s", tmp2);
341 	free(tmp1);
342 	free(tmp2);
343 	filename1 = filename2 = tmp1 = tmp2 = NULL;
344 
345 	/* No more diffs, so print common lines. */
346 	if (lflag)
347 		while ((s1 = xfgets(file1)))
348 			enqueue(s1, ' ', NULL);
349 	else
350 		for (;;) {
351 			s1 = xfgets(file1);
352 			s2 = xfgets(file2);
353 			if (s1 || s2)
354 				enqueue(s1, ' ', s2);
355 			else
356 				break;
357 		}
358 	fclose(file1);
359 	fclose(file2);
360 	/* Process unmodified lines. */
361 	processq();
362 
363 	/* Return diff exit status. */
364 	return (WEXITSTATUS(status));
365 }
366 
/*
 * Emit one column (left or right) of a side-by-side line on stdout.
 *
 * s is the text to print, *col the current screen column and col_max
 * the first column that must not be written to.  *col is advanced as
 * characters are emitted; a tab moves it to the next multiple of
 * eight.  Output stops at the end of s, when col_max is reached, or
 * at a tab that would not fit.
 */
static void
printcol(const char *s, size_t *col, const size_t col_max)
{
	const char *cp = s;

	while (*cp != '\0' && *col < col_max) {
		if (*cp == '\t') {
			size_t tabstop;

			/* Rounding up would overflow: give up on the line. */
			if (*col > SIZE_T_MAX - 8)
				return;

			tabstop = (*col / 8 + 1) * 8;

			/* The tab would run past the column: drop it and stop. */
			if (tabstop > col_max)
				return;
			*col = tabstop;
		} else {
			*col += 1;
		}

		putchar(*cp);
		cp++;
	}
}
408 
409 /*
410  * Prompts user to either choose between two strings or edit one, both,
411  * or neither.
412  */
413 static void
prompt(const char * s1,const char * s2)414 prompt(const char *s1, const char *s2)
415 {
416 	char *cmd;
417 
418 	/* Print command prompt. */
419 	putchar('%');
420 
421 	/* Get user input. */
422 	for (; (cmd = xfgets(stdin)); free(cmd)) {
423 		const char *p;
424 
425 		/* Skip leading whitespace. */
426 		for (p = cmd; isspace((unsigned char)(*p)); ++p)
427 			;
428 
429 		switch (*p) {
430 		case 'e':
431 			/* Skip `e'. */
432 			++p;
433 
434 			if (eparse(p, s1, s2) == -1)
435 				goto USAGE;
436 			break;
437 
438 		case 'l':
439 			/* Choose left column as-is. */
440 			if (s1 != NULL)
441 				fprintf(outfile, "%s\n", s1);
442 
443 			/* End of command parsing. */
444 			break;
445 
446 		case 'q':
447 			goto QUIT;
448 
449 		case 'r':
450 			/* Choose right column as-is. */
451 			if (s2 != NULL)
452 				fprintf(outfile, "%s\n", s2);
453 
454 			/* End of command parsing. */
455 			break;
456 
457 		case 's':
458 			sflag = 1;
459 			goto PROMPT;
460 
461 		case 'v':
462 			sflag = 0;
463 			/* FALLTHROUGH */
464 
465 		default:
466 			/* Interactive usage help. */
467 USAGE:
468 			int_usage();
469 PROMPT:
470 			putchar('%');
471 
472 			/* Prompt user again. */
473 			continue;
474 		}
475 
476 		free(cmd);
477 		return;
478 	}
479 
480 	/*
481 	 * If there was no error, we received an EOF from stdin, so we
482 	 * should quit.
483 	 */
484 QUIT:
485 	fclose(outfile);
486 	exit(0);
487 }
488 
489 /*
490  * Takes two strings, separated by a column divider.  NULL strings are
491  * treated as empty columns.  If the divider is the ` ' character, the
492  * second column is not printed (-l flag).  In this case, the second
493  * string must be NULL.  When the second column is NULL, the divider
494  * does not print the trailing space following the divider character.
495  *
496  * Takes into account that tabs can take multiple columns.
497  */
498 static void
println(const char * s1,const char divc,const char * s2)499 println(const char *s1, const char divc, const char *s2)
500 {
501 	size_t col;
502 
503 	/* Print first column.  Skips if s1 == NULL. */
504 	col = 0;
505 	if (s1) {
506 		/* Skip angle bracket and space. */
507 		printcol(s1, &col, width);
508 
509 	}
510 
511 	/* Only print left column. */
512 	if (divc == ' ' && !s2) {
513 		putchar('\n');
514 		return;
515 	}
516 
517 	/* Otherwise, we pad this column up to width. */
518 	for (; col < width; ++col)
519 		putchar(' ');
520 
521 	/*
522 	 * Print column divider.  If there is no second column, we don't
523 	 * need to add the space for padding.
524 	 */
525 	if (!s2) {
526 		printf(" %c\n", divc);
527 		return;
528 	}
529 	printf(" %c ", divc);
530 	col += 3;
531 
532 	/* Skip angle bracket and space. */
533 	printcol(s2, &col, line_width);
534 
535 	putchar('\n');
536 }
537 
/*
 * Read the next line from file with fparseln().
 *
 * Returns a malloc'ed string that the caller must free, or NULL when
 * no further line is available.  A read error is fatal (exit status 2).
 */
static char *
xfgets(FILE *file)
{
	/* All-NUL delimiter set passed to fparseln(). */
	const char delim[3] = {'\0', '\0', '\0'};
	char *ln;

	/* XXX - Is this necessary? */
	clearerr(file);

	ln = fparseln(file, NULL, NULL, delim, 0);
	if (ln == NULL && ferror(file))
		err(2, "error reading file");

	return (ln);
}
561 
562 /*
563  * Parse ed commands from diffpipe and print lines from file1 (lines
564  * to change or delete) or file2 (lines to add or change).
565  * Returns EOF or 0.
566  */
567 static int
parsecmd(FILE * diffpipe,FILE * file1,FILE * file2)568 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
569 {
570 	size_t file1start, file1end, file2start, file2end, n;
571 	/* ed command line and pointer to characters in line */
572 	char *line, *p, *q;
573 	const char *errstr;
574 	char c, cmd;
575 
576 	/* Read ed command. */
577 	if (!(line = xfgets(diffpipe)))
578 		return (EOF);
579 
580 	p = line;
581 	/* Go to character after line number. */
582 	while (isdigit((unsigned char)(*p)))
583 		++p;
584 	c = *p;
585 	*p++ = 0;
586 	file1start = strtonum(line, 0, INT_MAX, &errstr);
587 	if (errstr)
588 		errx(2, "file1 start is %s: %s", errstr, line);
589 
590 	/* A range is specified for file1. */
591 	if (c == ',') {
592 
593 		q = p;
594 		/* Go to character after file2end. */
595 		while (isdigit((unsigned char)(*p)))
596 			++p;
597 		c = *p;
598 		*p++ = 0;
599 		file1end = strtonum(q, 0, INT_MAX, &errstr);
600 		if (errstr)
601 			errx(2, "file1 end is %s: %s", errstr, line);
602 		if (file1start > file1end)
603 			errx(2, "invalid line range in file1: %s", line);
604 
605 	} else
606 		file1end = file1start;
607 
608 	cmd = c;
609 	/* Check that cmd is valid. */
610 	if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
611 		errx(2, "ed command not recognized: %c: %s", cmd, line);
612 
613 	q = p;
614 	/* Go to character after line number. */
615 	while (isdigit((unsigned char)(*p)))
616 		++p;
617 	c = *p;
618 	*p++ = 0;
619 	file2start = strtonum(q, 0, INT_MAX, &errstr);
620 	if (errstr)
621 		errx(2, "file2 start is %s: %s", errstr, line);
622 
623 	/*
624 	 * There should either be a comma signifying a second line
625 	 * number or the line should just end here.
626 	 */
627 	if (c != ',' && c != '\0')
628 		errx(2, "invalid line range in file2: %c: %s", c, line);
629 
630 	if (c == ',') {
631 
632 		file2end = strtonum(p, 0, INT_MAX, &errstr);
633 		if (errstr)
634 			errx(2, "file2 end is %s: %s", errstr, line);
635 		if (file2start >= file2end)
636 			errx(2, "invalid line range in file2: %s", line);
637 	} else
638 		file2end = file2start;
639 
640 	/* Appends happen _after_ stated line. */
641 	if (cmd == 'a') {
642 		if (file1start != file1end)
643 			errx(2, "append cannot have a file1 range: %s",
644 			    line);
645 		if (file1start == SIZE_T_MAX)
646 			errx(2, "file1 line range too high: %s", line);
647 		file1start = ++file1end;
648 	}
649 	/*
650 	 * I'm not sure what the deal is with the line numbers for
651 	 * deletes, though.
652 	 */
653 	else if (cmd == 'd') {
654 		if (file2start != file2end)
655 			errx(2, "delete cannot have a file2 range: %s",
656 			    line);
657 		if (file2start == SIZE_T_MAX)
658 			errx(2, "file2 line range too high: %s", line);
659 		file2start = ++file2end;
660 	}
661 
662 	/*
663 	 * Continue reading file1 and file2 until we reach line numbers
664 	 * specified by diff.  Should only happen with -I flag.
665 	 */
666 	for (; file1ln < file1start && file2ln < file2start;
667 	    ++file1ln, ++file2ln) {
668 		char *s1, *s2;
669 
670 		if (!(s1 = xfgets(file1)))
671 			errx(2, "file1 shorter than expected");
672 		if (!(s2 = xfgets(file2)))
673 			errx(2, "file2 shorter than expected");
674 
675 		/* If the -l flag was specified, print only left column. */
676 		if (lflag) {
677 			free(s2);
678 			/*
679 			 * XXX - If -l and -I are both specified, all
680 			 * unchanged or ignored lines are shown with a
681 			 * `(' divider.  This matches GNU sdiff, but I
682 			 * believe it is a bug.  Just check out:
683 			 * gsdiff -l -I '^$' samefile samefile.
684 			 */
685 			if (Iflag)
686 				enqueue(s1, '(', NULL);
687 			else
688 				enqueue(s1, ' ', NULL);
689 		} else
690 			enqueue(s1, ' ', s2);
691 	}
692 	/* Ignore deleted lines. */
693 	for (; file1ln < file1start; ++file1ln) {
694 		char *s;
695 
696 		if (!(s = xfgets(file1)))
697 			errx(2, "file1 shorter than expected");
698 
699 		enqueue(s, '(', NULL);
700 	}
701 	/* Ignore added lines. */
702 	for (; file2ln < file2start; ++file2ln) {
703 		char *s;
704 
705 		if (!(s = xfgets(file2)))
706 			errx(2, "file2 shorter than expected");
707 
708 		/* If -l flag was given, don't print right column. */
709 		if (lflag)
710 			free(s);
711 		else
712 			enqueue(NULL, ')', s);
713 	}
714 
715 	/* Process unmodified or skipped lines. */
716 	processq();
717 
718 	switch (cmd) {
719 	case 'a':
720 		printa(file2, file2end);
721 		n = file2end - file2start + 1;
722 		break;
723 
724 	case 'c':
725 		printc(file1, file1end, file2, file2end);
726 		n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
727 		break;
728 
729 	case 'd':
730 		printd(file1, file1end);
731 		n = file1end - file1start + 1;
732 		break;
733 
734 	default:
735 		errx(2, "invalid diff command: %c: %s", cmd, line);
736 	}
737 
738 	/* Skip to next ed line. */
739 	while (n--)
740 		if (!xfgets(diffpipe))
741 			errx(2, "diff ended early");
742 
743 	return (0);
744 }
745 
746 /*
747  * Queues up a diff line.
748  */
749 static void
enqueue(char * left,char divc,char * right)750 enqueue(char *left, char divc, char *right)
751 {
752 	struct diffline *diffp;
753 
754 	if (!(diffp = malloc(sizeof(struct diffline))))
755 		err(2, "enqueue");
756 	diffp->left = left;
757 	diffp->div = divc;
758 	diffp->right = right;
759 	SIMPLEQ_INSERT_TAIL(&diffhead, diffp, diffentries);
760 }
761 
762 /*
763  * Free a diffline structure and its elements.
764  */
765 static void
freediff(struct diffline * diffp)766 freediff(struct diffline *diffp)
767 {
768 	free(diffp->left);
769 	free(diffp->right);
770 	free(diffp);
771 }
772 
/*
 * Append `append' to the heap string *s, separating the two with a
 * newline.  If *s is NULL it simply becomes a copy of append.  *s is
 * updated to point at the (possibly moved) result.
 *
 * The buffer handled by the previous call and its length are
 * remembered, so repeated appends to the same string do not rescan
 * it.  Running out of memory is fatal.
 */
static void
astrcat(char **s, const char *append)
{
	/* Buffer seen in the previous call, and its string length. */
	static const char *cached_buf = NULL;
	static size_t cached_len = 0;
	const size_t add_len = strlen(append);
	size_t total;
	char *buf;

	/* Nothing accumulated yet: start with a private copy of append. */
	if (*s == NULL) {
		buf = malloc(add_len + 1);
		if (buf == NULL)
			err(2, "astrcat");
		memcpy(buf, append, add_len + 1);

		*s = buf;
		cached_buf = buf;
		cached_len = add_len;
		return;
	}

	/* A different string than last time: measure it afresh. */
	if (cached_buf != *s)
		cached_len = strlen(*s);

	/* Old text + '\n' + appended text + terminating NUL. */
	total = cached_len + 1 + add_len + 1;

	buf = realloc(*s, total);
	if (buf == NULL)
		err(2, "astrcat");

	/* Overwrite the old terminator with the separator, then copy. */
	buf[cached_len] = '\n';
	memcpy(buf + cached_len + 1, append, add_len + 1);

	/* Remember the result for the next call. */
	*s = buf;
	cached_buf = buf;
	cached_len = total - 1;
}
838 
839 /*
840  * Process diff set queue, printing, prompting, and saving each diff
841  * line stored in queue.
842  */
843 static void
processq(void)844 processq(void)
845 {
846 	struct diffline *diffp;
847 	char divc, *left, *right;
848 
849 	/* Don't process empty queue. */
850 	if (SIMPLEQ_EMPTY(&diffhead))
851 		return;
852 
853 	/* Remember the divider. */
854 	divc = SIMPLEQ_FIRST(&diffhead)->div;
855 
856 	left = NULL;
857 	right = NULL;
858 	/*
859 	 * Go through set of diffs, concatenating each line in left or
860 	 * right column into two long strings, `left' and `right'.
861 	 */
862 	SIMPLEQ_FOREACH(diffp, &diffhead, diffentries) {
863 		/*
864 		 * Print changed lines if -s was given,
865 		 * print all lines if -s was not given.
866 		 */
867 		if (!sflag || diffp->div == '|' || diffp->div == '<' ||
868 		    diffp->div == '>')
869 			println(diffp->left, diffp->div, diffp->right);
870 
871 		/* Append new lines to diff set. */
872 		if (diffp->left)
873 			astrcat(&left, diffp->left);
874 		if (diffp->right)
875 			astrcat(&right, diffp->right);
876 	}
877 
878 	/* Empty queue and free each diff line and its elements. */
879 	while (!SIMPLEQ_EMPTY(&diffhead)) {
880 		diffp = SIMPLEQ_FIRST(&diffhead);
881 		SIMPLEQ_REMOVE_HEAD(&diffhead, diffentries);
882 		freediff(diffp);
883 	}
884 
885 	/* Write to outfile, prompting user if lines are different. */
886 	if (outfile)
887 		switch (divc) {
888 		case ' ': case '(': case ')':
889 			fprintf(outfile, "%s\n", left);
890 			break;
891 		case '|': case '<': case '>':
892 			prompt(left, right);
893 			break;
894 		default:
895 			errx(2, "invalid divider: %c", divc);
896 		}
897 
898 	/* Free left and right. */
899 	free(left);
900 	free(right);
901 }
902 
903 /*
904  * Print lines following an (a)ppend command.
905  */
906 static void
printa(FILE * file,size_t line2)907 printa(FILE *file, size_t line2)
908 {
909 	char *line;
910 
911 	for (; file2ln <= line2; ++file2ln) {
912 		if (!(line = xfgets(file)))
913 			errx(2, "append ended early");
914 		enqueue(NULL, '>', line);
915 	}
916 
917 	processq();
918 }
919 
920 /*
921  * Print lines following a (c)hange command, from file1ln to file1end
922  * and from file2ln to file2end.
923  */
924 static void
printc(FILE * file1,size_t file1end,FILE * file2,size_t file2end)925 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
926 {
927 	struct fileline {
928 		SIMPLEQ_ENTRY(fileline)	 fileentries;
929 		char			*line;
930 	};
931 	SIMPLEQ_HEAD(, fileline) delqhead = SIMPLEQ_HEAD_INITIALIZER(delqhead);
932 
933 	/* Read lines to be deleted. */
934 	for (; file1ln <= file1end; ++file1ln) {
935 		struct fileline *linep;
936 		char *line1;
937 
938 		/* Read lines from both. */
939 		if (!(line1 = xfgets(file1)))
940 			errx(2, "error reading file1 in delete in change");
941 
942 		/* Add to delete queue. */
943 		if (!(linep = malloc(sizeof(struct fileline))))
944 			err(2, "printc");
945 		linep->line = line1;
946 		SIMPLEQ_INSERT_TAIL(&delqhead, linep, fileentries);
947 	}
948 
949 	/* Process changed lines.. */
950 	for (; !SIMPLEQ_EMPTY(&delqhead) && file2ln <= file2end;
951 	    ++file2ln) {
952 		struct fileline *del;
953 		char *add;
954 
955 		/* Get add line. */
956 		if (!(add = xfgets(file2)))
957 			errx(2, "error reading add in change");
958 
959 		del = SIMPLEQ_FIRST(&delqhead);
960 		enqueue(del->line, '|', add);
961 		SIMPLEQ_REMOVE_HEAD(&delqhead, fileentries);
962 		/*
963 		 * Free fileline structure but not its elements since
964 		 * they are queued up.
965 		 */
966 		free(del);
967 	}
968 	processq();
969 
970 	/* Process remaining lines to add. */
971 	for (; file2ln <= file2end; ++file2ln) {
972 		char *add;
973 
974 		/* Get add line. */
975 		if (!(add = xfgets(file2)))
976 			errx(2, "error reading add in change");
977 
978 		enqueue(NULL, '>', add);
979 	}
980 	processq();
981 
982 	/* Process remaining lines to delete. */
983 	while (!SIMPLEQ_EMPTY(&delqhead)) {
984 		struct fileline *filep;
985 
986 		filep = SIMPLEQ_FIRST(&delqhead);
987 		enqueue(filep->line, '<', NULL);
988 		SIMPLEQ_REMOVE_HEAD(&delqhead, fileentries);
989 		free(filep);
990 	}
991 	processq();
992 }
993 
994 /*
995  * Print deleted lines from file, from file1ln to file1end.
996  */
997 static void
printd(FILE * file1,size_t file1end)998 printd(FILE *file1, size_t file1end)
999 {
1000 	char *line1;
1001 
1002 	/* Print out lines file1ln to line2. */
1003 	for (; file1ln <= file1end; ++file1ln) {
1004 		/* XXX - Why can't this handle stdin? */
1005 		if (!(line1 = xfgets(file1)))
1006 			errx(2, "file1 ended early in delete");
1007 		enqueue(line1, '<', NULL);
1008 	}
1009 	processq();
1010 }
1011 
/*
 * Print the list of interactive commands understood by prompt() on
 * stdout.
 */
static void
int_usage(void)
{
	static const char help[] =
	    "e:\tedit blank diff\n"
	    "eb:\tedit both diffs concatenated\n"
	    "el:\tedit left diff\n"
	    "er:\tedit right diff\n"
	    "l:\tchoose left diff\n"
	    "r:\tchoose right diff\n"
	    "s:\tsilent mode--don't print identical lines\n"
	    "v:\tverbose mode--print identical lines\n"
	    "q:\tquit";

	/* puts() supplies the final newline. */
	puts(help);
}
1028 
/*
 * Print the command-line synopsis on stderr and exit with status 2.
 * Does not return.
 */
static void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
	    __progname);

	exit(2);
}
1039