xref: /freebsd/usr.bin/diff/diffreg.c (revision 8a0a413e)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (C) Caldera International Inc.  2001-2002.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code and documentation must retain the above
13  *    copyright notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed or owned by Caldera
20  *	International, Inc.
21  * 4. Neither the name of Caldera International, Inc. nor the names of other
22  *    contributors may be used to endorse or promote products derived from
23  *    this software without specific prior written permission.
24  *
25  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
26  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
27  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
30  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 /*-
39  * Copyright (c) 1991, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
67  */
68 
69 #include <sys/cdefs.h>
70 __FBSDID("$FreeBSD$");
71 
72 #include <sys/capsicum.h>
73 #include <sys/procdesc.h>
74 #include <sys/stat.h>
75 #include <sys/types.h>
76 #include <sys/event.h>
77 #include <sys/wait.h>
78 
79 #include <capsicum_helpers.h>
80 #include <ctype.h>
81 #include <err.h>
82 #include <errno.h>
83 #include <fcntl.h>
84 #include <paths.h>
85 #include <regex.h>
86 #include <stddef.h>
87 #include <stdint.h>
88 #include <stdio.h>
89 #include <stdlib.h>
90 #include <string.h>
91 #include <unistd.h>
92 #include <limits.h>
93 #include <signal.h>
94 
95 #include "diff.h"
96 #include "xmalloc.h"
97 
98 #define _PATH_PR "/usr/bin/pr"
99 
100 /*
101  * diff - compare two files.
102  */
103 
104 /*
105  *	Uses an algorithm due to Harold Stone, which finds
106  *	a pair of longest identical subsequences in the two
107  *	files.
108  *
109  *	The major goal is to generate the match vector J.
110  *	J[i] is the index of the line in file1 corresponding
111  *	to line i file0. J[i] = 0 if there is no
112  *	such line in file1.
113  *
114  *	Lines are hashed so as to work in core. All potential
115  *	matches are located by sorting the lines of each file
116  *	on the hash (called ``value''). In particular, this
117  *	collects the equivalence classes in file1 together.
118  *	Subroutine equiv replaces the value of each line in
119  *	file0 by the index of the first element of its
120  *	matching equivalence in (the reordered) file1.
121  *	To save space equiv squeezes file1 into a single
122  *	array member in which the equivalence classes
123  *	are simply concatenated, except that their first
124  *	members are flagged by changing sign.
125  *
126  *	Next the indices that point into member are unsorted into
127  *	array class according to the original order of file0.
128  *
129  *	The cleverness lies in routine stone. This marches
130  *	through the lines of file0, developing a vector klist
131  *	of "k-candidates". At step i a k-candidate is a matched
132  *	pair of lines x,y (x in file0 y in file1) such that
133  *	there is a common subsequence of length k
134  *	between the first i lines of file0 and the first y
135  *	lines of file1, but there is no such subsequence for
136  *	any smaller y. x is the earliest possible mate to y
137  *	that occurs in such a subsequence.
138  *
139  *	Whenever any of the members of the equivalence class of
140  *	lines in file1 matable to a line in file0 has serial number
141  *	less than the y of some k-candidate, that k-candidate
142  *	with the smallest such y is replaced. The new
143  *	k-candidate is chained (via pred) to the current
144  *	k-1 candidate so that the actual subsequence can
145  *	be recovered. When a member has serial number greater
146  *	that the y of all k-candidates, the klist is extended.
147  *	At the end, the longest subsequence is pulled out
148  *	and placed in the array J by unravel
149  *
150  *	With J in hand, the matches there recorded are
151  *	check'ed against reality to assure that no spurious
152  *	matches have crept in due to hashing. If they have,
153  *	they are broken, and "jackpot" is recorded--a harmless
154  *	matter except that a true match for a spuriously
155  *	mated line may now be unnecessarily reported as a change.
156  *
157  *	Much of the complexity of the program comes simply
158  *	from trying to minimize core utilization and
159  *	maximize the range of doable problems by dynamically
160  *	allocating what is needed and reusing what is not.
161  *	The core requirements for problems larger than somewhat
162  *	are (in words) 2*length(file0) + length(file1) +
163  *	3*(number of k-candidates installed),  typically about
164  *	6n words for files of length n.
165  */
166 
167 struct cand {
168 	int	x;
169 	int	y;
170 	int	pred;
171 };
172 
173 static struct line {
174 	int	serial;
175 	int	value;
176 } *file[2];
177 
178 /*
179  * The following struct is used to record change information when
180  * doing a "context" or "unified" diff.  (see routine "change" to
181  * understand the highly mnemonic field names)
182  */
183 struct context_vec {
184 	int	a;		/* start line in old file */
185 	int	b;		/* end line in old file */
186 	int	c;		/* start line in new file */
187 	int	d;		/* end line in new file */
188 };
189 
190 #define	diff_output	printf
191 static FILE	*opentemp(const char *);
192 static void	 output(char *, FILE *, char *, FILE *, int);
193 static void	 check(FILE *, FILE *, int);
194 static void	 range(int, int, const char *);
195 static void	 uni_range(int, int);
196 static void	 dump_context_vec(FILE *, FILE *, int);
197 static void	 dump_unified_vec(FILE *, FILE *, int);
198 static void	 prepare(int, FILE *, size_t, int);
199 static void	 prune(void);
200 static void	 equiv(struct line *, int, struct line *, int, int *);
201 static void	 unravel(int);
202 static void	 unsort(struct line *, int, int *);
203 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
204 static void	 sort(struct line *, int);
205 static void	 print_header(const char *, const char *);
206 static int	 ignoreline(char *);
207 static int	 asciifile(FILE *);
208 static int	 fetch(long *, int, int, FILE *, int, int, int);
209 static int	 newcand(int, int, int);
210 static int	 search(int *, int, int);
211 static int	 skipline(FILE *);
212 static int	 isqrt(int);
213 static int	 stone(int *, int, int *, int *, int);
214 static int	 readhash(FILE *, int);
215 static int	 files_differ(FILE *, FILE *, int);
216 static char	*match_function(const long *, int, FILE *);
217 static char	*preadline(int, size_t, off_t);
218 
219 static int  *J;			/* will be overlaid on class */
220 static int  *class;		/* will be overlaid on file[0] */
221 static int  *klist;		/* will be overlaid on file[0] after class */
222 static int  *member;		/* will be overlaid on file[1] */
223 static int   clen;
224 static int   inifdef;		/* whether or not we are in a #ifdef block */
225 static int   len[2];
226 static int   pref, suff;	/* length of prefix and suffix */
227 static int   slen[2];
228 static int   anychange;
229 static long *ixnew;		/* will be overlaid on file[1] */
230 static long *ixold;		/* will be overlaid on klist */
231 static struct cand *clist;	/* merely a free storage pot for candidates */
232 static int   clistlen;		/* the length of clist */
233 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
234 static u_char *chrtran;		/* translation table for case-folding */
235 static struct context_vec *context_vec_start;
236 static struct context_vec *context_vec_end;
237 static struct context_vec *context_vec_ptr;
238 
239 #define FUNCTION_CONTEXT_SIZE	55
240 static char lastbuf[FUNCTION_CONTEXT_SIZE];
241 static int lastline;
242 static int lastmatchline;
243 
244 
245 /*
246  * chrtran points to one of 2 translation tables: cup2low if folding upper to
247  * lower case clow2low if not folding case
248  */
249 static u_char clow2low[256] = {
250 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
251 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
252 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
253 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
254 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
255 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
256 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
257 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
258 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
259 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
260 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
261 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
262 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
263 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
264 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
265 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
266 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
267 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
268 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
269 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
270 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
271 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
272 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
273 	0xfd, 0xfe, 0xff
274 };
275 
276 static u_char cup2low[256] = {
277 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
278 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
279 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
280 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
281 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
282 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
283 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
284 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
285 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
286 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
287 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
288 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
289 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
290 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
291 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
292 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
293 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
294 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
295 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
296 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
297 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
298 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
299 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
300 	0xfd, 0xfe, 0xff
301 };
302 
303 int
304 diffreg(char *file1, char *file2, int flags, int capsicum)
305 {
306 	FILE *f1, *f2;
307 	int i, rval;
308 	int	ostdout = -1;
309 	int pr_pd, kq;
310 	struct kevent *e;
311 	cap_rights_t rights_ro;
312 
313 	e = NULL;
314 	kq = -1;
315 	f1 = f2 = NULL;
316 	rval = D_SAME;
317 	anychange = 0;
318 	lastline = 0;
319 	lastmatchline = 0;
320 	context_vec_ptr = context_vec_start - 1;
321 	if (flags & D_IGNORECASE)
322 		chrtran = cup2low;
323 	else
324 		chrtran = clow2low;
325 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
326 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
327 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
328 		goto closem;
329 
330 	if (flags & D_EMPTY1)
331 		f1 = fopen(_PATH_DEVNULL, "r");
332 	else {
333 		if (!S_ISREG(stb1.st_mode)) {
334 			if ((f1 = opentemp(file1)) == NULL ||
335 			    fstat(fileno(f1), &stb1) < 0) {
336 				warn("%s", file1);
337 				status |= 2;
338 				goto closem;
339 			}
340 		} else if (strcmp(file1, "-") == 0)
341 			f1 = stdin;
342 		else
343 			f1 = fopen(file1, "r");
344 	}
345 	if (f1 == NULL) {
346 		warn("%s", file1);
347 		status |= 2;
348 		goto closem;
349 	}
350 
351 	if (flags & D_EMPTY2)
352 		f2 = fopen(_PATH_DEVNULL, "r");
353 	else {
354 		if (!S_ISREG(stb2.st_mode)) {
355 			if ((f2 = opentemp(file2)) == NULL ||
356 			    fstat(fileno(f2), &stb2) < 0) {
357 				warn("%s", file2);
358 				status |= 2;
359 				goto closem;
360 			}
361 		} else if (strcmp(file2, "-") == 0)
362 			f2 = stdin;
363 		else
364 			f2 = fopen(file2, "r");
365 	}
366 	if (f2 == NULL) {
367 		warn("%s", file2);
368 		status |= 2;
369 		goto closem;
370 	}
371 
372 	if (lflag) {
373 		/* redirect stdout to pr */
374 		int	 pfd[2];
375 		pid_t	pid;
376 		char	*header;
377 
378 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
379 		signal(SIGPIPE, SIG_IGN);
380 		fflush(stdout);
381 		rewind(stdout);
382 		pipe(pfd);
383 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
384 		case -1:
385 			status |= 2;
386 			free(header);
387 			err(2, "No more processes");
388 		case 0:
389 			/* child */
390 			if (pfd[0] != STDIN_FILENO) {
391 				dup2(pfd[0], STDIN_FILENO);
392 				close(pfd[0]);
393 			}
394 			close(pfd[1]);
395 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
396 			_exit(127);
397 		default:
398 
399 			/* parent */
400 			if (pfd[1] != STDOUT_FILENO) {
401 				ostdout = dup(STDOUT_FILENO);
402 				dup2(pfd[1], STDOUT_FILENO);
403 				close(pfd[1]);
404 			}
405 			close(pfd[0]);
406 			rewind(stdout);
407 			free(header);
408 			kq = kqueue();
409 			if (kq == -1)
410 				err(2, "kqueue");
411 			e = xmalloc(sizeof(struct kevent));
412 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
413 			    NULL);
414 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
415 				err(2, "kevent");
416 		}
417 	}
418 
419 	if (capsicum) {
420 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
421 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
422 		    && errno != ENOSYS)
423 			err(2, "unable to limit rights on: %s", file1);
424 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
425 		    errno != ENOSYS)
426 			err(2, "unable to limit rights on: %s", file2);
427 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
428 			/* stding has already been limited */
429 			if (caph_limit_stderr() == -1)
430 				err(2, "unable to limit stderr");
431 			if (caph_limit_stdout() == -1)
432 				err(2, "unable to limit stdout");
433 		} else if (caph_limit_stdio() == -1)
434 				err(2, "unable to limit stdio");
435 
436 		caph_cache_catpages();
437 		caph_cache_tzdata();
438 		if (cap_enter() < 0 && errno != ENOSYS)
439 			err(2, "unable to enter capability mode");
440 	}
441 
442 	switch (files_differ(f1, f2, flags)) {
443 	case 0:
444 		goto closem;
445 	case 1:
446 		break;
447 	default:
448 		/* error */
449 		status |= 2;
450 		goto closem;
451 	}
452 
453 	if ((flags & D_FORCEASCII) == 0 &&
454 	    (!asciifile(f1) || !asciifile(f2))) {
455 		rval = D_BINARY;
456 		status |= 1;
457 		goto closem;
458 	}
459 	prepare(0, f1, stb1.st_size, flags);
460 	prepare(1, f2, stb2.st_size, flags);
461 
462 	prune();
463 	sort(sfile[0], slen[0]);
464 	sort(sfile[1], slen[1]);
465 
466 	member = (int *)file[1];
467 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
468 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
469 
470 	class = (int *)file[0];
471 	unsort(sfile[0], slen[0], class);
472 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
473 
474 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
475 	clen = 0;
476 	clistlen = 100;
477 	clist = xcalloc(clistlen, sizeof(*clist));
478 	i = stone(class, slen[0], member, klist, flags);
479 	free(member);
480 	free(class);
481 
482 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
483 	unravel(klist[i]);
484 	free(clist);
485 	free(klist);
486 
487 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
488 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
489 	check(f1, f2, flags);
490 	output(file1, f1, file2, f2, flags);
491 	if (ostdout != -1 && e != NULL) {
492 		/* close the pipe to pr and restore stdout */
493 		int wstatus;
494 
495 		fflush(stdout);
496 		if (ostdout != STDOUT_FILENO) {
497 			close(STDOUT_FILENO);
498 			dup2(ostdout, STDOUT_FILENO);
499 			close(ostdout);
500 		}
501 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
502 			err(2, "kevent");
503 		wstatus = e[0].data;
504 		close(kq);
505 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
506 			errx(2, "pr exited abnormally");
507 		else if (WIFSIGNALED(wstatus))
508 			errx(2, "pr killed by signal %d",
509 			    WTERMSIG(wstatus));
510 	}
511 
512 closem:
513 	if (anychange) {
514 		status |= 1;
515 		if (rval == D_SAME)
516 			rval = D_DIFFER;
517 	}
518 	if (f1 != NULL)
519 		fclose(f1);
520 	if (f2 != NULL)
521 		fclose(f2);
522 
523 	return (rval);
524 }
525 
526 /*
527  * Check to see if the given files differ.
528  * Returns 0 if they are the same, 1 if different, and -1 on error.
529  * XXX - could use code from cmp(1) [faster]
530  */
531 static int
532 files_differ(FILE *f1, FILE *f2, int flags)
533 {
534 	char buf1[BUFSIZ], buf2[BUFSIZ];
535 	size_t i, j;
536 
537 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
538 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
539 		return (1);
540 	for (;;) {
541 		i = fread(buf1, 1, sizeof(buf1), f1);
542 		j = fread(buf2, 1, sizeof(buf2), f2);
543 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
544 			return (-1);
545 		if (i != j)
546 			return (1);
547 		if (i == 0)
548 			return (0);
549 		if (memcmp(buf1, buf2, i) != 0)
550 			return (1);
551 	}
552 }
553 
554 static FILE *
555 opentemp(const char *f)
556 {
557 	char buf[BUFSIZ], tempfile[PATH_MAX];
558 	ssize_t nread;
559 	int ifd, ofd;
560 
561 	if (strcmp(f, "-") == 0)
562 		ifd = STDIN_FILENO;
563 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
564 		return (NULL);
565 
566 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
567 
568 	if ((ofd = mkstemp(tempfile)) < 0) {
569 		close(ifd);
570 		return (NULL);
571 	}
572 	unlink(tempfile);
573 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
574 		if (write(ofd, buf, nread) != nread) {
575 			close(ifd);
576 			close(ofd);
577 			return (NULL);
578 		}
579 	}
580 	close(ifd);
581 	lseek(ofd, (off_t)0, SEEK_SET);
582 	return (fdopen(ofd, "r"));
583 }
584 
585 char *
586 splice(char *dir, char *path)
587 {
588 	char *tail, *buf;
589 	size_t dirlen;
590 
591 	dirlen = strlen(dir);
592 	while (dirlen != 0 && dir[dirlen - 1] == '/')
593 	    dirlen--;
594 	if ((tail = strrchr(path, '/')) == NULL)
595 		tail = path;
596 	else
597 		tail++;
598 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
599 	return (buf);
600 }
601 
602 static void
603 prepare(int i, FILE *fd, size_t filesize, int flags)
604 {
605 	struct line *p;
606 	int h;
607 	size_t sz, j;
608 
609 	rewind(fd);
610 
611 	sz = MIN(filesize, SIZE_MAX) / 25;
612 	if (sz < 100)
613 		sz = 100;
614 
615 	p = xcalloc(sz + 3, sizeof(*p));
616 	for (j = 0; (h = readhash(fd, flags));) {
617 		if (j == sz) {
618 			sz = sz * 3 / 2;
619 			p = xreallocarray(p, sz + 3, sizeof(*p));
620 		}
621 		p[++j].value = h;
622 	}
623 	len[i] = j;
624 	file[i] = p;
625 }
626 
627 static void
628 prune(void)
629 {
630 	int i, j;
631 
632 	for (pref = 0; pref < len[0] && pref < len[1] &&
633 	    file[0][pref + 1].value == file[1][pref + 1].value;
634 	    pref++)
635 		;
636 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
637 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
638 	    suff++)
639 		;
640 	for (j = 0; j < 2; j++) {
641 		sfile[j] = file[j] + pref;
642 		slen[j] = len[j] - pref - suff;
643 		for (i = 0; i <= slen[j]; i++)
644 			sfile[j][i].serial = i;
645 	}
646 }
647 
648 static void
649 equiv(struct line *a, int n, struct line *b, int m, int *c)
650 {
651 	int i, j;
652 
653 	i = j = 1;
654 	while (i <= n && j <= m) {
655 		if (a[i].value < b[j].value)
656 			a[i++].value = 0;
657 		else if (a[i].value == b[j].value)
658 			a[i++].value = j;
659 		else
660 			j++;
661 	}
662 	while (i <= n)
663 		a[i++].value = 0;
664 	b[m + 1].value = 0;
665 	j = 0;
666 	while (++j <= m) {
667 		c[j] = -b[j].serial;
668 		while (b[j + 1].value == b[j].value) {
669 			j++;
670 			c[j] = b[j].serial;
671 		}
672 	}
673 	c[j] = -1;
674 }
675 
676 /* Code taken from ping.c */
677 static int
678 isqrt(int n)
679 {
680 	int y, x = 1;
681 
682 	if (n == 0)
683 		return (0);
684 
685 	do { /* newton was a stinker */
686 		y = x;
687 		x = n / x;
688 		x += y;
689 		x /= 2;
690 	} while ((x - y) > 1 || (x - y) < -1);
691 
692 	return (x);
693 }
694 
695 static int
696 stone(int *a, int n, int *b, int *c, int flags)
697 {
698 	int i, k, y, j, l;
699 	int oldc, tc, oldl, sq;
700 	u_int numtries, bound;
701 
702 	if (flags & D_MINIMAL)
703 		bound = UINT_MAX;
704 	else {
705 		sq = isqrt(n);
706 		bound = MAX(256, sq);
707 	}
708 
709 	k = 0;
710 	c[0] = newcand(0, 0, 0);
711 	for (i = 1; i <= n; i++) {
712 		j = a[i];
713 		if (j == 0)
714 			continue;
715 		y = -b[j];
716 		oldl = 0;
717 		oldc = c[0];
718 		numtries = 0;
719 		do {
720 			if (y <= clist[oldc].y)
721 				continue;
722 			l = search(c, k, y);
723 			if (l != oldl + 1)
724 				oldc = c[l - 1];
725 			if (l <= k) {
726 				if (clist[c[l]].y <= y)
727 					continue;
728 				tc = c[l];
729 				c[l] = newcand(i, y, oldc);
730 				oldc = tc;
731 				oldl = l;
732 				numtries++;
733 			} else {
734 				c[l] = newcand(i, y, oldc);
735 				k++;
736 				break;
737 			}
738 		} while ((y = b[++j]) > 0 && numtries < bound);
739 	}
740 	return (k);
741 }
742 
743 static int
744 newcand(int x, int y, int pred)
745 {
746 	struct cand *q;
747 
748 	if (clen == clistlen) {
749 		clistlen = clistlen * 11 / 10;
750 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
751 	}
752 	q = clist + clen;
753 	q->x = x;
754 	q->y = y;
755 	q->pred = pred;
756 	return (clen++);
757 }
758 
759 static int
760 search(int *c, int k, int y)
761 {
762 	int i, j, l, t;
763 
764 	if (clist[c[k]].y < y)	/* quick look for typical case */
765 		return (k + 1);
766 	i = 0;
767 	j = k + 1;
768 	for (;;) {
769 		l = (i + j) / 2;
770 		if (l <= i)
771 			break;
772 		t = clist[c[l]].y;
773 		if (t > y)
774 			j = l;
775 		else if (t < y)
776 			i = l;
777 		else
778 			return (l);
779 	}
780 	return (l + 1);
781 }
782 
783 static void
784 unravel(int p)
785 {
786 	struct cand *q;
787 	int i;
788 
789 	for (i = 0; i <= len[0]; i++)
790 		J[i] = i <= pref ? i :
791 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
792 	for (q = clist + p; q->y != 0; q = clist + q->pred)
793 		J[q->x + pref] = q->y + pref;
794 }
795 
796 /*
797  * Check does double duty:
798  *  1.	ferret out any fortuitous correspondences due
799  *	to confounding by hashing (which result in "jackpot")
800  *  2.  collect random access indexes to the two files
801  */
802 static void
803 check(FILE *f1, FILE *f2, int flags)
804 {
805 	int i, j, jackpot, c, d;
806 	long ctold, ctnew;
807 
808 	rewind(f1);
809 	rewind(f2);
810 	j = 1;
811 	ixold[0] = ixnew[0] = 0;
812 	jackpot = 0;
813 	ctold = ctnew = 0;
814 	for (i = 1; i <= len[0]; i++) {
815 		if (J[i] == 0) {
816 			ixold[i] = ctold += skipline(f1);
817 			continue;
818 		}
819 		while (j < J[i]) {
820 			ixnew[j] = ctnew += skipline(f2);
821 			j++;
822 		}
823 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
824 			for (;;) {
825 				c = getc(f1);
826 				d = getc(f2);
827 				/*
828 				 * GNU diff ignores a missing newline
829 				 * in one file for -b or -w.
830 				 */
831 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
832 					if (c == EOF && d == '\n') {
833 						ctnew++;
834 						break;
835 					} else if (c == '\n' && d == EOF) {
836 						ctold++;
837 						break;
838 					}
839 				}
840 				ctold++;
841 				ctnew++;
842 				if (flags & D_STRIPCR) {
843 					if (c == '\r') {
844 						if ((c = getc(f1)) == '\n') {
845 							ctnew++;
846 							break;
847 						}
848 					}
849 					if (d == '\r') {
850 						if ((d = getc(f2)) == '\n') {
851 							ctold++;
852 							break;
853 						}
854 					}
855 				}
856 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
857 				    isspace(d)) {
858 					do {
859 						if (c == '\n')
860 							break;
861 						ctold++;
862 					} while (isspace(c = getc(f1)));
863 					do {
864 						if (d == '\n')
865 							break;
866 						ctnew++;
867 					} while (isspace(d = getc(f2)));
868 				} else if ((flags & D_IGNOREBLANKS)) {
869 					while (isspace(c) && c != '\n') {
870 						c = getc(f1);
871 						ctold++;
872 					}
873 					while (isspace(d) && d != '\n') {
874 						d = getc(f2);
875 						ctnew++;
876 					}
877 				}
878 				if (chrtran[c] != chrtran[d]) {
879 					jackpot++;
880 					J[i] = 0;
881 					if (c != '\n' && c != EOF)
882 						ctold += skipline(f1);
883 					if (d != '\n' && c != EOF)
884 						ctnew += skipline(f2);
885 					break;
886 				}
887 				if (c == '\n' || c == EOF)
888 					break;
889 			}
890 		} else {
891 			for (;;) {
892 				ctold++;
893 				ctnew++;
894 				if ((c = getc(f1)) != (d = getc(f2))) {
895 					/* jackpot++; */
896 					J[i] = 0;
897 					if (c != '\n' && c != EOF)
898 						ctold += skipline(f1);
899 					if (d != '\n' && c != EOF)
900 						ctnew += skipline(f2);
901 					break;
902 				}
903 				if (c == '\n' || c == EOF)
904 					break;
905 			}
906 		}
907 		ixold[i] = ctold;
908 		ixnew[j] = ctnew;
909 		j++;
910 	}
911 	for (; j <= len[1]; j++) {
912 		ixnew[j] = ctnew += skipline(f2);
913 	}
914 	/*
915 	 * if (jackpot)
916 	 *	fprintf(stderr, "jackpot\n");
917 	 */
918 }
919 
920 /* shellsort CACM #201 */
921 static void
922 sort(struct line *a, int n)
923 {
924 	struct line *ai, *aim, w;
925 	int j, m = 0, k;
926 
927 	if (n == 0)
928 		return;
929 	for (j = 1; j <= n; j *= 2)
930 		m = 2 * j - 1;
931 	for (m /= 2; m != 0; m /= 2) {
932 		k = n - m;
933 		for (j = 1; j <= k; j++) {
934 			for (ai = &a[j]; ai > a; ai -= m) {
935 				aim = &ai[m];
936 				if (aim < ai)
937 					break;	/* wraparound */
938 				if (aim->value > ai[0].value ||
939 				    (aim->value == ai[0].value &&
940 					aim->serial > ai[0].serial))
941 					break;
942 				w.value = ai[0].value;
943 				ai[0].value = aim->value;
944 				aim->value = w.value;
945 				w.serial = ai[0].serial;
946 				ai[0].serial = aim->serial;
947 				aim->serial = w.serial;
948 			}
949 		}
950 	}
951 }
952 
953 static void
954 unsort(struct line *f, int l, int *b)
955 {
956 	int *a, i;
957 
958 	a = xcalloc(l + 1, sizeof(*a));
959 	for (i = 1; i <= l; i++)
960 		a[f[i].serial] = f[i].value;
961 	for (i = 1; i <= l; i++)
962 		b[i] = a[i];
963 	free(a);
964 }
965 
966 static int
967 skipline(FILE *f)
968 {
969 	int i, c;
970 
971 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
972 		continue;
973 	return (i);
974 }
975 
976 static void
977 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
978 {
979 	int m, i0, i1, j0, j1;
980 
981 	rewind(f1);
982 	rewind(f2);
983 	m = len[0];
984 	J[0] = 0;
985 	J[m + 1] = len[1] + 1;
986 	if (diff_format != D_EDIT) {
987 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
988 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
989 				i0++;
990 			j0 = J[i0 - 1] + 1;
991 			i1 = i0 - 1;
992 			while (i1 < m && J[i1 + 1] == 0)
993 				i1++;
994 			j1 = J[i1 + 1] - 1;
995 			J[i1] = j1;
996 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
997 		}
998 	} else {
999 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
1000 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
1001 				i0--;
1002 			j0 = J[i0 + 1] - 1;
1003 			i1 = i0 + 1;
1004 			while (i1 > 1 && J[i1 - 1] == 0)
1005 				i1--;
1006 			j1 = J[i1 - 1] + 1;
1007 			J[i1] = j1;
1008 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1009 		}
1010 	}
1011 	if (m == 0)
1012 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1013 	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
1014 		for (;;) {
1015 #define	c i0
1016 			if ((c = getc(f1)) == EOF)
1017 				return;
1018 			diff_output("%c", c);
1019 		}
1020 #undef c
1021 	}
1022 	if (anychange != 0) {
1023 		if (diff_format == D_CONTEXT)
1024 			dump_context_vec(f1, f2, flags);
1025 		else if (diff_format == D_UNIFIED)
1026 			dump_unified_vec(f1, f2, flags);
1027 	}
1028 }
1029 
1030 static void
1031 range(int a, int b, const char *separator)
1032 {
1033 	diff_output("%d", a > b ? b : a);
1034 	if (a < b)
1035 		diff_output("%s%d", separator, b);
1036 }
1037 
1038 static void
1039 uni_range(int a, int b)
1040 {
1041 	if (a < b)
1042 		diff_output("%d,%d", a, b - a + 1);
1043 	else if (a == b)
1044 		diff_output("%d", b);
1045 	else
1046 		diff_output("%d,0", b);
1047 }
1048 
1049 static char *
1050 preadline(int fd, size_t rlen, off_t off)
1051 {
1052 	char *line;
1053 	ssize_t nr;
1054 
1055 	line = xmalloc(rlen + 1);
1056 	if ((nr = pread(fd, line, rlen, off)) < 0)
1057 		err(2, "preadline");
1058 	if (nr > 0 && line[nr-1] == '\n')
1059 		nr--;
1060 	line[nr] = '\0';
1061 	return (line);
1062 }
1063 
1064 static int
1065 ignoreline(char *line)
1066 {
1067 	int ret;
1068 
1069 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1070 	free(line);
1071 	return (ret == 0);	/* if it matched, it should be ignored. */
1072 }
1073 
1074 /*
1075  * Indicate that there is a difference between lines a and b of the from file
1076  * to get to lines c to d of the to file.  If a is greater then b then there
1077  * are no lines in the from file involved and this means that there were
1078  * lines appended (beginning at b).  If c is greater than d then there are
1079  * lines missing from the to file.
1080  */
1081 static void
1082 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1083     int *pflags)
1084 {
1085 	static size_t max_context = 64;
1086 	long curpos;
1087 	int i, nc, f;
1088 	const char *walk;
1089 
1090 restart:
1091 	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1092 	    a > b && c > d)
1093 		return;
1094 	if (ignore_pats != NULL) {
1095 		char *line;
1096 		/*
1097 		 * All lines in the change, insert, or delete must
1098 		 * match an ignore pattern for the change to be
1099 		 * ignored.
1100 		 */
1101 		if (a <= b) {		/* Changes and deletes. */
1102 			for (i = a; i <= b; i++) {
1103 				line = preadline(fileno(f1),
1104 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1105 				if (!ignoreline(line))
1106 					goto proceed;
1107 			}
1108 		}
1109 		if (a > b || c <= d) {	/* Changes and inserts. */
1110 			for (i = c; i <= d; i++) {
1111 				line = preadline(fileno(f2),
1112 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1113 				if (!ignoreline(line))
1114 					goto proceed;
1115 			}
1116 		}
1117 		return;
1118 	}
1119 proceed:
1120 	if (*pflags & D_HEADER && diff_format != D_BRIEF) {
1121 		diff_output("%s %s %s\n", diffargs, file1, file2);
1122 		*pflags &= ~D_HEADER;
1123 	}
1124 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1125 		/*
1126 		 * Allocate change records as needed.
1127 		 */
1128 		if (context_vec_ptr == context_vec_end - 1) {
1129 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1130 			max_context <<= 1;
1131 			context_vec_start = xreallocarray(context_vec_start,
1132 			    max_context, sizeof(*context_vec_start));
1133 			context_vec_end = context_vec_start + max_context;
1134 			context_vec_ptr = context_vec_start + offset;
1135 		}
1136 		if (anychange == 0) {
1137 			/*
1138 			 * Print the context/unidiff header first time through.
1139 			 */
1140 			print_header(file1, file2);
1141 			anychange = 1;
1142 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1143 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1144 			/*
1145 			 * If this change is more than 'diff_context' lines from the
1146 			 * previous change, dump the record and reset it.
1147 			 */
1148 			if (diff_format == D_CONTEXT)
1149 				dump_context_vec(f1, f2, *pflags);
1150 			else
1151 				dump_unified_vec(f1, f2, *pflags);
1152 		}
1153 		context_vec_ptr++;
1154 		context_vec_ptr->a = a;
1155 		context_vec_ptr->b = b;
1156 		context_vec_ptr->c = c;
1157 		context_vec_ptr->d = d;
1158 		return;
1159 	}
1160 	if (anychange == 0)
1161 		anychange = 1;
1162 	switch (diff_format) {
1163 	case D_BRIEF:
1164 		return;
1165 	case D_NORMAL:
1166 	case D_EDIT:
1167 		range(a, b, ",");
1168 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1169 		if (diff_format == D_NORMAL)
1170 			range(c, d, ",");
1171 		diff_output("\n");
1172 		break;
1173 	case D_REVERSE:
1174 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1175 		range(a, b, " ");
1176 		diff_output("\n");
1177 		break;
1178 	case D_NREVERSE:
1179 		if (a > b)
1180 			diff_output("a%d %d\n", b, d - c + 1);
1181 		else {
1182 			diff_output("d%d %d\n", a, b - a + 1);
1183 			if (!(c > d))
1184 				/* add changed lines */
1185 				diff_output("a%d %d\n", b, d - c + 1);
1186 		}
1187 		break;
1188 	}
1189 	if (diff_format == D_GFORMAT) {
1190 		curpos = ftell(f1);
1191 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1192 		nc = ixold[a > b ? b : a - 1] - curpos;
1193 		for (i = 0; i < nc; i++)
1194 			diff_output("%c", getc(f1));
1195 		for (walk = group_format; *walk != '\0'; walk++) {
1196 			if (*walk == '%') {
1197 				walk++;
1198 				switch (*walk) {
1199 				case '<':
1200 					fetch(ixold, a, b, f1, '<', 1, *pflags);
1201 					break;
1202 				case '>':
1203 					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1204 					break;
1205 				default:
1206 					diff_output("%%%c", *walk);
1207 					break;
1208 				}
1209 				continue;
1210 			}
1211 			diff_output("%c", *walk);
1212 		}
1213 	}
1214 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1215 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1216 		if (a <= b && c <= d && diff_format == D_NORMAL)
1217 			diff_output("---\n");
1218 	}
1219 	f = 0;
1220 	if (diff_format != D_GFORMAT)
1221 		f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1222 	if (f != 0 && diff_format == D_EDIT) {
1223 		/*
1224 		 * A non-zero return value for D_EDIT indicates that the
1225 		 * last line printed was a bare dot (".") that has been
1226 		 * escaped as ".." to prevent ed(1) from misinterpreting
1227 		 * it.  We have to add a substitute command to change this
1228 		 * back and restart where we left off.
1229 		 */
1230 		diff_output(".\n");
1231 		diff_output("%ds/.//\n", a + f - 1);
1232 		b = a + f - 1;
1233 		a = b + 1;
1234 		c += f;
1235 		goto restart;
1236 	}
1237 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1238 		diff_output(".\n");
1239 	if (inifdef) {
1240 		diff_output("#endif /* %s */\n", ifdefname);
1241 		inifdef = 0;
1242 	}
1243 }
1244 
1245 static int
1246 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1247 {
1248 	int i, j, c, lastc, col, nc;
1249 	int	newcol;
1250 
1251 	/*
1252 	 * When doing #ifdef's, copy down to current line
1253 	 * if this is the first file, so that stuff makes it to output.
1254 	 */
1255 	if ((diff_format == D_IFDEF) && oldfile) {
1256 		long curpos = ftell(lb);
1257 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1258 		nc = f[a > b ? b : a - 1] - curpos;
1259 		for (i = 0; i < nc; i++)
1260 			diff_output("%c", getc(lb));
1261 	}
1262 	if (a > b)
1263 		return (0);
1264 	if (diff_format == D_IFDEF) {
1265 		if (inifdef) {
1266 			diff_output("#else /* %s%s */\n",
1267 			    oldfile == 1 ? "!" : "", ifdefname);
1268 		} else {
1269 			if (oldfile)
1270 				diff_output("#ifndef %s\n", ifdefname);
1271 			else
1272 				diff_output("#ifdef %s\n", ifdefname);
1273 		}
1274 		inifdef = 1 + oldfile;
1275 	}
1276 	for (i = a; i <= b; i++) {
1277 		fseek(lb, f[i - 1], SEEK_SET);
1278 		nc = f[i] - f[i - 1];
1279 		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1280 		    ch != '\0') {
1281 			diff_output("%c", ch);
1282 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1283 			    || diff_format == D_UNIFIED))
1284 				diff_output("\t");
1285 			else if (diff_format != D_UNIFIED)
1286 				diff_output(" ");
1287 		}
1288 		col = 0;
1289 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1290 			if ((c = getc(lb)) == EOF) {
1291 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1292 				    diff_format == D_NREVERSE)
1293 					warnx("No newline at end of file");
1294 				else
1295 					diff_output("\n\\ No newline at end of "
1296 					    "file\n");
1297 				return (0);
1298 			}
1299 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1300 				newcol = ((col/tabsize)+1)*tabsize;
1301 				do {
1302 					diff_output(" ");
1303 				} while (++col < newcol);
1304 			} else {
1305 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1306 				    && lastc == '.') {
1307 					/*
1308 					 * Don't print a bare "." line
1309 					 * since that will confuse ed(1).
1310 					 * Print ".." instead and return,
1311 					 * giving the caller an offset
1312 					 * from which to restart.
1313 					 */
1314 					diff_output(".\n");
1315 					return (i - a + 1);
1316 				}
1317 				diff_output("%c", c);
1318 				col++;
1319 			}
1320 		}
1321 	}
1322 	return (0);
1323 }
1324 
1325 /*
1326  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1327  */
1328 static int
1329 readhash(FILE *f, int flags)
1330 {
1331 	int i, t, space;
1332 	int sum;
1333 
1334 	sum = 1;
1335 	space = 0;
1336 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1337 		if (flags & D_IGNORECASE)
1338 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1339 				if (flags & D_STRIPCR && t == '\r') {
1340 					t = getc(f);
1341 					if (t == '\n')
1342 						break;
1343 					ungetc(t, f);
1344 				}
1345 				if (t == EOF) {
1346 					if (i == 0)
1347 						return (0);
1348 					break;
1349 				}
1350 				sum = sum * 127 + chrtran[t];
1351 			}
1352 		else
1353 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1354 				if (flags & D_STRIPCR && t == '\r') {
1355 					t = getc(f);
1356 					if (t == '\n')
1357 						break;
1358 					ungetc(t, f);
1359 				}
1360 				if (t == EOF) {
1361 					if (i == 0)
1362 						return (0);
1363 					break;
1364 				}
1365 				sum = sum * 127 + t;
1366 			}
1367 	} else {
1368 		for (i = 0;;) {
1369 			switch (t = getc(f)) {
1370 			case '\r':
1371 			case '\t':
1372 			case '\v':
1373 			case '\f':
1374 			case ' ':
1375 				space++;
1376 				continue;
1377 			default:
1378 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1379 					i++;
1380 					space = 0;
1381 				}
1382 				sum = sum * 127 + chrtran[t];
1383 				i++;
1384 				continue;
1385 			case EOF:
1386 				if (i == 0)
1387 					return (0);
1388 				/* FALLTHROUGH */
1389 			case '\n':
1390 				break;
1391 			}
1392 			break;
1393 		}
1394 	}
1395 	/*
1396 	 * There is a remote possibility that we end up with a zero sum.
1397 	 * Zero is used as an EOF marker, so return 1 instead.
1398 	 */
1399 	return (sum == 0 ? 1 : sum);
1400 }
1401 
1402 static int
1403 asciifile(FILE *f)
1404 {
1405 	unsigned char buf[BUFSIZ];
1406 	size_t cnt;
1407 
1408 	if (f == NULL)
1409 		return (1);
1410 
1411 	rewind(f);
1412 	cnt = fread(buf, 1, sizeof(buf), f);
1413 	return (memchr(buf, '\0', cnt) == NULL);
1414 }
1415 
1416 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1417 
1418 static char *
1419 match_function(const long *f, int pos, FILE *fp)
1420 {
1421 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1422 	size_t nc;
1423 	int last = lastline;
1424 	const char *state = NULL;
1425 
1426 	lastline = pos;
1427 	while (pos > last) {
1428 		fseek(fp, f[pos - 1], SEEK_SET);
1429 		nc = f[pos] - f[pos - 1];
1430 		if (nc >= sizeof(buf))
1431 			nc = sizeof(buf) - 1;
1432 		nc = fread(buf, 1, nc, fp);
1433 		if (nc > 0) {
1434 			buf[nc] = '\0';
1435 			buf[strcspn(buf, "\n")] = '\0';
1436 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1437 				if (begins_with(buf, "private:")) {
1438 					if (!state)
1439 						state = " (private)";
1440 				} else if (begins_with(buf, "protected:")) {
1441 					if (!state)
1442 						state = " (protected)";
1443 				} else if (begins_with(buf, "public:")) {
1444 					if (!state)
1445 						state = " (public)";
1446 				} else {
1447 					strlcpy(lastbuf, buf, sizeof lastbuf);
1448 					if (state)
1449 						strlcat(lastbuf, state,
1450 						    sizeof lastbuf);
1451 					lastmatchline = pos;
1452 					return lastbuf;
1453 				}
1454 			}
1455 		}
1456 		pos--;
1457 	}
1458 	return lastmatchline > 0 ? lastbuf : NULL;
1459 }
1460 
1461 /* dump accumulated "context" diff changes */
1462 static void
1463 dump_context_vec(FILE *f1, FILE *f2, int flags)
1464 {
1465 	struct context_vec *cvp = context_vec_start;
1466 	int lowa, upb, lowc, upd, do_output;
1467 	int a, b, c, d;
1468 	char ch, *f;
1469 
1470 	if (context_vec_start > context_vec_ptr)
1471 		return;
1472 
1473 	b = d = 0;		/* gcc */
1474 	lowa = MAX(1, cvp->a - diff_context);
1475 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1476 	lowc = MAX(1, cvp->c - diff_context);
1477 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1478 
1479 	diff_output("***************");
1480 	if ((flags & D_PROTOTYPE)) {
1481 		f = match_function(ixold, lowa-1, f1);
1482 		if (f != NULL)
1483 			diff_output(" %s", f);
1484 	}
1485 	diff_output("\n*** ");
1486 	range(lowa, upb, ",");
1487 	diff_output(" ****\n");
1488 
1489 	/*
1490 	 * Output changes to the "old" file.  The first loop suppresses
1491 	 * output if there were no changes to the "old" file (we'll see
1492 	 * the "old" lines as context in the "new" list).
1493 	 */
1494 	do_output = 0;
1495 	for (; cvp <= context_vec_ptr; cvp++)
1496 		if (cvp->a <= cvp->b) {
1497 			cvp = context_vec_start;
1498 			do_output++;
1499 			break;
1500 		}
1501 	if (do_output) {
1502 		while (cvp <= context_vec_ptr) {
1503 			a = cvp->a;
1504 			b = cvp->b;
1505 			c = cvp->c;
1506 			d = cvp->d;
1507 
1508 			if (a <= b && c <= d)
1509 				ch = 'c';
1510 			else
1511 				ch = (a <= b) ? 'd' : 'a';
1512 
1513 			if (ch == 'a')
1514 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1515 			else {
1516 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1517 				fetch(ixold, a, b, f1,
1518 				    ch == 'c' ? '!' : '-', 0, flags);
1519 			}
1520 			lowa = b + 1;
1521 			cvp++;
1522 		}
1523 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1524 	}
1525 	/* output changes to the "new" file */
1526 	diff_output("--- ");
1527 	range(lowc, upd, ",");
1528 	diff_output(" ----\n");
1529 
1530 	do_output = 0;
1531 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1532 		if (cvp->c <= cvp->d) {
1533 			cvp = context_vec_start;
1534 			do_output++;
1535 			break;
1536 		}
1537 	if (do_output) {
1538 		while (cvp <= context_vec_ptr) {
1539 			a = cvp->a;
1540 			b = cvp->b;
1541 			c = cvp->c;
1542 			d = cvp->d;
1543 
1544 			if (a <= b && c <= d)
1545 				ch = 'c';
1546 			else
1547 				ch = (a <= b) ? 'd' : 'a';
1548 
1549 			if (ch == 'd')
1550 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1551 			else {
1552 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1553 				fetch(ixnew, c, d, f2,
1554 				    ch == 'c' ? '!' : '+', 0, flags);
1555 			}
1556 			lowc = d + 1;
1557 			cvp++;
1558 		}
1559 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1560 	}
1561 	context_vec_ptr = context_vec_start - 1;
1562 }
1563 
1564 /* dump accumulated "unified" diff changes */
1565 static void
1566 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1567 {
1568 	struct context_vec *cvp = context_vec_start;
1569 	int lowa, upb, lowc, upd;
1570 	int a, b, c, d;
1571 	char ch, *f;
1572 
1573 	if (context_vec_start > context_vec_ptr)
1574 		return;
1575 
1576 	b = d = 0;		/* gcc */
1577 	lowa = MAX(1, cvp->a - diff_context);
1578 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1579 	lowc = MAX(1, cvp->c - diff_context);
1580 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1581 
1582 	diff_output("@@ -");
1583 	uni_range(lowa, upb);
1584 	diff_output(" +");
1585 	uni_range(lowc, upd);
1586 	diff_output(" @@");
1587 	if ((flags & D_PROTOTYPE)) {
1588 		f = match_function(ixold, lowa-1, f1);
1589 		if (f != NULL)
1590 			diff_output(" %s", f);
1591 	}
1592 	diff_output("\n");
1593 
1594 	/*
1595 	 * Output changes in "unified" diff format--the old and new lines
1596 	 * are printed together.
1597 	 */
1598 	for (; cvp <= context_vec_ptr; cvp++) {
1599 		a = cvp->a;
1600 		b = cvp->b;
1601 		c = cvp->c;
1602 		d = cvp->d;
1603 
1604 		/*
1605 		 * c: both new and old changes
1606 		 * d: only changes in the old file
1607 		 * a: only changes in the new file
1608 		 */
1609 		if (a <= b && c <= d)
1610 			ch = 'c';
1611 		else
1612 			ch = (a <= b) ? 'd' : 'a';
1613 
1614 		switch (ch) {
1615 		case 'c':
1616 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1617 			fetch(ixold, a, b, f1, '-', 0, flags);
1618 			fetch(ixnew, c, d, f2, '+', 0, flags);
1619 			break;
1620 		case 'd':
1621 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1622 			fetch(ixold, a, b, f1, '-', 0, flags);
1623 			break;
1624 		case 'a':
1625 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1626 			fetch(ixnew, c, d, f2, '+', 0, flags);
1627 			break;
1628 		}
1629 		lowa = b + 1;
1630 		lowc = d + 1;
1631 	}
1632 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1633 
1634 	context_vec_ptr = context_vec_start - 1;
1635 }
1636 
1637 static void
1638 print_header(const char *file1, const char *file2)
1639 {
1640 	const char *time_format;
1641 	char buf1[256];
1642 	char buf2[256];
1643 	char end1[10];
1644 	char end2[10];
1645 	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1646 	int nsec1 = stb1.st_mtim.tv_nsec;
1647 	int nsec2 = stb2.st_mtim.tv_nsec;
1648 
1649 	time_format = "%Y-%m-%d %H:%M:%S";
1650 
1651 	if (cflag)
1652 		time_format = "%c";
1653 	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1654 	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1655 	strftime(buf1, 256, time_format, tm_ptr1);
1656 	strftime(buf2, 256, time_format, tm_ptr2);
1657 	if (!cflag) {
1658 		strftime(end1, 10, "%z", tm_ptr1);
1659 		strftime(end2, 10, "%z", tm_ptr2);
1660 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1661 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1662 	}
1663 	if (label[0] != NULL)
1664 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1665 		    label[0]);
1666 	else
1667 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1668 		    file1, buf1);
1669 	if (label[1] != NULL)
1670 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1671 		    label[1]);
1672 	else
1673 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1674 		    file2, buf2);
1675 }
1676