xref: /freebsd/usr.bin/diff/diffreg.c (revision 0e6acb26)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <stddef.h>
84 #include <stdint.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <limits.h>
90 #include <signal.h>
91 
92 #include "diff.h"
93 #include "xmalloc.h"
94 
95 #define _PATH_PR "/usr/bin/pr"
96 
97 /*
98  * diff - compare two files.
99  */
100 
101 /*
102  *	Uses an algorithm due to Harold Stone, which finds
103  *	a pair of longest identical subsequences in the two
104  *	files.
105  *
106  *	The major goal is to generate the match vector J.
107  *	J[i] is the index of the line in file1 corresponding
108  *	to line i file0. J[i] = 0 if there is no
109  *	such line in file1.
110  *
111  *	Lines are hashed so as to work in core. All potential
112  *	matches are located by sorting the lines of each file
113  *	on the hash (called ``value''). In particular, this
114  *	collects the equivalence classes in file1 together.
115  *	Subroutine equiv replaces the value of each line in
116  *	file0 by the index of the first element of its
117  *	matching equivalence in (the reordered) file1.
118  *	To save space equiv squeezes file1 into a single
119  *	array member in which the equivalence classes
120  *	are simply concatenated, except that their first
121  *	members are flagged by changing sign.
122  *
123  *	Next the indices that point into member are unsorted into
124  *	array class according to the original order of file0.
125  *
126  *	The cleverness lies in routine stone. This marches
127  *	through the lines of file0, developing a vector klist
128  *	of "k-candidates". At step i a k-candidate is a matched
129  *	pair of lines x,y (x in file0 y in file1) such that
130  *	there is a common subsequence of length k
131  *	between the first i lines of file0 and the first y
132  *	lines of file1, but there is no such subsequence for
133  *	any smaller y. x is the earliest possible mate to y
134  *	that occurs in such a subsequence.
135  *
136  *	Whenever any of the members of the equivalence class of
137  *	lines in file1 matable to a line in file0 has serial number
138  *	less than the y of some k-candidate, that k-candidate
139  *	with the smallest such y is replaced. The new
140  *	k-candidate is chained (via pred) to the current
141  *	k-1 candidate so that the actual subsequence can
142  *	be recovered. When a member has serial number greater
143  *	that the y of all k-candidates, the klist is extended.
144  *	At the end, the longest subsequence is pulled out
145  *	and placed in the array J by unravel
146  *
147  *	With J in hand, the matches there recorded are
148  *	check'ed against reality to assure that no spurious
149  *	matches have crept in due to hashing. If they have,
150  *	they are broken, and "jackpot" is recorded--a harmless
151  *	matter except that a true match for a spuriously
152  *	mated line may now be unnecessarily reported as a change.
153  *
154  *	Much of the complexity of the program comes simply
155  *	from trying to minimize core utilization and
156  *	maximize the range of doable problems by dynamically
157  *	allocating what is needed and reusing what is not.
158  *	The core requirements for problems larger than somewhat
159  *	are (in words) 2*length(file0) + length(file1) +
160  *	3*(number of k-candidates installed),  typically about
161  *	6n words for files of length n.
162  */
163 
164 struct cand {
165 	int	x;
166 	int	y;
167 	int	pred;
168 };
169 
170 static struct line {
171 	int	serial;
172 	int	value;
173 } *file[2];
174 
175 /*
176  * The following struct is used to record change information when
177  * doing a "context" or "unified" diff.  (see routine "change" to
178  * understand the highly mnemonic field names)
179  */
180 struct context_vec {
181 	int	a;		/* start line in old file */
182 	int	b;		/* end line in old file */
183 	int	c;		/* start line in new file */
184 	int	d;		/* end line in new file */
185 };
186 
187 #define	diff_output	printf
188 static FILE	*opentemp(const char *);
189 static void	 output(char *, FILE *, char *, FILE *, int);
190 static void	 check(FILE *, FILE *, int);
191 static void	 range(int, int, const char *);
192 static void	 uni_range(int, int);
193 static void	 dump_context_vec(FILE *, FILE *, int);
194 static void	 dump_unified_vec(FILE *, FILE *, int);
195 static void	 prepare(int, FILE *, size_t, int);
196 static void	 prune(void);
197 static void	 equiv(struct line *, int, struct line *, int, int *);
198 static void	 unravel(int);
199 static void	 unsort(struct line *, int, int *);
200 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
201 static void	 sort(struct line *, int);
202 static void	 print_header(const char *, const char *);
203 static int	 ignoreline(char *);
204 static int	 asciifile(FILE *);
205 static int	 fetch(long *, int, int, FILE *, int, int, int);
206 static int	 newcand(int, int, int);
207 static int	 search(int *, int, int);
208 static int	 skipline(FILE *);
209 static int	 isqrt(int);
210 static int	 stone(int *, int, int *, int *, int);
211 static int	 readhash(FILE *, int);
212 static int	 files_differ(FILE *, FILE *, int);
213 static char	*match_function(const long *, int, FILE *);
214 static char	*preadline(int, size_t, off_t);
215 
216 static int  *J;			/* will be overlaid on class */
217 static int  *class;		/* will be overlaid on file[0] */
218 static int  *klist;		/* will be overlaid on file[0] after class */
219 static int  *member;		/* will be overlaid on file[1] */
220 static int   clen;
221 static int   inifdef;		/* whether or not we are in a #ifdef block */
222 static int   len[2];
223 static int   pref, suff;	/* length of prefix and suffix */
224 static int   slen[2];
225 static int   anychange;
226 static long *ixnew;		/* will be overlaid on file[1] */
227 static long *ixold;		/* will be overlaid on klist */
228 static struct cand *clist;	/* merely a free storage pot for candidates */
229 static int   clistlen;		/* the length of clist */
230 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
231 static u_char *chrtran;		/* translation table for case-folding */
232 static struct context_vec *context_vec_start;
233 static struct context_vec *context_vec_end;
234 static struct context_vec *context_vec_ptr;
235 
236 #define FUNCTION_CONTEXT_SIZE	55
237 static char lastbuf[FUNCTION_CONTEXT_SIZE];
238 static int lastline;
239 static int lastmatchline;
240 
241 
242 /*
243  * chrtran points to one of 2 translation tables: cup2low if folding upper to
244  * lower case clow2low if not folding case
245  */
246 static u_char clow2low[256] = {
247 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
249 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
250 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
251 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
252 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
253 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
254 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
255 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
256 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
257 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
258 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
259 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
260 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
261 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
262 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
263 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
264 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
265 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
266 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
267 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
268 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
269 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
270 	0xfd, 0xfe, 0xff
271 };
272 
273 static u_char cup2low[256] = {
274 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
275 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
276 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
277 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
278 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
279 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
280 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
281 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
282 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
283 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
284 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
285 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
286 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
287 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
288 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
289 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
290 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
291 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
292 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
293 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
294 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
295 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
296 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
297 	0xfd, 0xfe, 0xff
298 };
299 
300 int
301 diffreg(char *file1, char *file2, int flags, int capsicum)
302 {
303 	FILE *f1, *f2;
304 	int i, rval;
305 	int	ostdout = -1;
306 	int pr_pd, kq;
307 	struct kevent *e;
308 	cap_rights_t rights_ro;
309 
310 	e = NULL;
311 	kq = -1;
312 	f1 = f2 = NULL;
313 	rval = D_SAME;
314 	anychange = 0;
315 	lastline = 0;
316 	lastmatchline = 0;
317 	context_vec_ptr = context_vec_start - 1;
318 	if (flags & D_IGNORECASE)
319 		chrtran = cup2low;
320 	else
321 		chrtran = clow2low;
322 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
323 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
324 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
325 		goto closem;
326 
327 	if (flags & D_EMPTY1)
328 		f1 = fopen(_PATH_DEVNULL, "r");
329 	else {
330 		if (!S_ISREG(stb1.st_mode)) {
331 			if ((f1 = opentemp(file1)) == NULL ||
332 			    fstat(fileno(f1), &stb1) < 0) {
333 				warn("%s", file1);
334 				status |= 2;
335 				goto closem;
336 			}
337 		} else if (strcmp(file1, "-") == 0)
338 			f1 = stdin;
339 		else
340 			f1 = fopen(file1, "r");
341 	}
342 	if (f1 == NULL) {
343 		warn("%s", file1);
344 		status |= 2;
345 		goto closem;
346 	}
347 
348 	if (flags & D_EMPTY2)
349 		f2 = fopen(_PATH_DEVNULL, "r");
350 	else {
351 		if (!S_ISREG(stb2.st_mode)) {
352 			if ((f2 = opentemp(file2)) == NULL ||
353 			    fstat(fileno(f2), &stb2) < 0) {
354 				warn("%s", file2);
355 				status |= 2;
356 				goto closem;
357 			}
358 		} else if (strcmp(file2, "-") == 0)
359 			f2 = stdin;
360 		else
361 			f2 = fopen(file2, "r");
362 	}
363 	if (f2 == NULL) {
364 		warn("%s", file2);
365 		status |= 2;
366 		goto closem;
367 	}
368 
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int	 pfd[2];
372 		pid_t	pid;
373 		char	*header;
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		signal(SIGPIPE, SIG_IGN);
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
381 		case -1:
382 			status |= 2;
383 			free(header);
384 			err(2, "No more processes");
385 		case 0:
386 			/* child */
387 			if (pfd[0] != STDIN_FILENO) {
388 				dup2(pfd[0], STDIN_FILENO);
389 				close(pfd[0]);
390 			}
391 			close(pfd[1]);
392 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
393 			_exit(127);
394 		default:
395 
396 			/* parent */
397 			if (pfd[1] != STDOUT_FILENO) {
398 				ostdout = dup(STDOUT_FILENO);
399 				dup2(pfd[1], STDOUT_FILENO);
400 				close(pfd[1]);
401 			}
402 			close(pfd[0]);
403 			rewind(stdout);
404 			free(header);
405 			kq = kqueue();
406 			if (kq == -1)
407 				err(2, "kqueue");
408 			e = xmalloc(sizeof(struct kevent));
409 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
410 			    NULL);
411 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
412 				err(2, "kevent");
413 		}
414 	}
415 
416 	if (capsicum) {
417 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
418 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
419 		    && errno != ENOSYS)
420 			err(2, "unable to limit rights on: %s", file1);
421 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
422 		    errno != ENOSYS)
423 			err(2, "unable to limit rights on: %s", file2);
424 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
425 			/* stding has already been limited */
426 			if (caph_limit_stderr() == -1)
427 				err(2, "unable to limit stderr");
428 			if (caph_limit_stdout() == -1)
429 				err(2, "unable to limit stdout");
430 		} else if (caph_limit_stdio() == -1)
431 				err(2, "unable to limit stdio");
432 
433 		caph_cache_catpages();
434 		caph_cache_tzdata();
435 		if (cap_enter() < 0 && errno != ENOSYS)
436 			err(2, "unable to enter capability mode");
437 	}
438 
439 	switch (files_differ(f1, f2, flags)) {
440 	case 0:
441 		goto closem;
442 	case 1:
443 		break;
444 	default:
445 		/* error */
446 		status |= 2;
447 		goto closem;
448 	}
449 
450 	if ((flags & D_FORCEASCII) == 0 &&
451 	    (!asciifile(f1) || !asciifile(f2))) {
452 		rval = D_BINARY;
453 		status |= 1;
454 		goto closem;
455 	}
456 	prepare(0, f1, stb1.st_size, flags);
457 	prepare(1, f2, stb2.st_size, flags);
458 
459 	prune();
460 	sort(sfile[0], slen[0]);
461 	sort(sfile[1], slen[1]);
462 
463 	member = (int *)file[1];
464 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
465 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
466 
467 	class = (int *)file[0];
468 	unsort(sfile[0], slen[0], class);
469 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
470 
471 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
472 	clen = 0;
473 	clistlen = 100;
474 	clist = xcalloc(clistlen, sizeof(*clist));
475 	i = stone(class, slen[0], member, klist, flags);
476 	free(member);
477 	free(class);
478 
479 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
480 	unravel(klist[i]);
481 	free(clist);
482 	free(klist);
483 
484 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
485 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
486 	check(f1, f2, flags);
487 	output(file1, f1, file2, f2, flags);
488 	if (ostdout != -1 && e != NULL) {
489 		/* close the pipe to pr and restore stdout */
490 		int wstatus;
491 
492 		fflush(stdout);
493 		if (ostdout != STDOUT_FILENO) {
494 			close(STDOUT_FILENO);
495 			dup2(ostdout, STDOUT_FILENO);
496 			close(ostdout);
497 		}
498 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
499 			err(2, "kevent");
500 		wstatus = e[0].data;
501 		close(kq);
502 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
503 			errx(2, "pr exited abnormally");
504 		else if (WIFSIGNALED(wstatus))
505 			errx(2, "pr killed by signal %d",
506 			    WTERMSIG(wstatus));
507 	}
508 
509 closem:
510 	if (anychange) {
511 		status |= 1;
512 		if (rval == D_SAME)
513 			rval = D_DIFFER;
514 	}
515 	if (f1 != NULL)
516 		fclose(f1);
517 	if (f2 != NULL)
518 		fclose(f2);
519 
520 	return (rval);
521 }
522 
523 /*
524  * Check to see if the given files differ.
525  * Returns 0 if they are the same, 1 if different, and -1 on error.
526  * XXX - could use code from cmp(1) [faster]
527  */
528 static int
529 files_differ(FILE *f1, FILE *f2, int flags)
530 {
531 	char buf1[BUFSIZ], buf2[BUFSIZ];
532 	size_t i, j;
533 
534 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
535 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
536 		return (1);
537 	for (;;) {
538 		i = fread(buf1, 1, sizeof(buf1), f1);
539 		j = fread(buf2, 1, sizeof(buf2), f2);
540 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
541 			return (-1);
542 		if (i != j)
543 			return (1);
544 		if (i == 0)
545 			return (0);
546 		if (memcmp(buf1, buf2, i) != 0)
547 			return (1);
548 	}
549 }
550 
551 static FILE *
552 opentemp(const char *f)
553 {
554 	char buf[BUFSIZ], tempfile[PATH_MAX];
555 	ssize_t nread;
556 	int ifd, ofd;
557 
558 	if (strcmp(f, "-") == 0)
559 		ifd = STDIN_FILENO;
560 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
561 		return (NULL);
562 
563 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
564 
565 	if ((ofd = mkstemp(tempfile)) < 0) {
566 		close(ifd);
567 		return (NULL);
568 	}
569 	unlink(tempfile);
570 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
571 		if (write(ofd, buf, nread) != nread) {
572 			close(ifd);
573 			close(ofd);
574 			return (NULL);
575 		}
576 	}
577 	close(ifd);
578 	lseek(ofd, (off_t)0, SEEK_SET);
579 	return (fdopen(ofd, "r"));
580 }
581 
582 char *
583 splice(char *dir, char *path)
584 {
585 	char *tail, *buf;
586 	size_t dirlen;
587 
588 	dirlen = strlen(dir);
589 	while (dirlen != 0 && dir[dirlen - 1] == '/')
590 	    dirlen--;
591 	if ((tail = strrchr(path, '/')) == NULL)
592 		tail = path;
593 	else
594 		tail++;
595 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
596 	return (buf);
597 }
598 
599 static void
600 prepare(int i, FILE *fd, size_t filesize, int flags)
601 {
602 	struct line *p;
603 	int h;
604 	size_t sz, j;
605 
606 	rewind(fd);
607 
608 	sz = MIN(filesize, SIZE_MAX) / 25;
609 	if (sz < 100)
610 		sz = 100;
611 
612 	p = xcalloc(sz + 3, sizeof(*p));
613 	for (j = 0; (h = readhash(fd, flags));) {
614 		if (j == sz) {
615 			sz = sz * 3 / 2;
616 			p = xreallocarray(p, sz + 3, sizeof(*p));
617 		}
618 		p[++j].value = h;
619 	}
620 	len[i] = j;
621 	file[i] = p;
622 }
623 
624 static void
625 prune(void)
626 {
627 	int i, j;
628 
629 	for (pref = 0; pref < len[0] && pref < len[1] &&
630 	    file[0][pref + 1].value == file[1][pref + 1].value;
631 	    pref++)
632 		;
633 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
634 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
635 	    suff++)
636 		;
637 	for (j = 0; j < 2; j++) {
638 		sfile[j] = file[j] + pref;
639 		slen[j] = len[j] - pref - suff;
640 		for (i = 0; i <= slen[j]; i++)
641 			sfile[j][i].serial = i;
642 	}
643 }
644 
645 static void
646 equiv(struct line *a, int n, struct line *b, int m, int *c)
647 {
648 	int i, j;
649 
650 	i = j = 1;
651 	while (i <= n && j <= m) {
652 		if (a[i].value < b[j].value)
653 			a[i++].value = 0;
654 		else if (a[i].value == b[j].value)
655 			a[i++].value = j;
656 		else
657 			j++;
658 	}
659 	while (i <= n)
660 		a[i++].value = 0;
661 	b[m + 1].value = 0;
662 	j = 0;
663 	while (++j <= m) {
664 		c[j] = -b[j].serial;
665 		while (b[j + 1].value == b[j].value) {
666 			j++;
667 			c[j] = b[j].serial;
668 		}
669 	}
670 	c[j] = -1;
671 }
672 
673 /* Code taken from ping.c */
674 static int
675 isqrt(int n)
676 {
677 	int y, x = 1;
678 
679 	if (n == 0)
680 		return (0);
681 
682 	do { /* newton was a stinker */
683 		y = x;
684 		x = n / x;
685 		x += y;
686 		x /= 2;
687 	} while ((x - y) > 1 || (x - y) < -1);
688 
689 	return (x);
690 }
691 
692 static int
693 stone(int *a, int n, int *b, int *c, int flags)
694 {
695 	int i, k, y, j, l;
696 	int oldc, tc, oldl, sq;
697 	u_int numtries, bound;
698 
699 	if (flags & D_MINIMAL)
700 		bound = UINT_MAX;
701 	else {
702 		sq = isqrt(n);
703 		bound = MAX(256, sq);
704 	}
705 
706 	k = 0;
707 	c[0] = newcand(0, 0, 0);
708 	for (i = 1; i <= n; i++) {
709 		j = a[i];
710 		if (j == 0)
711 			continue;
712 		y = -b[j];
713 		oldl = 0;
714 		oldc = c[0];
715 		numtries = 0;
716 		do {
717 			if (y <= clist[oldc].y)
718 				continue;
719 			l = search(c, k, y);
720 			if (l != oldl + 1)
721 				oldc = c[l - 1];
722 			if (l <= k) {
723 				if (clist[c[l]].y <= y)
724 					continue;
725 				tc = c[l];
726 				c[l] = newcand(i, y, oldc);
727 				oldc = tc;
728 				oldl = l;
729 				numtries++;
730 			} else {
731 				c[l] = newcand(i, y, oldc);
732 				k++;
733 				break;
734 			}
735 		} while ((y = b[++j]) > 0 && numtries < bound);
736 	}
737 	return (k);
738 }
739 
740 static int
741 newcand(int x, int y, int pred)
742 {
743 	struct cand *q;
744 
745 	if (clen == clistlen) {
746 		clistlen = clistlen * 11 / 10;
747 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
748 	}
749 	q = clist + clen;
750 	q->x = x;
751 	q->y = y;
752 	q->pred = pred;
753 	return (clen++);
754 }
755 
756 static int
757 search(int *c, int k, int y)
758 {
759 	int i, j, l, t;
760 
761 	if (clist[c[k]].y < y)	/* quick look for typical case */
762 		return (k + 1);
763 	i = 0;
764 	j = k + 1;
765 	for (;;) {
766 		l = (i + j) / 2;
767 		if (l <= i)
768 			break;
769 		t = clist[c[l]].y;
770 		if (t > y)
771 			j = l;
772 		else if (t < y)
773 			i = l;
774 		else
775 			return (l);
776 	}
777 	return (l + 1);
778 }
779 
780 static void
781 unravel(int p)
782 {
783 	struct cand *q;
784 	int i;
785 
786 	for (i = 0; i <= len[0]; i++)
787 		J[i] = i <= pref ? i :
788 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
789 	for (q = clist + p; q->y != 0; q = clist + q->pred)
790 		J[q->x + pref] = q->y + pref;
791 }
792 
793 /*
794  * Check does double duty:
795  *  1.	ferret out any fortuitous correspondences due
796  *	to confounding by hashing (which result in "jackpot")
797  *  2.  collect random access indexes to the two files
798  */
799 static void
800 check(FILE *f1, FILE *f2, int flags)
801 {
802 	int i, j, jackpot, c, d;
803 	long ctold, ctnew;
804 
805 	rewind(f1);
806 	rewind(f2);
807 	j = 1;
808 	ixold[0] = ixnew[0] = 0;
809 	jackpot = 0;
810 	ctold = ctnew = 0;
811 	for (i = 1; i <= len[0]; i++) {
812 		if (J[i] == 0) {
813 			ixold[i] = ctold += skipline(f1);
814 			continue;
815 		}
816 		while (j < J[i]) {
817 			ixnew[j] = ctnew += skipline(f2);
818 			j++;
819 		}
820 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
821 			for (;;) {
822 				c = getc(f1);
823 				d = getc(f2);
824 				/*
825 				 * GNU diff ignores a missing newline
826 				 * in one file for -b or -w.
827 				 */
828 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
829 					if (c == EOF && d == '\n') {
830 						ctnew++;
831 						break;
832 					} else if (c == '\n' && d == EOF) {
833 						ctold++;
834 						break;
835 					}
836 				}
837 				ctold++;
838 				ctnew++;
839 				if (flags & D_STRIPCR) {
840 					if (c == '\r') {
841 						if ((c = getc(f1)) == '\n') {
842 							ctnew++;
843 							break;
844 						}
845 					}
846 					if (d == '\r') {
847 						if ((d = getc(f2)) == '\n') {
848 							ctold++;
849 							break;
850 						}
851 					}
852 				}
853 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
854 				    isspace(d)) {
855 					do {
856 						if (c == '\n')
857 							break;
858 						ctold++;
859 					} while (isspace(c = getc(f1)));
860 					do {
861 						if (d == '\n')
862 							break;
863 						ctnew++;
864 					} while (isspace(d = getc(f2)));
865 				} else if ((flags & D_IGNOREBLANKS)) {
866 					while (isspace(c) && c != '\n') {
867 						c = getc(f1);
868 						ctold++;
869 					}
870 					while (isspace(d) && d != '\n') {
871 						d = getc(f2);
872 						ctnew++;
873 					}
874 				}
875 				if (chrtran[c] != chrtran[d]) {
876 					jackpot++;
877 					J[i] = 0;
878 					if (c != '\n' && c != EOF)
879 						ctold += skipline(f1);
880 					if (d != '\n' && c != EOF)
881 						ctnew += skipline(f2);
882 					break;
883 				}
884 				if (c == '\n' || c == EOF)
885 					break;
886 			}
887 		} else {
888 			for (;;) {
889 				ctold++;
890 				ctnew++;
891 				if ((c = getc(f1)) != (d = getc(f2))) {
892 					/* jackpot++; */
893 					J[i] = 0;
894 					if (c != '\n' && c != EOF)
895 						ctold += skipline(f1);
896 					if (d != '\n' && c != EOF)
897 						ctnew += skipline(f2);
898 					break;
899 				}
900 				if (c == '\n' || c == EOF)
901 					break;
902 			}
903 		}
904 		ixold[i] = ctold;
905 		ixnew[j] = ctnew;
906 		j++;
907 	}
908 	for (; j <= len[1]; j++) {
909 		ixnew[j] = ctnew += skipline(f2);
910 	}
911 	/*
912 	 * if (jackpot)
913 	 *	fprintf(stderr, "jackpot\n");
914 	 */
915 }
916 
917 /* shellsort CACM #201 */
918 static void
919 sort(struct line *a, int n)
920 {
921 	struct line *ai, *aim, w;
922 	int j, m = 0, k;
923 
924 	if (n == 0)
925 		return;
926 	for (j = 1; j <= n; j *= 2)
927 		m = 2 * j - 1;
928 	for (m /= 2; m != 0; m /= 2) {
929 		k = n - m;
930 		for (j = 1; j <= k; j++) {
931 			for (ai = &a[j]; ai > a; ai -= m) {
932 				aim = &ai[m];
933 				if (aim < ai)
934 					break;	/* wraparound */
935 				if (aim->value > ai[0].value ||
936 				    (aim->value == ai[0].value &&
937 					aim->serial > ai[0].serial))
938 					break;
939 				w.value = ai[0].value;
940 				ai[0].value = aim->value;
941 				aim->value = w.value;
942 				w.serial = ai[0].serial;
943 				ai[0].serial = aim->serial;
944 				aim->serial = w.serial;
945 			}
946 		}
947 	}
948 }
949 
950 static void
951 unsort(struct line *f, int l, int *b)
952 {
953 	int *a, i;
954 
955 	a = xcalloc(l + 1, sizeof(*a));
956 	for (i = 1; i <= l; i++)
957 		a[f[i].serial] = f[i].value;
958 	for (i = 1; i <= l; i++)
959 		b[i] = a[i];
960 	free(a);
961 }
962 
963 static int
964 skipline(FILE *f)
965 {
966 	int i, c;
967 
968 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
969 		continue;
970 	return (i);
971 }
972 
973 static void
974 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
975 {
976 	int m, i0, i1, j0, j1;
977 
978 	rewind(f1);
979 	rewind(f2);
980 	m = len[0];
981 	J[0] = 0;
982 	J[m + 1] = len[1] + 1;
983 	if (diff_format != D_EDIT) {
984 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
985 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
986 				i0++;
987 			j0 = J[i0 - 1] + 1;
988 			i1 = i0 - 1;
989 			while (i1 < m && J[i1 + 1] == 0)
990 				i1++;
991 			j1 = J[i1 + 1] - 1;
992 			J[i1] = j1;
993 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
994 		}
995 	} else {
996 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
997 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
998 				i0--;
999 			j0 = J[i0 + 1] - 1;
1000 			i1 = i0 + 1;
1001 			while (i1 > 1 && J[i1 - 1] == 0)
1002 				i1--;
1003 			j1 = J[i1 - 1] + 1;
1004 			J[i1] = j1;
1005 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1006 		}
1007 	}
1008 	if (m == 0)
1009 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1010 	if (diff_format == D_IFDEF) {
1011 		for (;;) {
1012 #define	c i0
1013 			if ((c = getc(f1)) == EOF)
1014 				return;
1015 			diff_output("%c", c);
1016 		}
1017 #undef c
1018 	}
1019 	if (anychange != 0) {
1020 		if (diff_format == D_CONTEXT)
1021 			dump_context_vec(f1, f2, flags);
1022 		else if (diff_format == D_UNIFIED)
1023 			dump_unified_vec(f1, f2, flags);
1024 	}
1025 }
1026 
1027 static void
1028 range(int a, int b, const char *separator)
1029 {
1030 	diff_output("%d", a > b ? b : a);
1031 	if (a < b)
1032 		diff_output("%s%d", separator, b);
1033 }
1034 
1035 static void
1036 uni_range(int a, int b)
1037 {
1038 	if (a < b)
1039 		diff_output("%d,%d", a, b - a + 1);
1040 	else if (a == b)
1041 		diff_output("%d", b);
1042 	else
1043 		diff_output("%d,0", b);
1044 }
1045 
1046 static char *
1047 preadline(int fd, size_t rlen, off_t off)
1048 {
1049 	char *line;
1050 	ssize_t nr;
1051 
1052 	line = xmalloc(rlen + 1);
1053 	if ((nr = pread(fd, line, rlen, off)) < 0)
1054 		err(2, "preadline");
1055 	if (nr > 0 && line[nr-1] == '\n')
1056 		nr--;
1057 	line[nr] = '\0';
1058 	return (line);
1059 }
1060 
1061 static int
1062 ignoreline(char *line)
1063 {
1064 	int ret;
1065 
1066 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1067 	free(line);
1068 	return (ret == 0);	/* if it matched, it should be ignored. */
1069 }
1070 
1071 /*
1072  * Indicate that there is a difference between lines a and b of the from file
1073  * to get to lines c to d of the to file.  If a is greater then b then there
1074  * are no lines in the from file involved and this means that there were
1075  * lines appended (beginning at b).  If c is greater than d then there are
1076  * lines missing from the to file.
1077  */
1078 static void
1079 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1080     int *pflags)
1081 {
1082 	static size_t max_context = 64;
1083 	int i;
1084 
1085 restart:
1086 	if (diff_format != D_IFDEF && a > b && c > d)
1087 		return;
1088 	if (ignore_pats != NULL) {
1089 		char *line;
1090 		/*
1091 		 * All lines in the change, insert, or delete must
1092 		 * match an ignore pattern for the change to be
1093 		 * ignored.
1094 		 */
1095 		if (a <= b) {		/* Changes and deletes. */
1096 			for (i = a; i <= b; i++) {
1097 				line = preadline(fileno(f1),
1098 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1099 				if (!ignoreline(line))
1100 					goto proceed;
1101 			}
1102 		}
1103 		if (a > b || c <= d) {	/* Changes and inserts. */
1104 			for (i = c; i <= d; i++) {
1105 				line = preadline(fileno(f2),
1106 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1107 				if (!ignoreline(line))
1108 					goto proceed;
1109 			}
1110 		}
1111 		return;
1112 	}
1113 proceed:
1114 	if (*pflags & D_HEADER) {
1115 		diff_output("%s %s %s\n", diffargs, file1, file2);
1116 		*pflags &= ~D_HEADER;
1117 	}
1118 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1119 		/*
1120 		 * Allocate change records as needed.
1121 		 */
1122 		if (context_vec_ptr == context_vec_end - 1) {
1123 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1124 			max_context <<= 1;
1125 			context_vec_start = xreallocarray(context_vec_start,
1126 			    max_context, sizeof(*context_vec_start));
1127 			context_vec_end = context_vec_start + max_context;
1128 			context_vec_ptr = context_vec_start + offset;
1129 		}
1130 		if (anychange == 0) {
1131 			/*
1132 			 * Print the context/unidiff header first time through.
1133 			 */
1134 			print_header(file1, file2);
1135 			anychange = 1;
1136 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1137 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1138 			/*
1139 			 * If this change is more than 'diff_context' lines from the
1140 			 * previous change, dump the record and reset it.
1141 			 */
1142 			if (diff_format == D_CONTEXT)
1143 				dump_context_vec(f1, f2, *pflags);
1144 			else
1145 				dump_unified_vec(f1, f2, *pflags);
1146 		}
1147 		context_vec_ptr++;
1148 		context_vec_ptr->a = a;
1149 		context_vec_ptr->b = b;
1150 		context_vec_ptr->c = c;
1151 		context_vec_ptr->d = d;
1152 		return;
1153 	}
1154 	if (anychange == 0)
1155 		anychange = 1;
1156 	switch (diff_format) {
1157 	case D_BRIEF:
1158 		return;
1159 	case D_NORMAL:
1160 	case D_EDIT:
1161 		range(a, b, ",");
1162 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1163 		if (diff_format == D_NORMAL)
1164 			range(c, d, ",");
1165 		diff_output("\n");
1166 		break;
1167 	case D_REVERSE:
1168 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1169 		range(a, b, " ");
1170 		diff_output("\n");
1171 		break;
1172 	case D_NREVERSE:
1173 		if (a > b)
1174 			diff_output("a%d %d\n", b, d - c + 1);
1175 		else {
1176 			diff_output("d%d %d\n", a, b - a + 1);
1177 			if (!(c > d))
1178 				/* add changed lines */
1179 				diff_output("a%d %d\n", b, d - c + 1);
1180 		}
1181 		break;
1182 	}
1183 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1184 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1185 		if (a <= b && c <= d && diff_format == D_NORMAL)
1186 			diff_output("---\n");
1187 	}
1188 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1189 	if (i != 0 && diff_format == D_EDIT) {
1190 		/*
1191 		 * A non-zero return value for D_EDIT indicates that the
1192 		 * last line printed was a bare dot (".") that has been
1193 		 * escaped as ".." to prevent ed(1) from misinterpreting
1194 		 * it.  We have to add a substitute command to change this
1195 		 * back and restart where we left off.
1196 		 */
1197 		diff_output(".\n");
1198 		diff_output("%ds/.//\n", a + i - 1);
1199 		b = a + i - 1;
1200 		a = b + 1;
1201 		c += i;
1202 		goto restart;
1203 	}
1204 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1205 		diff_output(".\n");
1206 	if (inifdef) {
1207 		diff_output("#endif /* %s */\n", ifdefname);
1208 		inifdef = 0;
1209 	}
1210 }
1211 
1212 static int
1213 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1214 {
1215 	int i, j, c, lastc, col, nc;
1216 	int	newcol;
1217 
1218 	/*
1219 	 * When doing #ifdef's, copy down to current line
1220 	 * if this is the first file, so that stuff makes it to output.
1221 	 */
1222 	if (diff_format == D_IFDEF && oldfile) {
1223 		long curpos = ftell(lb);
1224 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1225 		nc = f[a > b ? b : a - 1] - curpos;
1226 		for (i = 0; i < nc; i++)
1227 			diff_output("%c", getc(lb));
1228 	}
1229 	if (a > b)
1230 		return (0);
1231 	if (diff_format == D_IFDEF) {
1232 		if (inifdef) {
1233 			diff_output("#else /* %s%s */\n",
1234 			    oldfile == 1 ? "!" : "", ifdefname);
1235 		} else {
1236 			if (oldfile)
1237 				diff_output("#ifndef %s\n", ifdefname);
1238 			else
1239 				diff_output("#ifdef %s\n", ifdefname);
1240 		}
1241 		inifdef = 1 + oldfile;
1242 	}
1243 	for (i = a; i <= b; i++) {
1244 		fseek(lb, f[i - 1], SEEK_SET);
1245 		nc = f[i] - f[i - 1];
1246 		if (diff_format != D_IFDEF && ch != '\0') {
1247 			diff_output("%c", ch);
1248 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1249 			    || diff_format == D_UNIFIED))
1250 				diff_output("\t");
1251 			else if (diff_format != D_UNIFIED)
1252 				diff_output(" ");
1253 		}
1254 		col = 0;
1255 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1256 			if ((c = getc(lb)) == EOF) {
1257 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1258 				    diff_format == D_NREVERSE)
1259 					warnx("No newline at end of file");
1260 				else
1261 					diff_output("\n\\ No newline at end of "
1262 					    "file\n");
1263 				return (0);
1264 			}
1265 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1266 				newcol = ((col/tabsize)+1)*tabsize;
1267 				do {
1268 					diff_output(" ");
1269 				} while (++col < newcol);
1270 			} else {
1271 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1272 				    && lastc == '.') {
1273 					/*
1274 					 * Don't print a bare "." line
1275 					 * since that will confuse ed(1).
1276 					 * Print ".." instead and return,
1277 					 * giving the caller an offset
1278 					 * from which to restart.
1279 					 */
1280 					diff_output(".\n");
1281 					return (i - a + 1);
1282 				}
1283 				diff_output("%c", c);
1284 				col++;
1285 			}
1286 		}
1287 	}
1288 	return (0);
1289 }
1290 
1291 /*
1292  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1293  */
1294 static int
1295 readhash(FILE *f, int flags)
1296 {
1297 	int i, t, space;
1298 	int sum;
1299 
1300 	sum = 1;
1301 	space = 0;
1302 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1303 		if (flags & D_IGNORECASE)
1304 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1305 				if (flags & D_STRIPCR && t == '\r') {
1306 					t = getc(f);
1307 					if (t == '\n')
1308 						break;
1309 					ungetc(t, f);
1310 				}
1311 				if (t == EOF) {
1312 					if (i == 0)
1313 						return (0);
1314 					break;
1315 				}
1316 				sum = sum * 127 + chrtran[t];
1317 			}
1318 		else
1319 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1320 				if (flags & D_STRIPCR && t == '\r') {
1321 					t = getc(f);
1322 					if (t == '\n')
1323 						break;
1324 					ungetc(t, f);
1325 				}
1326 				if (t == EOF) {
1327 					if (i == 0)
1328 						return (0);
1329 					break;
1330 				}
1331 				sum = sum * 127 + t;
1332 			}
1333 	} else {
1334 		for (i = 0;;) {
1335 			switch (t = getc(f)) {
1336 			case '\r':
1337 			case '\t':
1338 			case '\v':
1339 			case '\f':
1340 			case ' ':
1341 				space++;
1342 				continue;
1343 			default:
1344 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1345 					i++;
1346 					space = 0;
1347 				}
1348 				sum = sum * 127 + chrtran[t];
1349 				i++;
1350 				continue;
1351 			case EOF:
1352 				if (i == 0)
1353 					return (0);
1354 				/* FALLTHROUGH */
1355 			case '\n':
1356 				break;
1357 			}
1358 			break;
1359 		}
1360 	}
1361 	/*
1362 	 * There is a remote possibility that we end up with a zero sum.
1363 	 * Zero is used as an EOF marker, so return 1 instead.
1364 	 */
1365 	return (sum == 0 ? 1 : sum);
1366 }
1367 
1368 static int
1369 asciifile(FILE *f)
1370 {
1371 	unsigned char buf[BUFSIZ];
1372 	size_t cnt;
1373 
1374 	if (f == NULL)
1375 		return (1);
1376 
1377 	rewind(f);
1378 	cnt = fread(buf, 1, sizeof(buf), f);
1379 	return (memchr(buf, '\0', cnt) == NULL);
1380 }
1381 
1382 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1383 
1384 static char *
1385 match_function(const long *f, int pos, FILE *fp)
1386 {
1387 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1388 	size_t nc;
1389 	int last = lastline;
1390 	const char *state = NULL;
1391 
1392 	lastline = pos;
1393 	while (pos > last) {
1394 		fseek(fp, f[pos - 1], SEEK_SET);
1395 		nc = f[pos] - f[pos - 1];
1396 		if (nc >= sizeof(buf))
1397 			nc = sizeof(buf) - 1;
1398 		nc = fread(buf, 1, nc, fp);
1399 		if (nc > 0) {
1400 			buf[nc] = '\0';
1401 			buf[strcspn(buf, "\n")] = '\0';
1402 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1403 				if (begins_with(buf, "private:")) {
1404 					if (!state)
1405 						state = " (private)";
1406 				} else if (begins_with(buf, "protected:")) {
1407 					if (!state)
1408 						state = " (protected)";
1409 				} else if (begins_with(buf, "public:")) {
1410 					if (!state)
1411 						state = " (public)";
1412 				} else {
1413 					strlcpy(lastbuf, buf, sizeof lastbuf);
1414 					if (state)
1415 						strlcat(lastbuf, state,
1416 						    sizeof lastbuf);
1417 					lastmatchline = pos;
1418 					return lastbuf;
1419 				}
1420 			}
1421 		}
1422 		pos--;
1423 	}
1424 	return lastmatchline > 0 ? lastbuf : NULL;
1425 }
1426 
1427 /* dump accumulated "context" diff changes */
1428 static void
1429 dump_context_vec(FILE *f1, FILE *f2, int flags)
1430 {
1431 	struct context_vec *cvp = context_vec_start;
1432 	int lowa, upb, lowc, upd, do_output;
1433 	int a, b, c, d;
1434 	char ch, *f;
1435 
1436 	if (context_vec_start > context_vec_ptr)
1437 		return;
1438 
1439 	b = d = 0;		/* gcc */
1440 	lowa = MAX(1, cvp->a - diff_context);
1441 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1442 	lowc = MAX(1, cvp->c - diff_context);
1443 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1444 
1445 	diff_output("***************");
1446 	if ((flags & D_PROTOTYPE)) {
1447 		f = match_function(ixold, lowa-1, f1);
1448 		if (f != NULL)
1449 			diff_output(" %s", f);
1450 	}
1451 	diff_output("\n*** ");
1452 	range(lowa, upb, ",");
1453 	diff_output(" ****\n");
1454 
1455 	/*
1456 	 * Output changes to the "old" file.  The first loop suppresses
1457 	 * output if there were no changes to the "old" file (we'll see
1458 	 * the "old" lines as context in the "new" list).
1459 	 */
1460 	do_output = 0;
1461 	for (; cvp <= context_vec_ptr; cvp++)
1462 		if (cvp->a <= cvp->b) {
1463 			cvp = context_vec_start;
1464 			do_output++;
1465 			break;
1466 		}
1467 	if (do_output) {
1468 		while (cvp <= context_vec_ptr) {
1469 			a = cvp->a;
1470 			b = cvp->b;
1471 			c = cvp->c;
1472 			d = cvp->d;
1473 
1474 			if (a <= b && c <= d)
1475 				ch = 'c';
1476 			else
1477 				ch = (a <= b) ? 'd' : 'a';
1478 
1479 			if (ch == 'a')
1480 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1481 			else {
1482 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1483 				fetch(ixold, a, b, f1,
1484 				    ch == 'c' ? '!' : '-', 0, flags);
1485 			}
1486 			lowa = b + 1;
1487 			cvp++;
1488 		}
1489 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1490 	}
1491 	/* output changes to the "new" file */
1492 	diff_output("--- ");
1493 	range(lowc, upd, ",");
1494 	diff_output(" ----\n");
1495 
1496 	do_output = 0;
1497 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1498 		if (cvp->c <= cvp->d) {
1499 			cvp = context_vec_start;
1500 			do_output++;
1501 			break;
1502 		}
1503 	if (do_output) {
1504 		while (cvp <= context_vec_ptr) {
1505 			a = cvp->a;
1506 			b = cvp->b;
1507 			c = cvp->c;
1508 			d = cvp->d;
1509 
1510 			if (a <= b && c <= d)
1511 				ch = 'c';
1512 			else
1513 				ch = (a <= b) ? 'd' : 'a';
1514 
1515 			if (ch == 'd')
1516 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1517 			else {
1518 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1519 				fetch(ixnew, c, d, f2,
1520 				    ch == 'c' ? '!' : '+', 0, flags);
1521 			}
1522 			lowc = d + 1;
1523 			cvp++;
1524 		}
1525 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1526 	}
1527 	context_vec_ptr = context_vec_start - 1;
1528 }
1529 
1530 /* dump accumulated "unified" diff changes */
1531 static void
1532 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1533 {
1534 	struct context_vec *cvp = context_vec_start;
1535 	int lowa, upb, lowc, upd;
1536 	int a, b, c, d;
1537 	char ch, *f;
1538 
1539 	if (context_vec_start > context_vec_ptr)
1540 		return;
1541 
1542 	b = d = 0;		/* gcc */
1543 	lowa = MAX(1, cvp->a - diff_context);
1544 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1545 	lowc = MAX(1, cvp->c - diff_context);
1546 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1547 
1548 	diff_output("@@ -");
1549 	uni_range(lowa, upb);
1550 	diff_output(" +");
1551 	uni_range(lowc, upd);
1552 	diff_output(" @@");
1553 	if ((flags & D_PROTOTYPE)) {
1554 		f = match_function(ixold, lowa-1, f1);
1555 		if (f != NULL)
1556 			diff_output(" %s", f);
1557 	}
1558 	diff_output("\n");
1559 
1560 	/*
1561 	 * Output changes in "unified" diff format--the old and new lines
1562 	 * are printed together.
1563 	 */
1564 	for (; cvp <= context_vec_ptr; cvp++) {
1565 		a = cvp->a;
1566 		b = cvp->b;
1567 		c = cvp->c;
1568 		d = cvp->d;
1569 
1570 		/*
1571 		 * c: both new and old changes
1572 		 * d: only changes in the old file
1573 		 * a: only changes in the new file
1574 		 */
1575 		if (a <= b && c <= d)
1576 			ch = 'c';
1577 		else
1578 			ch = (a <= b) ? 'd' : 'a';
1579 
1580 		switch (ch) {
1581 		case 'c':
1582 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1583 			fetch(ixold, a, b, f1, '-', 0, flags);
1584 			fetch(ixnew, c, d, f2, '+', 0, flags);
1585 			break;
1586 		case 'd':
1587 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1588 			fetch(ixold, a, b, f1, '-', 0, flags);
1589 			break;
1590 		case 'a':
1591 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1592 			fetch(ixnew, c, d, f2, '+', 0, flags);
1593 			break;
1594 		}
1595 		lowa = b + 1;
1596 		lowc = d + 1;
1597 	}
1598 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1599 
1600 	context_vec_ptr = context_vec_start - 1;
1601 }
1602 
1603 static void
1604 print_header(const char *file1, const char *file2)
1605 {
1606 	const char *time_format;
1607 	char buf1[256];
1608 	char buf2[256];
1609 	char end1[10];
1610 	char end2[10];
1611 	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1612 	int nsec1 = stb1.st_mtim.tv_nsec;
1613 	int nsec2 = stb2.st_mtim.tv_nsec;
1614 
1615 	time_format = "%Y-%m-%d %H:%M:%S";
1616 
1617 	if (cflag)
1618 		time_format = "%c";
1619 	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1620 	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1621 	strftime(buf1, 256, time_format, tm_ptr1);
1622 	strftime(buf2, 256, time_format, tm_ptr2);
1623 	if (!cflag) {
1624 		strftime(end1, 10, "%z", tm_ptr1);
1625 		strftime(end2, 10, "%z", tm_ptr2);
1626 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1627 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1628 	}
1629 	if (label[0] != NULL)
1630 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1631 		    label[0]);
1632 	else
1633 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1634 		    file1, buf1);
1635 	if (label[1] != NULL)
1636 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1637 		    label[1]);
1638 	else
1639 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1640 		    file2, buf2);
1641 }
1642