xref: /openbsd/usr.bin/diff/diffreg.c (revision 264ca280)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <paths.h>
75 #include <stddef.h>
76 #include <stdint.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 #include <limits.h>
82 
83 #include "diff.h"
84 #include "xmalloc.h"
85 
86 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 #define	diff_output	printf
180 static FILE	*opentemp(const char *);
181 static void	 output(char *, FILE *, char *, FILE *, int);
182 static void	 check(FILE *, FILE *, int);
183 static void	 range(int, int, char *);
184 static void	 uni_range(int, int);
185 static void	 dump_context_vec(FILE *, FILE *, int);
186 static void	 dump_unified_vec(FILE *, FILE *, int);
187 static void	 prepare(int, FILE *, off_t, int);
188 static void	 prune(void);
189 static void	 equiv(struct line *, int, struct line *, int, int *);
190 static void	 unravel(int);
191 static void	 unsort(struct line *, int, int *);
192 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193 static void	 sort(struct line *, int);
194 static void	 print_header(const char *, const char *);
195 static int	 ignoreline(char *);
196 static int	 asciifile(FILE *);
197 static int	 fetch(long *, int, int, FILE *, int, int, int);
198 static int	 newcand(int, int, int);
199 static int	 search(int *, int, int);
200 static int	 skipline(FILE *);
201 static int	 isqrt(int);
202 static int	 stone(int *, int, int *, int *, int);
203 static int	 readhash(FILE *, int);
204 static int	 files_differ(FILE *, FILE *, int);
205 static char	*match_function(const long *, int, FILE *);
206 static char	*preadline(int, size_t, off_t);
207 
208 static int  *J;			/* will be overlaid on class */
209 static int  *class;		/* will be overlaid on file[0] */
210 static int  *klist;		/* will be overlaid on file[0] after class */
211 static int  *member;		/* will be overlaid on file[1] */
212 static int   clen;
213 static int   inifdef;		/* whether or not we are in a #ifdef block */
214 static int   len[2];
215 static int   pref, suff;	/* length of prefix and suffix */
216 static int   slen[2];
217 static int   anychange;
218 static long *ixnew;		/* will be overlaid on file[1] */
219 static long *ixold;		/* will be overlaid on klist */
220 static struct cand *clist;	/* merely a free storage pot for candidates */
221 static int   clistlen;		/* the length of clist */
222 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223 static u_char *chrtran;		/* translation table for case-folding */
224 static struct context_vec *context_vec_start;
225 static struct context_vec *context_vec_end;
226 static struct context_vec *context_vec_ptr;
227 
228 #define FUNCTION_CONTEXT_SIZE	55
229 static char lastbuf[FUNCTION_CONTEXT_SIZE];
230 static int lastline;
231 static int lastmatchline;
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *file1, char *file2, int flags)
294 {
295 	FILE *f1, *f2;
296 	int i, rval;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	prepare(0, f1, stb1.st_size, flags);
373 	prepare(1, f2, stb2.st_size, flags);
374 
375 	prune();
376 	sort(sfile[0], slen[0]);
377 	sort(sfile[1], slen[1]);
378 
379 	member = (int *)file[1];
380 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382 
383 	class = (int *)file[0];
384 	unsort(sfile[0], slen[0], class);
385 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386 
387 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388 	clen = 0;
389 	clistlen = 100;
390 	clist = xcalloc(clistlen, sizeof(*clist));
391 	i = stone(class, slen[0], member, klist, flags);
392 	free(member);
393 	free(class);
394 
395 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396 	unravel(klist[i]);
397 	free(clist);
398 	free(klist);
399 
400 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402 	check(f1, f2, flags);
403 	output(file1, f1, file2, f2, flags);
404 closem:
405 	if (anychange) {
406 		status |= 1;
407 		if (rval == D_SAME)
408 			rval = D_DIFFER;
409 	}
410 	if (f1 != NULL)
411 		fclose(f1);
412 	if (f2 != NULL)
413 		fclose(f2);
414 
415 	return (rval);
416 }
417 
418 /*
419  * Check to see if the given files differ.
420  * Returns 0 if they are the same, 1 if different, and -1 on error.
421  * XXX - could use code from cmp(1) [faster]
422  */
423 static int
424 files_differ(FILE *f1, FILE *f2, int flags)
425 {
426 	char buf1[BUFSIZ], buf2[BUFSIZ];
427 	size_t i, j;
428 
429 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431 		return (1);
432 	for (;;) {
433 		i = fread(buf1, 1, sizeof(buf1), f1);
434 		j = fread(buf2, 1, sizeof(buf2), f2);
435 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
436 			return (-1);
437 		if (i != j)
438 			return (1);
439 		if (i == 0)
440 			return (0);
441 		if (memcmp(buf1, buf2, i) != 0)
442 			return (1);
443 	}
444 }
445 
446 static FILE *
447 opentemp(const char *file)
448 {
449 	char buf[BUFSIZ], tempfile[PATH_MAX];
450 	ssize_t nread;
451 	int ifd, ofd;
452 
453 	if (strcmp(file, "-") == 0)
454 		ifd = STDIN_FILENO;
455 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
456 		return (NULL);
457 
458 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
459 
460 	if ((ofd = mkstemp(tempfile)) < 0) {
461 		close(ifd);
462 		return (NULL);
463 	}
464 	unlink(tempfile);
465 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
466 		if (write(ofd, buf, nread) != nread) {
467 			close(ifd);
468 			close(ofd);
469 			return (NULL);
470 		}
471 	}
472 	close(ifd);
473 	lseek(ofd, (off_t)0, SEEK_SET);
474 	return (fdopen(ofd, "r"));
475 }
476 
477 char *
478 splice(char *dir, char *file)
479 {
480 	char *tail, *buf;
481 	size_t dirlen;
482 
483 	dirlen = strlen(dir);
484 	while (dirlen != 0 && dir[dirlen - 1] == '/')
485 	    dirlen--;
486 	if ((tail = strrchr(file, '/')) == NULL)
487 		tail = file;
488 	else
489 		tail++;
490 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
491 	return (buf);
492 }
493 
494 static void
495 prepare(int i, FILE *fd, off_t filesize, int flags)
496 {
497 	struct line *p;
498 	int j, h;
499 	size_t sz;
500 
501 	rewind(fd);
502 
503 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
504 	if (sz < 100)
505 		sz = 100;
506 
507 	p = xcalloc(sz + 3, sizeof(*p));
508 	for (j = 0; (h = readhash(fd, flags));) {
509 		if (j == sz) {
510 			sz = sz * 3 / 2;
511 			p = xreallocarray(p, sz + 3, sizeof(*p));
512 		}
513 		p[++j].value = h;
514 	}
515 	len[i] = j;
516 	file[i] = p;
517 }
518 
519 static void
520 prune(void)
521 {
522 	int i, j;
523 
524 	for (pref = 0; pref < len[0] && pref < len[1] &&
525 	    file[0][pref + 1].value == file[1][pref + 1].value;
526 	    pref++)
527 		;
528 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
529 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
530 	    suff++)
531 		;
532 	for (j = 0; j < 2; j++) {
533 		sfile[j] = file[j] + pref;
534 		slen[j] = len[j] - pref - suff;
535 		for (i = 0; i <= slen[j]; i++)
536 			sfile[j][i].serial = i;
537 	}
538 }
539 
540 static void
541 equiv(struct line *a, int n, struct line *b, int m, int *c)
542 {
543 	int i, j;
544 
545 	i = j = 1;
546 	while (i <= n && j <= m) {
547 		if (a[i].value < b[j].value)
548 			a[i++].value = 0;
549 		else if (a[i].value == b[j].value)
550 			a[i++].value = j;
551 		else
552 			j++;
553 	}
554 	while (i <= n)
555 		a[i++].value = 0;
556 	b[m + 1].value = 0;
557 	j = 0;
558 	while (++j <= m) {
559 		c[j] = -b[j].serial;
560 		while (b[j + 1].value == b[j].value) {
561 			j++;
562 			c[j] = b[j].serial;
563 		}
564 	}
565 	c[j] = -1;
566 }
567 
568 /* Code taken from ping.c */
569 static int
570 isqrt(int n)
571 {
572 	int y, x = 1;
573 
574 	if (n == 0)
575 		return (0);
576 
577 	do { /* newton was a stinker */
578 		y = x;
579 		x = n / x;
580 		x += y;
581 		x /= 2;
582 	} while ((x - y) > 1 || (x - y) < -1);
583 
584 	return (x);
585 }
586 
587 static int
588 stone(int *a, int n, int *b, int *c, int flags)
589 {
590 	int i, k, y, j, l;
591 	int oldc, tc, oldl, sq;
592 	u_int numtries, bound;
593 
594 	if (flags & D_MINIMAL)
595 		bound = UINT_MAX;
596 	else {
597 		sq = isqrt(n);
598 		bound = MAXIMUM(256, sq);
599 	}
600 
601 	k = 0;
602 	c[0] = newcand(0, 0, 0);
603 	for (i = 1; i <= n; i++) {
604 		j = a[i];
605 		if (j == 0)
606 			continue;
607 		y = -b[j];
608 		oldl = 0;
609 		oldc = c[0];
610 		numtries = 0;
611 		do {
612 			if (y <= clist[oldc].y)
613 				continue;
614 			l = search(c, k, y);
615 			if (l != oldl + 1)
616 				oldc = c[l - 1];
617 			if (l <= k) {
618 				if (clist[c[l]].y <= y)
619 					continue;
620 				tc = c[l];
621 				c[l] = newcand(i, y, oldc);
622 				oldc = tc;
623 				oldl = l;
624 				numtries++;
625 			} else {
626 				c[l] = newcand(i, y, oldc);
627 				k++;
628 				break;
629 			}
630 		} while ((y = b[++j]) > 0 && numtries < bound);
631 	}
632 	return (k);
633 }
634 
635 static int
636 newcand(int x, int y, int pred)
637 {
638 	struct cand *q;
639 
640 	if (clen == clistlen) {
641 		clistlen = clistlen * 11 / 10;
642 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
643 	}
644 	q = clist + clen;
645 	q->x = x;
646 	q->y = y;
647 	q->pred = pred;
648 	return (clen++);
649 }
650 
651 static int
652 search(int *c, int k, int y)
653 {
654 	int i, j, l, t;
655 
656 	if (clist[c[k]].y < y)	/* quick look for typical case */
657 		return (k + 1);
658 	i = 0;
659 	j = k + 1;
660 	for (;;) {
661 		l = (i + j) / 2;
662 		if (l <= i)
663 			break;
664 		t = clist[c[l]].y;
665 		if (t > y)
666 			j = l;
667 		else if (t < y)
668 			i = l;
669 		else
670 			return (l);
671 	}
672 	return (l + 1);
673 }
674 
675 static void
676 unravel(int p)
677 {
678 	struct cand *q;
679 	int i;
680 
681 	for (i = 0; i <= len[0]; i++)
682 		J[i] = i <= pref ? i :
683 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
684 	for (q = clist + p; q->y != 0; q = clist + q->pred)
685 		J[q->x + pref] = q->y + pref;
686 }
687 
688 /*
689  * Check does double duty:
690  *  1.	ferret out any fortuitous correspondences due
691  *	to confounding by hashing (which result in "jackpot")
692  *  2.  collect random access indexes to the two files
693  */
694 static void
695 check(FILE *f1, FILE *f2, int flags)
696 {
697 	int i, j, jackpot, c, d;
698 	long ctold, ctnew;
699 
700 	rewind(f1);
701 	rewind(f2);
702 	j = 1;
703 	ixold[0] = ixnew[0] = 0;
704 	jackpot = 0;
705 	ctold = ctnew = 0;
706 	for (i = 1; i <= len[0]; i++) {
707 		if (J[i] == 0) {
708 			ixold[i] = ctold += skipline(f1);
709 			continue;
710 		}
711 		while (j < J[i]) {
712 			ixnew[j] = ctnew += skipline(f2);
713 			j++;
714 		}
715 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
716 			for (;;) {
717 				c = getc(f1);
718 				d = getc(f2);
719 				/*
720 				 * GNU diff ignores a missing newline
721 				 * in one file for -b or -w.
722 				 */
723 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
724 					if (c == EOF && d == '\n') {
725 						ctnew++;
726 						break;
727 					} else if (c == '\n' && d == EOF) {
728 						ctold++;
729 						break;
730 					}
731 				}
732 				ctold++;
733 				ctnew++;
734 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
735 				    isspace(d)) {
736 					do {
737 						if (c == '\n')
738 							break;
739 						ctold++;
740 					} while (isspace(c = getc(f1)));
741 					do {
742 						if (d == '\n')
743 							break;
744 						ctnew++;
745 					} while (isspace(d = getc(f2)));
746 				} else if ((flags & D_IGNOREBLANKS)) {
747 					while (isspace(c) && c != '\n') {
748 						c = getc(f1);
749 						ctold++;
750 					}
751 					while (isspace(d) && d != '\n') {
752 						d = getc(f2);
753 						ctnew++;
754 					}
755 				}
756 				if (chrtran[c] != chrtran[d]) {
757 					jackpot++;
758 					J[i] = 0;
759 					if (c != '\n' && c != EOF)
760 						ctold += skipline(f1);
761 					if (d != '\n' && c != EOF)
762 						ctnew += skipline(f2);
763 					break;
764 				}
765 				if (c == '\n' || c == EOF)
766 					break;
767 			}
768 		} else {
769 			for (;;) {
770 				ctold++;
771 				ctnew++;
772 				if ((c = getc(f1)) != (d = getc(f2))) {
773 					/* jackpot++; */
774 					J[i] = 0;
775 					if (c != '\n' && c != EOF)
776 						ctold += skipline(f1);
777 					if (d != '\n' && c != EOF)
778 						ctnew += skipline(f2);
779 					break;
780 				}
781 				if (c == '\n' || c == EOF)
782 					break;
783 			}
784 		}
785 		ixold[i] = ctold;
786 		ixnew[j] = ctnew;
787 		j++;
788 	}
789 	for (; j <= len[1]; j++)
790 		ixnew[j] = ctnew += skipline(f2);
791 	/*
792 	 * if (jackpot)
793 	 *	fprintf(stderr, "jackpot\n");
794 	 */
795 }
796 
797 /* shellsort CACM #201 */
798 static void
799 sort(struct line *a, int n)
800 {
801 	struct line *ai, *aim, w;
802 	int j, m = 0, k;
803 
804 	if (n == 0)
805 		return;
806 	for (j = 1; j <= n; j *= 2)
807 		m = 2 * j - 1;
808 	for (m /= 2; m != 0; m /= 2) {
809 		k = n - m;
810 		for (j = 1; j <= k; j++) {
811 			for (ai = &a[j]; ai > a; ai -= m) {
812 				aim = &ai[m];
813 				if (aim < ai)
814 					break;	/* wraparound */
815 				if (aim->value > ai[0].value ||
816 				    (aim->value == ai[0].value &&
817 					aim->serial > ai[0].serial))
818 					break;
819 				w.value = ai[0].value;
820 				ai[0].value = aim->value;
821 				aim->value = w.value;
822 				w.serial = ai[0].serial;
823 				ai[0].serial = aim->serial;
824 				aim->serial = w.serial;
825 			}
826 		}
827 	}
828 }
829 
830 static void
831 unsort(struct line *f, int l, int *b)
832 {
833 	int *a, i;
834 
835 	a = xcalloc(l + 1, sizeof(*a));
836 	for (i = 1; i <= l; i++)
837 		a[f[i].serial] = f[i].value;
838 	for (i = 1; i <= l; i++)
839 		b[i] = a[i];
840 	free(a);
841 }
842 
843 static int
844 skipline(FILE *f)
845 {
846 	int i, c;
847 
848 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 		continue;
850 	return (i);
851 }
852 
853 static void
854 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
855 {
856 	int m, i0, i1, j0, j1;
857 
858 	rewind(f1);
859 	rewind(f2);
860 	m = len[0];
861 	J[0] = 0;
862 	J[m + 1] = len[1] + 1;
863 	if (diff_format != D_EDIT) {
864 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
866 				i0++;
867 			j0 = J[i0 - 1] + 1;
868 			i1 = i0 - 1;
869 			while (i1 < m && J[i1 + 1] == 0)
870 				i1++;
871 			j1 = J[i1 + 1] - 1;
872 			J[i1] = j1;
873 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
874 		}
875 	} else {
876 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
877 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
878 				i0--;
879 			j0 = J[i0 + 1] - 1;
880 			i1 = i0 + 1;
881 			while (i1 > 1 && J[i1 - 1] == 0)
882 				i1--;
883 			j1 = J[i1 - 1] + 1;
884 			J[i1] = j1;
885 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
886 		}
887 	}
888 	if (m == 0)
889 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
890 	if (diff_format == D_IFDEF) {
891 		for (;;) {
892 #define	c i0
893 			if ((c = getc(f1)) == EOF)
894 				return;
895 			diff_output("%c", c);
896 		}
897 #undef c
898 	}
899 	if (anychange != 0) {
900 		if (diff_format == D_CONTEXT)
901 			dump_context_vec(f1, f2, flags);
902 		else if (diff_format == D_UNIFIED)
903 			dump_unified_vec(f1, f2, flags);
904 	}
905 }
906 
907 static void
908 range(int a, int b, char *separator)
909 {
910 	diff_output("%d", a > b ? b : a);
911 	if (a < b)
912 		diff_output("%s%d", separator, b);
913 }
914 
915 static void
916 uni_range(int a, int b)
917 {
918 	if (a < b)
919 		diff_output("%d,%d", a, b - a + 1);
920 	else if (a == b)
921 		diff_output("%d", b);
922 	else
923 		diff_output("%d,0", b);
924 }
925 
926 static char *
927 preadline(int fd, size_t rlen, off_t off)
928 {
929 	char *line;
930 	ssize_t nr;
931 
932 	line = xmalloc(rlen + 1);
933 	if ((nr = pread(fd, line, rlen, off)) < 0)
934 		err(2, "preadline");
935 	if (nr > 0 && line[nr-1] == '\n')
936 		nr--;
937 	line[nr] = '\0';
938 	return (line);
939 }
940 
941 static int
942 ignoreline(char *line)
943 {
944 	int ret;
945 
946 	ret = regexec(&ignore_re, line, 0, NULL, 0);
947 	free(line);
948 	return (ret == 0);	/* if it matched, it should be ignored. */
949 }
950 
951 /*
952  * Indicate that there is a difference between lines a and b of the from file
953  * to get to lines c to d of the to file.  If a is greater then b then there
954  * are no lines in the from file involved and this means that there were
955  * lines appended (beginning at b).  If c is greater than d then there are
956  * lines missing from the to file.
957  */
958 static void
959 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
960     int *pflags)
961 {
962 	static size_t max_context = 64;
963 	int i;
964 
965 restart:
966 	if (diff_format != D_IFDEF && a > b && c > d)
967 		return;
968 	if (ignore_pats != NULL) {
969 		char *line;
970 		/*
971 		 * All lines in the change, insert, or delete must
972 		 * match an ignore pattern for the change to be
973 		 * ignored.
974 		 */
975 		if (a <= b) {		/* Changes and deletes. */
976 			for (i = a; i <= b; i++) {
977 				line = preadline(fileno(f1),
978 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
979 				if (!ignoreline(line))
980 					goto proceed;
981 			}
982 		}
983 		if (a > b || c <= d) {	/* Changes and inserts. */
984 			for (i = c; i <= d; i++) {
985 				line = preadline(fileno(f2),
986 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
987 				if (!ignoreline(line))
988 					goto proceed;
989 			}
990 		}
991 		return;
992 	}
993 proceed:
994 	if (*pflags & D_HEADER) {
995 		diff_output("%s %s %s\n", diffargs, file1, file2);
996 		*pflags &= ~D_HEADER;
997 	}
998 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
999 		/*
1000 		 * Allocate change records as needed.
1001 		 */
1002 		if (context_vec_ptr == context_vec_end - 1) {
1003 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1004 			max_context <<= 1;
1005 			context_vec_start = xreallocarray(context_vec_start,
1006 			    max_context, sizeof(*context_vec_start));
1007 			context_vec_end = context_vec_start + max_context;
1008 			context_vec_ptr = context_vec_start + offset;
1009 		}
1010 		if (anychange == 0) {
1011 			/*
1012 			 * Print the context/unidiff header first time through.
1013 			 */
1014 			print_header(file1, file2);
1015 			anychange = 1;
1016 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1017 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1018 			/*
1019 			 * If this change is more than 'diff_context' lines from the
1020 			 * previous change, dump the record and reset it.
1021 			 */
1022 			if (diff_format == D_CONTEXT)
1023 				dump_context_vec(f1, f2, *pflags);
1024 			else
1025 				dump_unified_vec(f1, f2, *pflags);
1026 		}
1027 		context_vec_ptr++;
1028 		context_vec_ptr->a = a;
1029 		context_vec_ptr->b = b;
1030 		context_vec_ptr->c = c;
1031 		context_vec_ptr->d = d;
1032 		return;
1033 	}
1034 	if (anychange == 0)
1035 		anychange = 1;
1036 	switch (diff_format) {
1037 	case D_BRIEF:
1038 		return;
1039 	case D_NORMAL:
1040 	case D_EDIT:
1041 		range(a, b, ",");
1042 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1043 		if (diff_format == D_NORMAL)
1044 			range(c, d, ",");
1045 		diff_output("\n");
1046 		break;
1047 	case D_REVERSE:
1048 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1049 		range(a, b, " ");
1050 		diff_output("\n");
1051 		break;
1052 	case D_NREVERSE:
1053 		if (a > b)
1054 			diff_output("a%d %d\n", b, d - c + 1);
1055 		else {
1056 			diff_output("d%d %d\n", a, b - a + 1);
1057 			if (!(c > d))
1058 				/* add changed lines */
1059 				diff_output("a%d %d\n", b, d - c + 1);
1060 		}
1061 		break;
1062 	}
1063 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1064 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1065 		if (a <= b && c <= d && diff_format == D_NORMAL)
1066 			diff_output("---\n");
1067 	}
1068 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1069 	if (i != 0 && diff_format == D_EDIT) {
1070 		/*
1071 		 * A non-zero return value for D_EDIT indicates that the
1072 		 * last line printed was a bare dot (".") that has been
1073 		 * escaped as ".." to prevent ed(1) from misinterpreting
1074 		 * it.  We have to add a substitute command to change this
1075 		 * back and restart where we left off.
1076 		 */
1077 		diff_output(".\n");
1078 		diff_output("%ds/.//\n", a + i - 1);
1079 		b = a + i - 1;
1080 		a = b + 1;
1081 		c += i;
1082 		goto restart;
1083 	}
1084 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1085 		diff_output(".\n");
1086 	if (inifdef) {
1087 		diff_output("#endif /* %s */\n", ifdefname);
1088 		inifdef = 0;
1089 	}
1090 }
1091 
1092 static int
1093 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1094 {
1095 	int i, j, c, lastc, col, nc;
1096 
1097 	/*
1098 	 * When doing #ifdef's, copy down to current line
1099 	 * if this is the first file, so that stuff makes it to output.
1100 	 */
1101 	if (diff_format == D_IFDEF && oldfile) {
1102 		long curpos = ftell(lb);
1103 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1104 		nc = f[a > b ? b : a - 1] - curpos;
1105 		for (i = 0; i < nc; i++)
1106 			diff_output("%c", getc(lb));
1107 	}
1108 	if (a > b)
1109 		return (0);
1110 	if (diff_format == D_IFDEF) {
1111 		if (inifdef) {
1112 			diff_output("#else /* %s%s */\n",
1113 			    oldfile == 1 ? "!" : "", ifdefname);
1114 		} else {
1115 			if (oldfile)
1116 				diff_output("#ifndef %s\n", ifdefname);
1117 			else
1118 				diff_output("#ifdef %s\n", ifdefname);
1119 		}
1120 		inifdef = 1 + oldfile;
1121 	}
1122 	for (i = a; i <= b; i++) {
1123 		fseek(lb, f[i - 1], SEEK_SET);
1124 		nc = f[i] - f[i - 1];
1125 		if (diff_format != D_IFDEF && ch != '\0') {
1126 			diff_output("%c", ch);
1127 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1128 			    || diff_format == D_UNIFIED))
1129 				diff_output("\t");
1130 			else if (diff_format != D_UNIFIED)
1131 				diff_output(" ");
1132 		}
1133 		col = 0;
1134 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1135 			if ((c = getc(lb)) == EOF) {
1136 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1137 				    diff_format == D_NREVERSE)
1138 					warnx("No newline at end of file");
1139 				else
1140 					diff_output("\n\\ No newline at end of "
1141 					    "file\n");
1142 				return (0);
1143 			}
1144 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1145 				do {
1146 					diff_output(" ");
1147 				} while (++col & 7);
1148 			} else {
1149 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1150 				    && lastc == '.') {
1151 					/*
1152 					 * Don't print a bare "." line
1153 					 * since that will confuse ed(1).
1154 					 * Print ".." instead and return,
1155 					 * giving the caller an offset
1156 					 * from which to restart.
1157 					 */
1158 					diff_output(".\n");
1159 					return (i - a + 1);
1160 				}
1161 				diff_output("%c", c);
1162 				col++;
1163 			}
1164 		}
1165 	}
1166 	return (0);
1167 }
1168 
1169 /*
1170  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1171  */
1172 static int
1173 readhash(FILE *f, int flags)
1174 {
1175 	int i, t, space;
1176 	int sum;
1177 
1178 	sum = 1;
1179 	space = 0;
1180 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1181 		if (flags & D_IGNORECASE)
1182 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1183 				if (t == EOF) {
1184 					if (i == 0)
1185 						return (0);
1186 					break;
1187 				}
1188 				sum = sum * 127 + chrtran[t];
1189 			}
1190 		else
1191 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1192 				if (t == EOF) {
1193 					if (i == 0)
1194 						return (0);
1195 					break;
1196 				}
1197 				sum = sum * 127 + t;
1198 			}
1199 	} else {
1200 		for (i = 0;;) {
1201 			switch (t = getc(f)) {
1202 			case '\t':
1203 			case '\r':
1204 			case '\v':
1205 			case '\f':
1206 			case ' ':
1207 				space++;
1208 				continue;
1209 			default:
1210 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1211 					i++;
1212 					space = 0;
1213 				}
1214 				sum = sum * 127 + chrtran[t];
1215 				i++;
1216 				continue;
1217 			case EOF:
1218 				if (i == 0)
1219 					return (0);
1220 				/* FALLTHROUGH */
1221 			case '\n':
1222 				break;
1223 			}
1224 			break;
1225 		}
1226 	}
1227 	/*
1228 	 * There is a remote possibility that we end up with a zero sum.
1229 	 * Zero is used as an EOF marker, so return 1 instead.
1230 	 */
1231 	return (sum == 0 ? 1 : sum);
1232 }
1233 
1234 static int
1235 asciifile(FILE *f)
1236 {
1237 	unsigned char buf[BUFSIZ];
1238 	size_t cnt;
1239 
1240 	if (f == NULL)
1241 		return (1);
1242 
1243 	rewind(f);
1244 	cnt = fread(buf, 1, sizeof(buf), f);
1245 	return (memchr(buf, '\0', cnt) == NULL);
1246 }
1247 
1248 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1249 
1250 static char *
1251 match_function(const long *f, int pos, FILE *fp)
1252 {
1253 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1254 	size_t nc;
1255 	int last = lastline;
1256 	char *state = NULL;
1257 
1258 	lastline = pos;
1259 	while (pos > last) {
1260 		fseek(fp, f[pos - 1], SEEK_SET);
1261 		nc = f[pos] - f[pos - 1];
1262 		if (nc >= sizeof(buf))
1263 			nc = sizeof(buf) - 1;
1264 		nc = fread(buf, 1, nc, fp);
1265 		if (nc > 0) {
1266 			buf[nc] = '\0';
1267 			buf[strcspn(buf, "\n")] = '\0';
1268 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1269 				if (begins_with(buf, "private:")) {
1270 					if (!state)
1271 						state = " (private)";
1272 				} else if (begins_with(buf, "protected:")) {
1273 					if (!state)
1274 						state = " (protected)";
1275 				} else if (begins_with(buf, "public:")) {
1276 					if (!state)
1277 						state = " (public)";
1278 				} else {
1279 					strlcpy(lastbuf, buf, sizeof lastbuf);
1280 					if (state)
1281 						strlcat(lastbuf, state,
1282 						    sizeof lastbuf);
1283 					lastmatchline = pos;
1284 					return lastbuf;
1285 				}
1286 			}
1287 		}
1288 		pos--;
1289 	}
1290 	return lastmatchline > 0 ? lastbuf : NULL;
1291 }
1292 
1293 /* dump accumulated "context" diff changes */
1294 static void
1295 dump_context_vec(FILE *f1, FILE *f2, int flags)
1296 {
1297 	struct context_vec *cvp = context_vec_start;
1298 	int lowa, upb, lowc, upd, do_output;
1299 	int a, b, c, d;
1300 	char ch, *f;
1301 
1302 	if (context_vec_start > context_vec_ptr)
1303 		return;
1304 
1305 	b = d = 0;		/* gcc */
1306 	lowa = MAXIMUM(1, cvp->a - diff_context);
1307 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1308 	lowc = MAXIMUM(1, cvp->c - diff_context);
1309 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1310 
1311 	diff_output("***************");
1312 	if ((flags & D_PROTOTYPE)) {
1313 		f = match_function(ixold, lowa-1, f1);
1314 		if (f != NULL)
1315 			diff_output(" %s", f);
1316 	}
1317 	diff_output("\n*** ");
1318 	range(lowa, upb, ",");
1319 	diff_output(" ****\n");
1320 
1321 	/*
1322 	 * Output changes to the "old" file.  The first loop suppresses
1323 	 * output if there were no changes to the "old" file (we'll see
1324 	 * the "old" lines as context in the "new" list).
1325 	 */
1326 	do_output = 0;
1327 	for (; cvp <= context_vec_ptr; cvp++)
1328 		if (cvp->a <= cvp->b) {
1329 			cvp = context_vec_start;
1330 			do_output++;
1331 			break;
1332 		}
1333 	if (do_output) {
1334 		while (cvp <= context_vec_ptr) {
1335 			a = cvp->a;
1336 			b = cvp->b;
1337 			c = cvp->c;
1338 			d = cvp->d;
1339 
1340 			if (a <= b && c <= d)
1341 				ch = 'c';
1342 			else
1343 				ch = (a <= b) ? 'd' : 'a';
1344 
1345 			if (ch == 'a')
1346 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1347 			else {
1348 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1349 				fetch(ixold, a, b, f1,
1350 				    ch == 'c' ? '!' : '-', 0, flags);
1351 			}
1352 			lowa = b + 1;
1353 			cvp++;
1354 		}
1355 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1356 	}
1357 	/* output changes to the "new" file */
1358 	diff_output("--- ");
1359 	range(lowc, upd, ",");
1360 	diff_output(" ----\n");
1361 
1362 	do_output = 0;
1363 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1364 		if (cvp->c <= cvp->d) {
1365 			cvp = context_vec_start;
1366 			do_output++;
1367 			break;
1368 		}
1369 	if (do_output) {
1370 		while (cvp <= context_vec_ptr) {
1371 			a = cvp->a;
1372 			b = cvp->b;
1373 			c = cvp->c;
1374 			d = cvp->d;
1375 
1376 			if (a <= b && c <= d)
1377 				ch = 'c';
1378 			else
1379 				ch = (a <= b) ? 'd' : 'a';
1380 
1381 			if (ch == 'd')
1382 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1383 			else {
1384 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1385 				fetch(ixnew, c, d, f2,
1386 				    ch == 'c' ? '!' : '+', 0, flags);
1387 			}
1388 			lowc = d + 1;
1389 			cvp++;
1390 		}
1391 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1392 	}
1393 	context_vec_ptr = context_vec_start - 1;
1394 }
1395 
1396 /* dump accumulated "unified" diff changes */
1397 static void
1398 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1399 {
1400 	struct context_vec *cvp = context_vec_start;
1401 	int lowa, upb, lowc, upd;
1402 	int a, b, c, d;
1403 	char ch, *f;
1404 
1405 	if (context_vec_start > context_vec_ptr)
1406 		return;
1407 
1408 	b = d = 0;		/* gcc */
1409 	lowa = MAXIMUM(1, cvp->a - diff_context);
1410 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1411 	lowc = MAXIMUM(1, cvp->c - diff_context);
1412 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1413 
1414 	diff_output("@@ -");
1415 	uni_range(lowa, upb);
1416 	diff_output(" +");
1417 	uni_range(lowc, upd);
1418 	diff_output(" @@");
1419 	if ((flags & D_PROTOTYPE)) {
1420 		f = match_function(ixold, lowa-1, f1);
1421 		if (f != NULL)
1422 			diff_output(" %s", f);
1423 	}
1424 	diff_output("\n");
1425 
1426 	/*
1427 	 * Output changes in "unified" diff format--the old and new lines
1428 	 * are printed together.
1429 	 */
1430 	for (; cvp <= context_vec_ptr; cvp++) {
1431 		a = cvp->a;
1432 		b = cvp->b;
1433 		c = cvp->c;
1434 		d = cvp->d;
1435 
1436 		/*
1437 		 * c: both new and old changes
1438 		 * d: only changes in the old file
1439 		 * a: only changes in the new file
1440 		 */
1441 		if (a <= b && c <= d)
1442 			ch = 'c';
1443 		else
1444 			ch = (a <= b) ? 'd' : 'a';
1445 
1446 		switch (ch) {
1447 		case 'c':
1448 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1449 			fetch(ixold, a, b, f1, '-', 0, flags);
1450 			fetch(ixnew, c, d, f2, '+', 0, flags);
1451 			break;
1452 		case 'd':
1453 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1454 			fetch(ixold, a, b, f1, '-', 0, flags);
1455 			break;
1456 		case 'a':
1457 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1458 			fetch(ixnew, c, d, f2, '+', 0, flags);
1459 			break;
1460 		}
1461 		lowa = b + 1;
1462 		lowc = d + 1;
1463 	}
1464 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1465 
1466 	context_vec_ptr = context_vec_start - 1;
1467 }
1468 
1469 static void
1470 print_header(const char *file1, const char *file2)
1471 {
1472 	if (label[0] != NULL)
1473 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1474 		    label[0]);
1475 	else
1476 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1477 		    file1, ctime(&stb1.st_mtime));
1478 	if (label[1] != NULL)
1479 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1480 		    label[1]);
1481 	else
1482 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1483 		    file2, ctime(&stb2.st_mtime));
1484 }
1485