xref: /openbsd/usr.bin/diff/diffreg.c (revision 404b540a)
1 /*	$OpenBSD: diffreg.c,v 1.72 2009/06/07 08:39:13 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.72 2009/06/07 08:39:13 ray Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 #include "xmalloc.h"
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 static FILE	*opentemp(const char *);
180 static void	 output(char *, FILE *, char *, FILE *, int);
181 static void	 check(char *, FILE *, char *, FILE *, int);
182 static void	 range(int, int, char *);
183 static void	 uni_range(int, int);
184 static void	 dump_context_vec(FILE *, FILE *, int);
185 static void	 dump_unified_vec(FILE *, FILE *, int);
186 static void	 prepare(int, FILE *, off_t, int);
187 static void	 prune(void);
188 static void	 equiv(struct line *, int, struct line *, int, int *);
189 static void	 unravel(int);
190 static void	 unsort(struct line *, int, int *);
191 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
192 static void	 sort(struct line *, int);
193 static void	 print_header(const char *, const char *);
194 static int	 ignoreline(char *);
195 static int	 asciifile(FILE *);
196 static int	 fetch(long *, int, int, FILE *, int, int, int);
197 static int	 newcand(int, int, int);
198 static int	 search(int *, int, int);
199 static int	 skipline(FILE *);
200 static int	 isqrt(int);
201 static int	 stone(int *, int, int *, int *, int);
202 static int	 readhash(FILE *, int);
203 static int	 files_differ(FILE *, FILE *, int);
204 static char	*match_function(const long *, int, FILE *);
205 static char	*preadline(int, size_t, off_t);
206 
207 static int  *J;			/* will be overlaid on class */
208 static int  *class;		/* will be overlaid on file[0] */
209 static int  *klist;		/* will be overlaid on file[0] after class */
210 static int  *member;		/* will be overlaid on file[1] */
211 static int   clen;
212 static int   inifdef;		/* whether or not we are in a #ifdef block */
213 static int   len[2];
214 static int   pref, suff;	/* length of prefix and suffix */
215 static int   slen[2];
216 static int   anychange;
217 static long *ixnew;		/* will be overlaid on file[1] */
218 static long *ixold;		/* will be overlaid on klist */
219 static struct cand *clist;	/* merely a free storage pot for candidates */
220 static int   clistlen;		/* the length of clist */
221 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222 static u_char *chrtran;		/* translation table for case-folding */
223 static struct context_vec *context_vec_start;
224 static struct context_vec *context_vec_end;
225 static struct context_vec *context_vec_ptr;
226 
227 #define FUNCTION_CONTEXT_SIZE	55
228 static char lastbuf[FUNCTION_CONTEXT_SIZE];
229 static int lastline;
230 static int lastmatchline;
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *file1, char *file2, int flags)
293 {
294 	FILE *f1, *f2;
295 	int i, rval, ostdout = -1;
296 	pid_t pid = -1;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	if (lflag) {
373 		/* redirect stdout to pr */
374 		int pfd[2];
375 		char *header;
376 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
377 
378 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
379 		prargv[2] = header;
380 		fflush(stdout);
381 		rewind(stdout);
382 		pipe(pfd);
383 		switch ((pid = fork())) {
384 		case -1:
385 			warnx("No more processes");
386 			status |= 2;
387 			xfree(header);
388 			return (D_ERROR);
389 		case 0:
390 			/* child */
391 			if (pfd[0] != STDIN_FILENO) {
392 				dup2(pfd[0], STDIN_FILENO);
393 				close(pfd[0]);
394 			}
395 			close(pfd[1]);
396 			execv(_PATH_PR, prargv);
397 			_exit(127);
398 		default:
399 			/* parent */
400 			if (pfd[1] != STDOUT_FILENO) {
401 				ostdout = dup(STDOUT_FILENO);
402 				dup2(pfd[1], STDOUT_FILENO);
403 				close(pfd[1]);
404 			}
405 			close(pfd[0]);
406 			rewind(stdout);
407 			xfree(header);
408 		}
409 	}
410 	prepare(0, f1, stb1.st_size, flags);
411 	prepare(1, f2, stb2.st_size, flags);
412 
413 	prune();
414 	sort(sfile[0], slen[0]);
415 	sort(sfile[1], slen[1]);
416 
417 	member = (int *)file[1];
418 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
419 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
420 
421 	class = (int *)file[0];
422 	unsort(sfile[0], slen[0], class);
423 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
424 
425 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
426 	clen = 0;
427 	clistlen = 100;
428 	clist = xcalloc(clistlen, sizeof(*clist));
429 	i = stone(class, slen[0], member, klist, flags);
430 	xfree(member);
431 	xfree(class);
432 
433 	J = xrealloc(J, len[0] + 2, sizeof(*J));
434 	unravel(klist[i]);
435 	xfree(clist);
436 	xfree(klist);
437 
438 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
439 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
440 	check(file1, f1, file2, f2, flags);
441 	output(file1, f1, file2, f2, flags);
442 	if (ostdout != -1) {
443 		int wstatus;
444 
445 		/* close the pipe to pr and restore stdout */
446 		fflush(stdout);
447 		rewind(stdout);
448 		if (ostdout != STDOUT_FILENO) {
449 			close(STDOUT_FILENO);
450 			dup2(ostdout, STDOUT_FILENO);
451 			close(ostdout);
452 		}
453 		waitpid(pid, &wstatus, 0);
454 	}
455 closem:
456 	if (anychange) {
457 		status |= 1;
458 		if (rval == D_SAME)
459 			rval = D_DIFFER;
460 	}
461 	if (f1 != NULL)
462 		fclose(f1);
463 	if (f2 != NULL)
464 		fclose(f2);
465 
466 	return (rval);
467 }
468 
469 /*
470  * Check to see if the given files differ.
471  * Returns 0 if they are the same, 1 if different, and -1 on error.
472  * XXX - could use code from cmp(1) [faster]
473  */
474 static int
475 files_differ(FILE *f1, FILE *f2, int flags)
476 {
477 	char buf1[BUFSIZ], buf2[BUFSIZ];
478 	size_t i, j;
479 
480 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
481 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
482 		return (1);
483 	for (;;) {
484 		i = fread(buf1, 1, sizeof(buf1), f1);
485 		j = fread(buf2, 1, sizeof(buf2), f2);
486 		if (i != j)
487 			return (1);
488 		if (i == 0 && j == 0) {
489 			if (ferror(f1) || ferror(f2))
490 				return (1);
491 			return (0);
492 		}
493 		if (memcmp(buf1, buf2, i) != 0)
494 			return (1);
495 	}
496 }
497 
498 static FILE *
499 opentemp(const char *file)
500 {
501 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
502 	ssize_t nread;
503 	int ifd, ofd;
504 
505 	if (strcmp(file, "-") == 0)
506 		ifd = STDIN_FILENO;
507 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
508 		return (NULL);
509 
510 	if ((tempdir = getenv("TMPDIR")) == NULL)
511 		tempdir = _PATH_TMP;
512 
513 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
514 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
515 	    sizeof(tempfile)) {
516 		close(ifd);
517 		errno = ENAMETOOLONG;
518 		return (NULL);
519 	}
520 
521 	if ((ofd = mkstemp(tempfile)) < 0)
522 		return (NULL);
523 	unlink(tempfile);
524 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
525 		if (write(ofd, buf, nread) != nread) {
526 			close(ifd);
527 			close(ofd);
528 			return (NULL);
529 		}
530 	}
531 	close(ifd);
532 	lseek(ofd, (off_t)0, SEEK_SET);
533 	return (fdopen(ofd, "r"));
534 }
535 
536 char *
537 splice(char *dir, char *file)
538 {
539 	char *tail, *buf;
540 
541 	if ((tail = strrchr(file, '/')) == NULL)
542 		tail = file;
543 	else
544 		tail++;
545 	xasprintf(&buf, "%s/%s", dir, tail);
546 	return (buf);
547 }
548 
549 static void
550 prepare(int i, FILE *fd, off_t filesize, int flags)
551 {
552 	struct line *p;
553 	int j, h;
554 	size_t sz;
555 
556 	rewind(fd);
557 
558 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
559 	if (sz < 100)
560 		sz = 100;
561 
562 	p = xcalloc(sz + 3, sizeof(*p));
563 	for (j = 0; (h = readhash(fd, flags));) {
564 		if (j == sz) {
565 			sz = sz * 3 / 2;
566 			p = xrealloc(p, sz + 3, sizeof(*p));
567 		}
568 		p[++j].value = h;
569 	}
570 	len[i] = j;
571 	file[i] = p;
572 }
573 
574 static void
575 prune(void)
576 {
577 	int i, j;
578 
579 	for (pref = 0; pref < len[0] && pref < len[1] &&
580 	    file[0][pref + 1].value == file[1][pref + 1].value;
581 	    pref++)
582 		;
583 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
584 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
585 	    suff++)
586 		;
587 	for (j = 0; j < 2; j++) {
588 		sfile[j] = file[j] + pref;
589 		slen[j] = len[j] - pref - suff;
590 		for (i = 0; i <= slen[j]; i++)
591 			sfile[j][i].serial = i;
592 	}
593 }
594 
595 static void
596 equiv(struct line *a, int n, struct line *b, int m, int *c)
597 {
598 	int i, j;
599 
600 	i = j = 1;
601 	while (i <= n && j <= m) {
602 		if (a[i].value < b[j].value)
603 			a[i++].value = 0;
604 		else if (a[i].value == b[j].value)
605 			a[i++].value = j;
606 		else
607 			j++;
608 	}
609 	while (i <= n)
610 		a[i++].value = 0;
611 	b[m + 1].value = 0;
612 	j = 0;
613 	while (++j <= m) {
614 		c[j] = -b[j].serial;
615 		while (b[j + 1].value == b[j].value) {
616 			j++;
617 			c[j] = b[j].serial;
618 		}
619 	}
620 	c[j] = -1;
621 }
622 
623 /* Code taken from ping.c */
624 static int
625 isqrt(int n)
626 {
627 	int y, x = 1;
628 
629 	if (n == 0)
630 		return (0);
631 
632 	do { /* newton was a stinker */
633 		y = x;
634 		x = n / x;
635 		x += y;
636 		x /= 2;
637 	} while ((x - y) > 1 || (x - y) < -1);
638 
639 	return (x);
640 }
641 
642 static int
643 stone(int *a, int n, int *b, int *c, int flags)
644 {
645 	int i, k, y, j, l;
646 	int oldc, tc, oldl;
647 	u_int numtries;
648 
649 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
650 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
651 	    MAX(256, isqrt(n));
652 
653 	k = 0;
654 	c[0] = newcand(0, 0, 0);
655 	for (i = 1; i <= n; i++) {
656 		j = a[i];
657 		if (j == 0)
658 			continue;
659 		y = -b[j];
660 		oldl = 0;
661 		oldc = c[0];
662 		numtries = 0;
663 		do {
664 			if (y <= clist[oldc].y)
665 				continue;
666 			l = search(c, k, y);
667 			if (l != oldl + 1)
668 				oldc = c[l - 1];
669 			if (l <= k) {
670 				if (clist[c[l]].y <= y)
671 					continue;
672 				tc = c[l];
673 				c[l] = newcand(i, y, oldc);
674 				oldc = tc;
675 				oldl = l;
676 				numtries++;
677 			} else {
678 				c[l] = newcand(i, y, oldc);
679 				k++;
680 				break;
681 			}
682 		} while ((y = b[++j]) > 0 && numtries < bound);
683 	}
684 	return (k);
685 }
686 
687 static int
688 newcand(int x, int y, int pred)
689 {
690 	struct cand *q;
691 
692 	if (clen == clistlen) {
693 		clistlen = clistlen * 11 / 10;
694 		clist = xrealloc(clist, clistlen, sizeof(*clist));
695 	}
696 	q = clist + clen;
697 	q->x = x;
698 	q->y = y;
699 	q->pred = pred;
700 	return (clen++);
701 }
702 
703 static int
704 search(int *c, int k, int y)
705 {
706 	int i, j, l, t;
707 
708 	if (clist[c[k]].y < y)	/* quick look for typical case */
709 		return (k + 1);
710 	i = 0;
711 	j = k + 1;
712 	for (;;) {
713 		l = (i + j) / 2;
714 		if (l <= i)
715 			break;
716 		t = clist[c[l]].y;
717 		if (t > y)
718 			j = l;
719 		else if (t < y)
720 			i = l;
721 		else
722 			return (l);
723 	}
724 	return (l + 1);
725 }
726 
727 static void
728 unravel(int p)
729 {
730 	struct cand *q;
731 	int i;
732 
733 	for (i = 0; i <= len[0]; i++)
734 		J[i] = i <= pref ? i :
735 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
736 	for (q = clist + p; q->y != 0; q = clist + q->pred)
737 		J[q->x + pref] = q->y + pref;
738 }
739 
740 /*
741  * Check does double duty:
742  *  1.	ferret out any fortuitous correspondences due
743  *	to confounding by hashing (which result in "jackpot")
744  *  2.  collect random access indexes to the two files
745  */
746 static void
747 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
748 {
749 	int i, j, jackpot, c, d;
750 	long ctold, ctnew;
751 
752 	rewind(f1);
753 	rewind(f2);
754 	j = 1;
755 	ixold[0] = ixnew[0] = 0;
756 	jackpot = 0;
757 	ctold = ctnew = 0;
758 	for (i = 1; i <= len[0]; i++) {
759 		if (J[i] == 0) {
760 			ixold[i] = ctold += skipline(f1);
761 			continue;
762 		}
763 		while (j < J[i]) {
764 			ixnew[j] = ctnew += skipline(f2);
765 			j++;
766 		}
767 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
768 			for (;;) {
769 				c = getc(f1);
770 				d = getc(f2);
771 				/*
772 				 * GNU diff ignores a missing newline
773 				 * in one file for -b or -w.
774 				 */
775 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
776 				    ((c == EOF && d == '\n') ||
777 				    (c == '\n' && d == EOF))) {
778 					break;
779 				}
780 				ctold++;
781 				ctnew++;
782 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
783 				    isspace(d)) {
784 					do {
785 						if (c == '\n')
786 							break;
787 						ctold++;
788 					} while (isspace(c = getc(f1)));
789 					do {
790 						if (d == '\n')
791 							break;
792 						ctnew++;
793 					} while (isspace(d = getc(f2)));
794 				} else if ((flags & D_IGNOREBLANKS)) {
795 					while (isspace(c) && c != '\n') {
796 						c = getc(f1);
797 						ctold++;
798 					}
799 					while (isspace(d) && d != '\n') {
800 						d = getc(f2);
801 						ctnew++;
802 					}
803 				}
804 				if (chrtran[c] != chrtran[d]) {
805 					jackpot++;
806 					J[i] = 0;
807 					if (c != '\n' && c != EOF)
808 						ctold += skipline(f1);
809 					if (d != '\n' && c != EOF)
810 						ctnew += skipline(f2);
811 					break;
812 				}
813 				if (c == '\n' || c == EOF)
814 					break;
815 			}
816 		} else {
817 			for (;;) {
818 				ctold++;
819 				ctnew++;
820 				if ((c = getc(f1)) != (d = getc(f2))) {
821 					/* jackpot++; */
822 					J[i] = 0;
823 					if (c != '\n' && c != EOF)
824 						ctold += skipline(f1);
825 					if (d != '\n' && c != EOF)
826 						ctnew += skipline(f2);
827 					break;
828 				}
829 				if (c == '\n' || c == EOF)
830 					break;
831 			}
832 		}
833 		ixold[i] = ctold;
834 		ixnew[j] = ctnew;
835 		j++;
836 	}
837 	for (; j <= len[1]; j++)
838 		ixnew[j] = ctnew += skipline(f2);
839 	/*
840 	 * if (jackpot)
841 	 *	fprintf(stderr, "jackpot\n");
842 	 */
843 }
844 
845 /* shellsort CACM #201 */
846 static void
847 sort(struct line *a, int n)
848 {
849 	struct line *ai, *aim, w;
850 	int j, m = 0, k;
851 
852 	if (n == 0)
853 		return;
854 	for (j = 1; j <= n; j *= 2)
855 		m = 2 * j - 1;
856 	for (m /= 2; m != 0; m /= 2) {
857 		k = n - m;
858 		for (j = 1; j <= k; j++) {
859 			for (ai = &a[j]; ai > a; ai -= m) {
860 				aim = &ai[m];
861 				if (aim < ai)
862 					break;	/* wraparound */
863 				if (aim->value > ai[0].value ||
864 				    (aim->value == ai[0].value &&
865 					aim->serial > ai[0].serial))
866 					break;
867 				w.value = ai[0].value;
868 				ai[0].value = aim->value;
869 				aim->value = w.value;
870 				w.serial = ai[0].serial;
871 				ai[0].serial = aim->serial;
872 				aim->serial = w.serial;
873 			}
874 		}
875 	}
876 }
877 
878 static void
879 unsort(struct line *f, int l, int *b)
880 {
881 	int *a, i;
882 
883 	a = xcalloc(l + 1, sizeof(*a));
884 	for (i = 1; i <= l; i++)
885 		a[f[i].serial] = f[i].value;
886 	for (i = 1; i <= l; i++)
887 		b[i] = a[i];
888 	xfree(a);
889 }
890 
891 static int
892 skipline(FILE *f)
893 {
894 	int i, c;
895 
896 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
897 		continue;
898 	return (i);
899 }
900 
901 static void
902 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
903 {
904 	int m, i0, i1, j0, j1;
905 
906 	rewind(f1);
907 	rewind(f2);
908 	m = len[0];
909 	J[0] = 0;
910 	J[m + 1] = len[1] + 1;
911 	if (diff_format != D_EDIT) {
912 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
913 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
914 				i0++;
915 			j0 = J[i0 - 1] + 1;
916 			i1 = i0 - 1;
917 			while (i1 < m && J[i1 + 1] == 0)
918 				i1++;
919 			j1 = J[i1 + 1] - 1;
920 			J[i1] = j1;
921 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
922 		}
923 	} else {
924 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
925 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
926 				i0--;
927 			j0 = J[i0 + 1] - 1;
928 			i1 = i0 + 1;
929 			while (i1 > 1 && J[i1 - 1] == 0)
930 				i1--;
931 			j1 = J[i1 - 1] + 1;
932 			J[i1] = j1;
933 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
934 		}
935 	}
936 	if (m == 0)
937 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
938 	if (diff_format == D_IFDEF) {
939 		for (;;) {
940 #define	c i0
941 			if ((c = getc(f1)) == EOF)
942 				return;
943 			putchar(c);
944 		}
945 #undef c
946 	}
947 	if (anychange != 0) {
948 		if (diff_format == D_CONTEXT)
949 			dump_context_vec(f1, f2, flags);
950 		else if (diff_format == D_UNIFIED)
951 			dump_unified_vec(f1, f2, flags);
952 	}
953 }
954 
955 static void
956 range(int a, int b, char *separator)
957 {
958 	printf("%d", a > b ? b : a);
959 	if (a < b)
960 		printf("%s%d", separator, b);
961 }
962 
963 static void
964 uni_range(int a, int b)
965 {
966 	if (a < b)
967 		printf("%d,%d", a, b - a + 1);
968 	else if (a == b)
969 		printf("%d", b);
970 	else
971 		printf("%d,0", b);
972 }
973 
974 static char *
975 preadline(int fd, size_t rlen, off_t off)
976 {
977 	char *line;
978 	ssize_t nr;
979 
980 	line = xmalloc(rlen + 1);
981 	if ((nr = pread(fd, line, rlen, off)) < 0)
982 		err(1, "preadline");
983 	if (nr > 0 && line[nr-1] == '\n')
984 		nr--;
985 	line[nr] = '\0';
986 	return (line);
987 }
988 
989 static int
990 ignoreline(char *line)
991 {
992 	int ret;
993 
994 	ret = regexec(&ignore_re, line, 0, NULL, 0);
995 	xfree(line);
996 	return (ret == 0);	/* if it matched, it should be ignored. */
997 }
998 
999 /*
1000  * Indicate that there is a difference between lines a and b of the from file
1001  * to get to lines c to d of the to file.  If a is greater then b then there
1002  * are no lines in the from file involved and this means that there were
1003  * lines appended (beginning at b).  If c is greater than d then there are
1004  * lines missing from the to file.
1005  */
1006 static void
1007 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1008     int *pflags)
1009 {
1010 	static size_t max_context = 64;
1011 	int i;
1012 
1013 restart:
1014 	if (diff_format != D_IFDEF && a > b && c > d)
1015 		return;
1016 	if (ignore_pats != NULL) {
1017 		char *line;
1018 		/*
1019 		 * All lines in the change, insert, or delete must
1020 		 * match an ignore pattern for the change to be
1021 		 * ignored.
1022 		 */
1023 		if (a <= b) {		/* Changes and deletes. */
1024 			for (i = a; i <= b; i++) {
1025 				line = preadline(fileno(f1),
1026 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1027 				if (!ignoreline(line))
1028 					goto proceed;
1029 			}
1030 		}
1031 		if (a > b || c <= d) {	/* Changes and inserts. */
1032 			for (i = c; i <= d; i++) {
1033 				line = preadline(fileno(f2),
1034 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1035 				if (!ignoreline(line))
1036 					goto proceed;
1037 			}
1038 		}
1039 		return;
1040 	}
1041 proceed:
1042 	if (*pflags & D_HEADER) {
1043 		printf("%s %s %s\n", diffargs, file1, file2);
1044 		*pflags &= ~D_HEADER;
1045 	}
1046 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1047 		/*
1048 		 * Allocate change records as needed.
1049 		 */
1050 		if (context_vec_ptr == context_vec_end - 1) {
1051 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1052 			max_context <<= 1;
1053 			context_vec_start = xrealloc(context_vec_start,
1054 			    max_context, sizeof(*context_vec_start));
1055 			context_vec_end = context_vec_start + max_context;
1056 			context_vec_ptr = context_vec_start + offset;
1057 		}
1058 		if (anychange == 0) {
1059 			/*
1060 			 * Print the context/unidiff header first time through.
1061 			 */
1062 			print_header(file1, file2);
1063 			anychange = 1;
1064 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1065 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1066 			/*
1067 			 * If this change is more than 'diff_context' lines from the
1068 			 * previous change, dump the record and reset it.
1069 			 */
1070 			if (diff_format == D_CONTEXT)
1071 				dump_context_vec(f1, f2, *pflags);
1072 			else
1073 				dump_unified_vec(f1, f2, *pflags);
1074 		}
1075 		context_vec_ptr++;
1076 		context_vec_ptr->a = a;
1077 		context_vec_ptr->b = b;
1078 		context_vec_ptr->c = c;
1079 		context_vec_ptr->d = d;
1080 		return;
1081 	}
1082 	if (anychange == 0)
1083 		anychange = 1;
1084 	switch (diff_format) {
1085 	case D_BRIEF:
1086 		return;
1087 	case D_NORMAL:
1088 	case D_EDIT:
1089 		range(a, b, ",");
1090 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1091 		if (diff_format == D_NORMAL)
1092 			range(c, d, ",");
1093 		putchar('\n');
1094 		break;
1095 	case D_REVERSE:
1096 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1097 		range(a, b, " ");
1098 		putchar('\n');
1099 		break;
1100 	case D_NREVERSE:
1101 		if (a > b)
1102 			printf("a%d %d\n", b, d - c + 1);
1103 		else {
1104 			printf("d%d %d\n", a, b - a + 1);
1105 			if (!(c > d))
1106 				/* add changed lines */
1107 				printf("a%d %d\n", b, d - c + 1);
1108 		}
1109 		break;
1110 	}
1111 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1112 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1113 		if (a <= b && c <= d && diff_format == D_NORMAL)
1114 			puts("---");
1115 	}
1116 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1117 	if (i != 0 && diff_format == D_EDIT) {
1118 		/*
1119 		 * A non-zero return value for D_EDIT indicates that the
1120 		 * last line printed was a bare dot (".") that has been
1121 		 * escaped as ".." to prevent ed(1) from misinterpreting
1122 		 * it.  We have to add a substitute command to change this
1123 		 * back and restart where we left off.
1124 		 */
1125 		puts(".");
1126 		printf("%ds/^\\.\\././\n", a);
1127 		a += i;
1128 		c += i;
1129 		goto restart;
1130 	}
1131 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1132 		puts(".");
1133 	if (inifdef) {
1134 		printf("#endif /* %s */\n", ifdefname);
1135 		inifdef = 0;
1136 	}
1137 }
1138 
1139 static int
1140 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1141 {
1142 	int i, j, c, lastc, col, nc;
1143 
1144 	/*
1145 	 * When doing #ifdef's, copy down to current line
1146 	 * if this is the first file, so that stuff makes it to output.
1147 	 */
1148 	if (diff_format == D_IFDEF && oldfile) {
1149 		long curpos = ftell(lb);
1150 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1151 		nc = f[a > b ? b : a - 1] - curpos;
1152 		for (i = 0; i < nc; i++)
1153 			putchar(getc(lb));
1154 	}
1155 	if (a > b)
1156 		return (0);
1157 	if (diff_format == D_IFDEF) {
1158 		if (inifdef) {
1159 			printf("#else /* %s%s */\n",
1160 			    oldfile == 1 ? "!" : "", ifdefname);
1161 		} else {
1162 			if (oldfile)
1163 				printf("#ifndef %s\n", ifdefname);
1164 			else
1165 				printf("#ifdef %s\n", ifdefname);
1166 		}
1167 		inifdef = 1 + oldfile;
1168 	}
1169 	for (i = a; i <= b; i++) {
1170 		fseek(lb, f[i - 1], SEEK_SET);
1171 		nc = f[i] - f[i - 1];
1172 		if (diff_format != D_IFDEF && ch != '\0') {
1173 			putchar(ch);
1174 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1175 			    || diff_format == D_UNIFIED))
1176 				putchar('\t');
1177 			else if (diff_format != D_UNIFIED)
1178 				putchar(' ');
1179 		}
1180 		col = 0;
1181 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1182 			if ((c = getc(lb)) == EOF) {
1183 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1184 				    diff_format == D_NREVERSE)
1185 					warnx("No newline at end of file");
1186 				else
1187 					puts("\n\\ No newline at end of file");
1188 				return (0);
1189 			}
1190 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1191 				do {
1192 					putchar(' ');
1193 				} while (++col & 7);
1194 			} else {
1195 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1196 				    && lastc == '.') {
1197 					/*
1198 					 * Don't print a bare "." line
1199 					 * since that will confuse ed(1).
1200 					 * Print ".." instead and return,
1201 					 * giving the caller an offset
1202 					 * from which to restart.
1203 					 */
1204 					puts(".");
1205 					return (i - a + 1);
1206 				}
1207 				putchar(c);
1208 				col++;
1209 			}
1210 		}
1211 	}
1212 	return (0);
1213 }
1214 
1215 /*
1216  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1217  */
1218 static int
1219 readhash(FILE *f, int flags)
1220 {
1221 	int i, t, space;
1222 	int sum;
1223 
1224 	sum = 1;
1225 	space = 0;
1226 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1227 		if (flags & D_IGNORECASE)
1228 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1229 				if (t == EOF) {
1230 					if (i == 0)
1231 						return (0);
1232 					break;
1233 				}
1234 				sum = sum * 127 + chrtran[t];
1235 			}
1236 		else
1237 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1238 				if (t == EOF) {
1239 					if (i == 0)
1240 						return (0);
1241 					break;
1242 				}
1243 				sum = sum * 127 + t;
1244 			}
1245 	} else {
1246 		for (i = 0;;) {
1247 			switch (t = getc(f)) {
1248 			case '\t':
1249 			case '\r':
1250 			case '\v':
1251 			case '\f':
1252 			case ' ':
1253 				space++;
1254 				continue;
1255 			default:
1256 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1257 					i++;
1258 					space = 0;
1259 				}
1260 				sum = sum * 127 + chrtran[t];
1261 				i++;
1262 				continue;
1263 			case EOF:
1264 				if (i == 0)
1265 					return (0);
1266 				/* FALLTHROUGH */
1267 			case '\n':
1268 				break;
1269 			}
1270 			break;
1271 		}
1272 	}
1273 	/*
1274 	 * There is a remote possibility that we end up with a zero sum.
1275 	 * Zero is used as an EOF marker, so return 1 instead.
1276 	 */
1277 	return (sum == 0 ? 1 : sum);
1278 }
1279 
1280 static int
1281 asciifile(FILE *f)
1282 {
1283 	unsigned char buf[BUFSIZ];
1284 	size_t i, cnt;
1285 
1286 	if (f == NULL)
1287 		return (1);
1288 
1289 	rewind(f);
1290 	cnt = fread(buf, 1, sizeof(buf), f);
1291 	for (i = 0; i < cnt; i++)
1292 		if (!isprint(buf[i]) && !isspace(buf[i]))
1293 			return (0);
1294 	return (1);
1295 }
1296 
1297 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1298 
1299 static char *
1300 match_function(const long *f, int pos, FILE *fp)
1301 {
1302 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1303 	size_t nc;
1304 	int last = lastline;
1305 	char *state = NULL;
1306 
1307 	lastline = pos;
1308 	while (pos > last) {
1309 		fseek(fp, f[pos - 1], SEEK_SET);
1310 		nc = f[pos] - f[pos - 1];
1311 		if (nc >= sizeof(buf))
1312 			nc = sizeof(buf) - 1;
1313 		nc = fread(buf, 1, nc, fp);
1314 		if (nc > 0) {
1315 			buf[nc] = '\0';
1316 			buf[strcspn(buf, "\n")] = '\0';
1317 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1318 				if (begins_with(buf, "private:")) {
1319 					if (!state)
1320 						state = " (private)";
1321 				} else if (begins_with(buf, "protected:")) {
1322 					if (!state)
1323 						state = " (protected)";
1324 				} else if (begins_with(buf, "public:")) {
1325 					if (!state)
1326 						state = " (public)";
1327 				} else {
1328 					strlcpy(lastbuf, buf, sizeof lastbuf);
1329 					if (state)
1330 						strlcat(lastbuf, state,
1331 						    sizeof lastbuf);
1332 					lastmatchline = pos;
1333 					return lastbuf;
1334 				}
1335 			}
1336 		}
1337 		pos--;
1338 	}
1339 	return lastmatchline > 0 ? lastbuf : NULL;
1340 }
1341 
1342 /* dump accumulated "context" diff changes */
1343 static void
1344 dump_context_vec(FILE *f1, FILE *f2, int flags)
1345 {
1346 	struct context_vec *cvp = context_vec_start;
1347 	int lowa, upb, lowc, upd, do_output;
1348 	int a, b, c, d;
1349 	char ch, *f;
1350 
1351 	if (context_vec_start > context_vec_ptr)
1352 		return;
1353 
1354 	b = d = 0;		/* gcc */
1355 	lowa = MAX(1, cvp->a - diff_context);
1356 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1357 	lowc = MAX(1, cvp->c - diff_context);
1358 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1359 
1360 	printf("***************");
1361 	if ((flags & D_PROTOTYPE)) {
1362 		f = match_function(ixold, lowa-1, f1);
1363 		if (f != NULL) {
1364 			putchar(' ');
1365 			fputs(f, stdout);
1366 		}
1367 	}
1368 	printf("\n*** ");
1369 	range(lowa, upb, ",");
1370 	printf(" ****\n");
1371 
1372 	/*
1373 	 * Output changes to the "old" file.  The first loop suppresses
1374 	 * output if there were no changes to the "old" file (we'll see
1375 	 * the "old" lines as context in the "new" list).
1376 	 */
1377 	do_output = 0;
1378 	for (; cvp <= context_vec_ptr; cvp++)
1379 		if (cvp->a <= cvp->b) {
1380 			cvp = context_vec_start;
1381 			do_output++;
1382 			break;
1383 		}
1384 	if (do_output) {
1385 		while (cvp <= context_vec_ptr) {
1386 			a = cvp->a;
1387 			b = cvp->b;
1388 			c = cvp->c;
1389 			d = cvp->d;
1390 
1391 			if (a <= b && c <= d)
1392 				ch = 'c';
1393 			else
1394 				ch = (a <= b) ? 'd' : 'a';
1395 
1396 			if (ch == 'a')
1397 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1398 			else {
1399 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1400 				fetch(ixold, a, b, f1,
1401 				    ch == 'c' ? '!' : '-', 0, flags);
1402 			}
1403 			lowa = b + 1;
1404 			cvp++;
1405 		}
1406 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1407 	}
1408 	/* output changes to the "new" file */
1409 	printf("--- ");
1410 	range(lowc, upd, ",");
1411 	printf(" ----\n");
1412 
1413 	do_output = 0;
1414 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1415 		if (cvp->c <= cvp->d) {
1416 			cvp = context_vec_start;
1417 			do_output++;
1418 			break;
1419 		}
1420 	if (do_output) {
1421 		while (cvp <= context_vec_ptr) {
1422 			a = cvp->a;
1423 			b = cvp->b;
1424 			c = cvp->c;
1425 			d = cvp->d;
1426 
1427 			if (a <= b && c <= d)
1428 				ch = 'c';
1429 			else
1430 				ch = (a <= b) ? 'd' : 'a';
1431 
1432 			if (ch == 'd')
1433 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1434 			else {
1435 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1436 				fetch(ixnew, c, d, f2,
1437 				    ch == 'c' ? '!' : '+', 0, flags);
1438 			}
1439 			lowc = d + 1;
1440 			cvp++;
1441 		}
1442 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1443 	}
1444 	context_vec_ptr = context_vec_start - 1;
1445 }
1446 
1447 /* dump accumulated "unified" diff changes */
1448 static void
1449 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1450 {
1451 	struct context_vec *cvp = context_vec_start;
1452 	int lowa, upb, lowc, upd;
1453 	int a, b, c, d;
1454 	char ch, *f;
1455 
1456 	if (context_vec_start > context_vec_ptr)
1457 		return;
1458 
1459 	b = d = 0;		/* gcc */
1460 	lowa = MAX(1, cvp->a - diff_context);
1461 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1462 	lowc = MAX(1, cvp->c - diff_context);
1463 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1464 
1465 	fputs("@@ -", stdout);
1466 	uni_range(lowa, upb);
1467 	fputs(" +", stdout);
1468 	uni_range(lowc, upd);
1469 	fputs(" @@", stdout);
1470 	if ((flags & D_PROTOTYPE)) {
1471 		f = match_function(ixold, lowa-1, f1);
1472 		if (f != NULL) {
1473 			putchar(' ');
1474 			fputs(f, stdout);
1475 		}
1476 	}
1477 	putchar('\n');
1478 
1479 	/*
1480 	 * Output changes in "unified" diff format--the old and new lines
1481 	 * are printed together.
1482 	 */
1483 	for (; cvp <= context_vec_ptr; cvp++) {
1484 		a = cvp->a;
1485 		b = cvp->b;
1486 		c = cvp->c;
1487 		d = cvp->d;
1488 
1489 		/*
1490 		 * c: both new and old changes
1491 		 * d: only changes in the old file
1492 		 * a: only changes in the new file
1493 		 */
1494 		if (a <= b && c <= d)
1495 			ch = 'c';
1496 		else
1497 			ch = (a <= b) ? 'd' : 'a';
1498 
1499 		switch (ch) {
1500 		case 'c':
1501 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1502 			fetch(ixold, a, b, f1, '-', 0, flags);
1503 			fetch(ixnew, c, d, f2, '+', 0, flags);
1504 			break;
1505 		case 'd':
1506 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1507 			fetch(ixold, a, b, f1, '-', 0, flags);
1508 			break;
1509 		case 'a':
1510 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1511 			fetch(ixnew, c, d, f2, '+', 0, flags);
1512 			break;
1513 		}
1514 		lowa = b + 1;
1515 		lowc = d + 1;
1516 	}
1517 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1518 
1519 	context_vec_ptr = context_vec_start - 1;
1520 }
1521 
1522 static void
1523 print_header(const char *file1, const char *file2)
1524 {
1525 	if (label[0] != NULL)
1526 		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1527 		    label[0]);
1528 	else
1529 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1530 		    file1, ctime(&stb1.st_mtime));
1531 	if (label[1] != NULL)
1532 		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1533 		    label[1]);
1534 	else
1535 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1536 		    file2, ctime(&stb2.st_mtime));
1537 }
1538