1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may use this file only in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.opensource.org/licenses/cddl1.txt
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 /*
39  * Copyright 2006-2020 J. Schilling
40  *
41  * @(#)diff.c	1.84 20/09/19 J. Schilling
42  */
43 #if defined(sun)
44 #pragma ident "@(#)diff.c 1.84 20/09/19 J. Schilling"
45 #endif
46 
47 #if defined(sun)
48 #pragma ident	"@(#)diff.c	1.55	05/07/22 SMI"
49 #endif
50 
51 /*
52  *	diff - differential file comparison
53  *
54  *	Uses an algorithm due to Harold Stone, which finds
55  *	a pair of longest identical subsequences in the two
56  *	files.
57  *
58  *	The major goal is to generate the match vector J.
59  *	J[i] is the index of the line in file1 corresponding
 *	to line i of file0. J[i] = 0 if there is no
61  *	such line in file1.
62  *
63  *	Lines are hashed so as to work in core. All potential
64  *	matches are located by sorting the lines of each file
65  *	on the hash (called value). In particular, this
66  *	collects the equivalence classes in file1 together.
67  *	Subroutine equiv  replaces the value of each line in
68  *	file0 by the index of the first element of its
69  *	matching equivalence in (the reordered) file1.
70  *	To save space equiv squeezes file1 into a single
71  *	array member in which the equivalence classes
72  *	are simply concatenated, except that their first
73  *	members are flagged by changing sign.
74  *
75  *	Next the indices that point into member are unsorted into
76  *	array class according to the original order of file0.
77  *
78  *	The cleverness lies in routine stone. This marches
79  *	through the lines of file0, developing a vector klist
80  *	of "k-candidates". At step i a k-candidate is a matched
81  *	pair of lines x,y (x in file0 y in file1) such that
 *	there is a common subsequence of length k
83  *	between the first i lines of file0 and the first y
84  *	lines of file1, but there is no such subsequence for
85  *	any smaller y. x is the earliest possible mate to y
86  *	that occurs in such a subsequence.
87  *
88  *	Whenever any of the members of the equivalence class of
89  *	lines in file1 matable to a line in file0 has serial number
90  *	less than the y of some k-candidate, that k-candidate
91  *	with the smallest such y is replaced. The new
92  *	k-candidate is chained (via pred) to the current
93  *	k-1 candidate so that the actual subsequence can
94  *	be recovered. When a member has serial number greater
 *	than the y of all k-candidates, the klist is extended.
96  *	At the end, the longest subsequence is pulled out
97  *	and placed in the array J by unravel.
98  *
99  *	With J in hand, the matches there recorded are
100  *	checked against reality to assure that no spurious
101  *	matches have crept in due to hashing. If they have,
102  *	they are broken, and "jackpot " is recorded--a harmless
103  *	matter except that a true match for a spuriously
104  *	mated line may now be unnecessarily reported as a change.
105  *
106  *	Much of the complexity of the program comes simply
107  *	from trying to minimize core utilization and
108  *	maximize the range of doable problems by dynamically
109  *	allocating what is needed and reusing what is not.
110  *	The core requirements for problems larger than somewhat
111  *	are (in words) 2*length(file0) + length(file1) +
112  *	3*(number of k-candidates installed),  typically about
113  *	6n words for files of length n.
114  *
115  *	JS remarks:
116  *
117  *	The amount of memory "in use" largely depends on the allocation
118  *	algorithm as the code intensively uses realloc().
119  *
120  *	If the files differ, we always allocate
121  *	sizeof (off_t) * (lines(file0) + lines(file1))
122  *	In largefile mode we typically allocate less than 20% more
123  *	than in non-largefile mode. It seems that in largefile mode, the
124  *	amount of space need is typically ~ 5 * sizeof (int) *
125  *	(lines(file0) + lines(file1)).
126  *
 *	Largefile mode is needed in order to be able to deal with files
128  *	in the 64 bit inode # range on ZFS. Sun used to use a partial
129  *	largefile mode but this is not portable, so we switched to a full
130  *	large file mode.
131  *
132  *	The code currently uses "int" for line numbers but should use "long".
133  *	If the code is changed to use "long", it would be possible to have
134  *	real large file mode in case that diff is compiled in 64 bit mode.
135  *	In 32 bit mode, there is no need to check for an integer overflow
136  *	as the process will run "out of memory" before line numbers overflow.
137  */
138 #ifdef	SCHILY_BUILD
139 
140 #include <schily/mconfig.h>
141 #include <schily/stdio.h>
142 #include <schily/wchar.h>
143 #include <schily/wctype.h>
144 #include <schily/ctype.h>
145 #include <schily/stdlib.h>
146 #include <schily/limits.h>
147 #include <schily/types.h>
148 #include <schily/stat.h>
149 #include <schily/wait.h>
150 #include <schily/unistd.h>
151 #include <schily/signal.h>
152 #include <schily/fcntl.h>
153 #include <schily/dirent.h>
154 #include <schily/maxpath.h>
155 #include <schily/nlsdefs.h>
156 #include <schily/varargs.h>
157 #include <schily/errno.h>
158 #include <schily/string.h>
159 #include <schily/time.h>
160 #include <version.h>
161 #include <schily/sysexits.h>
162 #define	VMS_VFORK_OK
163 #include <schily/vfork.h>
164 #include <schily/libport.h>
165 
166 #else	/* non-portable SunOS -only definitions BEGIN */
167 
168 #define	SCCS_DIFF		0
169 #define	_FILE_OFFSET_BITS	64
170 #define	_LARGEFILE_SOURCE
171 #include <stdio.h>
172 #include <wchar.h>
173 #include <ctype.h>
174 #include <stdlib.h>
175 #include <limits.h>
176 #include <sys/types.h>
177 #include <sys/stat.h>
178 #include <sys/wait.h>
179 #include <unistd.h>
180 #include <signal.h>
181 #include <fcntl.h>
182 #include <dirent.h>
183 #include <locale.h>
184 #include <stdarg.h>
185 #include <errno.h>
186 #include <string.h>
187 #include <sysexits.h>
188 
189 #ifndef	PROVIDER
190 #define	PROVIDER	"Schily"
191 #endif
192 #ifndef	VERSION
193 #define	VERSION		"5.08"
194 #endif
195 #ifndef	HOST_OS
196 #define	HOST_OS		"SunOS"
197 #endif
198 #ifndef	VDATE
199 #define	VDATE		""
200 #endif
201 
202 #ifdef	USE_VERSION_H
203 #include <version.h>
204 #endif
205 
206 #ifdef	__sparc
207 #define	HOST_CPU	"sparc"
208 #define	HOST_VENDOR	"Sun"
209 #endif
210 #if defined(__i386) || defined(__amd64)
211 #define	HOST_CPU	"i386"
212 #define	HOST_VENDOR	"pc"
213 #endif
214 #ifndef	HOST_CPU
215 #define	HOST_CPU	"unknown"
216 #endif
217 #ifndef	HOST_VENDOR
218 #define	HOST_VENDOR	"unknown"
219 #endif
220 #ifndef	HOST_OS
221 #define	HOST_OS		"unknown"
222 #endif
223 
224 #define	PROTOTYPES
225 #define	__PR(a)	a
226 #define	EXPORT
227 #define	LOCAL	static
228 
229 #define	HAVE_LARGEFILES
230 #define	HAVE_CFTIME
231 #define	HAVE_GETEXECNAME
232 #define	HAVE_MEMCMP
233 #define	HAVE_VFORK
234 #define	_FOUND_STAT_NSECS_
235 #define	HAVE_REALLOC_NULL
236 
237 /*
238  * Found e.g. on SunOS-5.x
239  */
240 #define	stat_ansecs(s)		((s)->st_atim.tv_nsec)
241 #define	stat_mnsecs(s)		((s)->st_mtim.tv_nsec)
242 #define	stat_cnsecs(s)		((s)->st_ctim.tv_nsec)
243 
244 #endif	/* non-portable SunOS -only definitions END */
245 
246 #ifndef	O_BINARY
247 #define	O_BINARY	0
248 #endif
249 #ifdef	HAVE_LARGEFILES
250 #undef	fseek
251 #define	fseek		fseeko
252 #undef	ftell
253 #define	ftell		ftello
254 #endif
255 
256 #ifndef	HAVE_GETEXECNAME
257 #define	error	__none_
258 #include <schily/schily.h>	/* For getexecpath() */
259 #undef	error
260 #endif
261 #include "diff.h"
262 
263 #ifndef	PATH_MAX
264 #ifdef	MAXPATHNAME
265 #define	PATH_MAX	MAXPATHNAME
266 #endif
267 #endif
268 #ifndef	PATH_MAX
269 #define	PATH_MAX	1024
270 #endif
271 
272 #ifdef	pdp11
273 #define	D_BUFSIZ	BUFSIZ
274 #else
275 #define	D_BUFSIZ	(32*1024)
276 #endif
277 
278 /*
279  * In case this is missing in /usr/include/ (e.g. SunOS-4.x)
280  */
281 extern char	*optarg;
282 extern int	optind, opterr, optopt;
283 
284 #define	CHRTRAN(x)	(iflag ? (iswupper(x) ? towlower(x) : (x)) : (x))
285 #define	NCCHRTRAN(x)	(iswupper(x) ? towlower(x) : (x))
286 #undef	max
287 #define	max(a, b)	((a) < (b) ? (b) : (a))
288 #undef	min
289 #define	min(a, b)	((a) > (b) ? (b) : (a))
290 
291 static int pref, suff;		/* length of prefix and suffix */
292 static int *class;		/* will be overlaid on file[0] */
293 static int *member;		/* will be overlaid on file[1] */
294 static int *klist;		/* will be overlaid on file[0] after class */
295 static struct cand *clist;	/* merely a free storage pot for candidates */
296 static int clen = 0;
297 static int camt = 0;
298 static int *J;			/* will be overlaid on class */
299 static off_t *ixold;		/* will be overlaid on klist */
300 static off_t *ixnew;		/* will be overlaid on file[1] */
301 
302 #define	FUNCTION_CONTEXT_SIZE	55
303 static char	lastbuf[FUNCTION_CONTEXT_SIZE];
304 static int	lastline;
305 static int	lastmatchline;
306 
307 static int	didvfork;
308 static int	mbcurmax;
309 
310 static char	nulldev[] = "/dev/null";
311 
312 static void	*talloc __PR((size_t n));
313 static void	*ralloc __PR((void *p, size_t n));
314 	int	main __PR((int argc, char **argv));
315 static void	error __PR((const char *));
316 static void	unravel __PR((int));
317 static void	check __PR((void));
318 static void	output __PR((void));
319 static void	change __PR((int, int, int, int));
320 static void	range __PR((int, int, char *));
321 static void	fetch __PR((off_t *, int, int, int, char *, int));
322 static void	dump_context_vec __PR((void));
323 static void	diffdir __PR((char **, struct pdirs *));
324 static void	calldiffdir __PR((char **, struct pdirs *));
325 static void	setfile __PR((char **, char **, char *));
326 static void	scanpr __PR((struct dir *, int, char *, char *,
327 	char *, char *, char *));
328 static void	only __PR((struct dir *, int));
329 static void	sort __PR((struct line *, int));
330 static void	unsort __PR((struct line *, int, int *));
331 static void	filename __PR((char **, char **, struct stat *, char **));
332 static int	prepare __PR((int, char *));
333 static void	prune __PR((void));
334 static void	equiv __PR((struct line *, int, struct line *, int, int *));
335 static void	done __PR((void));
336 static void	noroom __PR((void));
337 static void	usage __PR((void));
338 static void	initbuf __PR((FILE *, int, off_t));
339 static void	resetbuf __PR((int));
340 
341 static int	diffreg __PR((void));
342 static void	print_dstatus __PR((void));
343 #ifndef	DO_SPAWN_DIFF
344 static int	calldiffreg __PR((int, int));
345 #endif
346 static int	stone __PR((int *, int, int *, int *));
347 static int	newcand __PR((int, int, int));
348 static int	search __PR((int *, int, int));
349 static int	skipline __PR((int));
350 static int	readhash __PR((FILE *, int, char *));
351 static int	entcmp __PR((struct dir *, struct dir *));
352 static int	compare __PR((struct dir *));
353 static int	calldiff __PR((char *));
354 static int	binary __PR((int));
355 static int	filebinary __PR((FILE *));
356 static int	isbinary __PR((char *, int));
357 static int	useless __PR((char *));
358 static char	*copytemp __PR((char *));
359 static char	*pfiletype __PR((mode_t));
360 static struct dir *setupdir __PR((char *));
361 static off_t	ftellbuf __PR((int));
362 static wint_t	wcput	__PR((wint_t));
363 static wint_t	getbufwchar __PR((int, int *));
364 #if !defined(HAVE_CFTIME) && defined(HAVE_STRFTIME)
365 static time_t	gmtoff __PR((const time_t *clk));
366 #endif
367 static void	cf_time	__PR((char *s, size_t maxsize,
368 				char *fmt, const time_t *clk));
369 
370 static char	*match_function __PR((const off_t *, int, int));
371 
372 
373 /*
374  * error message string constants
375  */
376 #define	BAD_MB_ERR	"invalid multibyte character encountered"
377 #define	NO_PROCS_ERR	"no more processes"
378 #define	NO_MEM_ERR	"out of memory"
379 
/*
 * talloc - allocate n bytes with malloc().
 *
 * noroom() is called when malloc() fails; should noroom() ever return,
 * the NULL pointer is handed back to the caller unchanged.
 */
static void *
talloc(n)
	size_t	n;
{
	void	*mem = malloc(n);

	if (mem != NULL)
		return (mem);
	noroom();
	return (mem);
}
390 
/*
 * ralloc - resize the allocation p to n bytes (compacting reallocation).
 *
 * When HAVE_REALLOC_NULL is not defined, a NULL p is served by malloc()
 * instead of realloc().  noroom() is called if no memory could be obtained.
 * Returns the (possibly moved) allocation.
 */
static void *
ralloc(p, n)
	void	*p;
	size_t	n;
{
	void	*grown;

#ifdef	HAVE_REALLOC_NULL
	grown = realloc(p, n);
#else
	grown = (p == NULL) ? malloc(n) : realloc(p, n);
#endif
	if (grown == NULL)
		noroom();
	return (grown);
}
410 
411 #ifdef	DEBUG
412 extern	char	*_end;
413 long	oe = (long)&_end;
414 #endif
415 
416 int
main(argc,argv)417 main(argc, argv)
418 	int	argc;
419 	char	**argv;
420 {
421 	char *argp;
422 	int flag;			/* option flag read by getopt() */
423 	int i, j;
424 	char buf1[D_BUFSIZ], buf2[D_BUFSIZ];
425 
426 
427 	(void) setlocale(LC_ALL, "");
428 #if !defined(TEXT_DOMAIN)		/* Should be defined by cc -D */
429 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
430 #endif
431 	(void) textdomain(TEXT_DOMAIN);
432 
433 	mbcurmax = MB_CUR_MAX;
434 
435 	diffargv = argv;
436 	whichtemp = 0;
437 	while ((flag =
438 	    getopt(argc, argv,
439 		    "()abBdiptwcuefhnqlrsNC:D:S:U:V(version)")) != EOF) {
440 		switch (flag) {
441 		case 'D':
442 			opt = D_IFDEF;
443 			wantelses = 1;
444 			ifdef1 = "";
445 			ifdef2 = optarg;
446 			break;
447 
448 		case 'a':
449 			aflag = 1;
450 			break;
451 
452 		case 'b':
453 			bflag = 1;
454 			break;
455 
456 		case 'B':
457 			Bflag = 1;
458 			break;
459 
460 		case 'd':		/* -d is a no-op for GNU diff compat */
461 			break;
462 
463 		case 'C':
464 		case 'U':
465 			opt = D_CONTEXT;
466 			argp = optarg;
467 			context = 0;
468 			while (*argp >= '0' && *argp <= '9')
469 				context *= 10, context += *argp++ - '0';
470 			if (*argp)
471 				error(gettext("use [ -C num | -U num ]"));
472 			if (flag == 'U')
473 				uflag++;
474 			else
475 				uflag = 0;
476 			break;
477 
478 		case 'c':
479 		case 'u':
480 			opt = D_CONTEXT;
481 			context = 3;
482 			if (flag == 'u')
483 				uflag++;
484 			else
485 				uflag = 0;
486 			break;
487 
488 		case 'N':
489 			Nflag = 1;
490 			break;
491 
492 		case 'e':
493 			opt = D_EDIT;
494 			break;
495 
496 		case 'f':
497 			opt = D_REVERSE;
498 			break;
499 
500 		case 'h':
501 			hflag++;
502 			break;
503 
504 		case 'i':
505 			iflag = 1;
506 			break;
507 
508 		case 'l':
509 			lflag = 1;
510 			break;
511 
512 		case 'n':
513 			opt = D_NREVERSE;
514 			break;
515 
516 		case 'p':
517 			pflag = 1;
518 			if (opt == D_NORMAL) {
519 				opt = D_CONTEXT;
520 				context = 3;
521 				uflag = 0;
522 			}
523 			break;
524 
525 		case 'q':
526 			opt = D_BRIEF;
527 			break;
528 
529 		case 'r':
530 			rflag = 1;
531 			break;
532 
533 		case 'S':
534 			start = optarg;
535 			break;
536 
537 		case 's':
538 			sflag = 1;
539 			break;
540 
541 		case 't':
542 			tflag = 1;
543 			break;
544 
545 		case 'w':
546 			wflag = 1;
547 			break;
548 
549 		case '?':
550 			usage();
551 			break;
552 
553 		case 'V':		/* version */
554 			printf(gettext(
555 			    "diff %s-%s version %s%s%s (%s-%s-%s)\n"),
556 				PROVIDER,
557 				SCCS_DIFF ? "SCCS":"diff",
558 				VERSION,
559 				*VDATE ? " ":"",
560 				VDATE,
561 				HOST_CPU, HOST_VENDOR, HOST_OS);
562 			exit(EX_OK);
563 
564 		default:
565 			/* Not sure how it would get here, but just in case */
566 			(void) fprintf(stderr, "diff: ");
567 			(void) fprintf(stderr,
568 				gettext("invalid option -%c\n"), flag);
569 			usage();
570 		}
571 	}
572 
573 	argc -= optind;
574 	argv = &argv[optind];
575 
576 	if (opt != D_CONTEXT && uflag)
577 		uflag = 0;
578 
579 	if (argc != 2)
580 		error(gettext("two filename arguments required"));
581 
582 	file1 = argv[0];
583 	file2 = argv[1];
584 	file1ok = file2ok = 1;
585 	status = 0;
586 
587 	if (hflag) {
588 		if (opt) {
589 			error(
590 gettext("-h doesn't support -e, -f, -n, -c, -q, -u, or -D"));
591 		} else {
592 			diffargv[0] = "diffh";
593 			(void) execv(diffh, diffargv);
594 			(void) fprintf(stderr, "diffh: ");
595 			perror(diffh);
596 			status = 2;
597 			done();
598 		}
599 
600 	}
601 
602 	if (Nflag) {
603 		int	fail = 0;
604 
605 		if (stat(file1, &stb1) < 0) {
606 			if (errno == ENOENT) {		/* file1 nonexisting */
607 				file1ok = 0;
608 				stb1.st_mode = S_IFREG;	/* assume plain file */
609 				stb1.st_size = 0;
610 				input[0] = fopen(nulldev, "rb");
611 				fail += 1;
612 			}
613 		}
614 		if (stat(file2, &stb2) < 0) {
615 			if (errno == ENOENT) {
616 				file2ok = 0;
617 				if (!fail)
618 					stb2.st_mode = stb1.st_mode;
619 				else
620 					stb2.st_mode = S_IFREG;
621 				stb2.st_size = 0;
622 				input[1] = fopen(nulldev, "rb");
623 				fail += 1;
624 			}
625 		} else if (fail == 1) {			/* file2 exists	    */
626 			stb1.st_mode = stb2.st_mode;	/* file1 like file2 */
627 		}
628 		if (fail > 1) {				/* both files missing */
629 			if (input[0] != NULL)
630 				fclose(input[0]);
631 			if (input[1] != NULL)
632 				fclose(input[1]);
633 			input[0] = NULL;
634 			input[1] = NULL;
635 		}
636 	}
637 
638 	if (strcmp(file1, "-") == 0) {
639 		if (fstat(fileno(stdin), &stb1) == 0) {
640 			stb1.st_mode = S_IFREG;
641 		} else {
642 			(void) fprintf(stderr, "diff: ");
643 			perror("stdin");
644 			status = 2;
645 			done();
646 		}
647 	} else if (input[0] == NULL && stat(file1, &stb1) < 0) {
648 		(void) fprintf(stderr, "diff: ");
649 		perror(file1);
650 		status = 2;
651 		done();
652 	}
653 
654 	if (strcmp(file2, "-") == 0) {
655 		if (strcmp(file1, "-") == 0) {
656 			error(gettext("cannot specify - -"));
657 		} else {
658 			if (fstat(fileno(stdin), &stb2) == 0) {
659 				stb2.st_mode = S_IFREG;
660 			} else {
661 				(void) fprintf(stderr, "diff: ");
662 				perror("stdin");
663 				status = 2;
664 				done();
665 			}
666 		}
667 	} else if (input[1] == NULL && stat(file2, &stb2) < 0) {
668 		(void) fprintf(stderr, "diff: ");
669 		perror(file2);
670 		status = 2;
671 		done();
672 	}
673 
674 	if ((stb1.st_mode & S_IFMT) == S_IFDIR &&
675 	    (stb2.st_mode & S_IFMT) == S_IFDIR) {
676 		diffdir(argv, (struct pdirs *)NULL);
677 		done();
678 	    }
679 
680 	filename(&file1, &file2, &stb1, &input_file1);
681 	filename(&file2, &file1, &stb2, &input_file2);
682 	if (input[0] == NULL && (input[0] = fopen(file1, "rb")) == NULL) {
683 		(void) fprintf(stderr, "diff: ");
684 		perror(file1);
685 		status = 2;
686 		done();
687 	}
688 	initbuf(input[0], 0, (off_t)0);
689 
690 	if (input[1] == NULL && (input[1] = fopen(file2, "rb")) == NULL) {
691 		(void) fprintf(stderr, "diff: ");
692 		perror(file2);
693 		status = 2;
694 		done();
695 	}
696 	initbuf(input[1], 1, (off_t)0);
697 
698 #ifdef	HAVE_SETVBUF
699 	setvbuf(input[0], NULL, _IOFBF, 32*1024);
700 	setvbuf(input[1], NULL, _IOFBF, 32*1024);
701 #endif
702 
703 	if (stb1.st_size != stb2.st_size)
704 		goto notsame;
705 
706 	for (;;) {
707 		i = fread(buf1, 1, D_BUFSIZ, input[0]);
708 		j = fread(buf2, 1, D_BUFSIZ, input[1]);
709 		if (ferror(input[0]) || ferror(input[1])) {
710 			(void) fprintf(stderr, "diff: ");
711 			(void) fprintf(stderr, gettext("Error reading "));
712 			perror(ferror(input[0])? file1:file2);
713 			(void) fclose(input[0]);
714 			(void) fclose(input[1]);
715 			status = 2;
716 			done();
717 		}
718 		if (i != j)
719 			goto notsame;
720 		if (i == 0 && j == 0) {
721 			/* files are the same; diff -D needs to print one */
722 			if (opt == D_IFDEF) {
723 				rewind(input[0]);
724 				while ((i =
725 				    fread(buf1, 1, D_BUFSIZ, input[0])) > 0) {
726 					(void) fwrite(buf1, 1, i, stdout);
727 				}
728 			}
729 			(void) fclose(input[0]);
730 			(void) fclose(input[1]);
731 			status = 0;
732 			goto same;		/* files don't differ */
733 		}
734 #ifdef	HAVE_MEMCMP
735 		if (memcmp(buf1, buf2, i))
736 			goto notsame;
737 #else
738 		for (j = 0; j < i; j++)
739 			if (buf1[j] != buf2[j])
740 				goto notsame;
741 #endif
742 	}
743 
744 notsame:
745 	status = 1;
746 	if (!aflag &&
747 	    (filebinary(input[0]) || filebinary(input[1]))) {
748 		if (ferror(input[0]) || ferror(input[1])) {
749 			(void) fprintf(stderr, "diff: ");
750 			(void) fprintf(stderr, gettext("Error reading "));
751 			perror(ferror(input[0])? file1:file2);
752 			(void) fclose(input[0]);
753 			(void) fclose(input[1]);
754 			status = 2;
755 			done();
756 		}
757 		(void) printf(gettext("Binary files %s and %s differ\n"),
758 		    file1, file2);
759 		(void) fclose(input[0]);
760 		(void) fclose(input[1]);
761 		done();
762 	}
763 	anychange = diffreg();
764 	status = anychange;
765 
766 same:
767 	print_dstatus();
768 
769 #ifdef	DEBUG
770 	fprintf(stderr, "Allocated space: %ld Bytes\n", (long)sbrk(0) - oe);
771 #endif
772 	done();
773 	/*NOTREACHED*/
774 	return (0);
775 }
776 
777 static int
diffreg()778 diffreg()
779 {
780 	int	k;
781 
782 	anychange = 0;
783 	lastline = 0;
784 	lastmatchline = 0;
785 
786 	if (prepare(0, file1) || prepare(1, file2))
787 		return (anychange = 1);
788 	prune();
789 	sort(sfile[0], slen[0]);
790 	sort(sfile[1], slen[1]);
791 
792 	member = (int *)file[1];
793 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
794 	member = (int *)ralloc((void *)member, (slen[1] + 2) * sizeof (int));
795 
796 	class = (int *)file[0];
797 	unsort(sfile[0], slen[0], class);
798 	class = (int *)ralloc((void *)class, (slen[0] + 2) * sizeof (int));
799 
800 	klist = (int *)talloc((slen[0] + 2) * sizeof (int));
801 	clist = (struct cand *)talloc(sizeof (cand));
802 	clen = 0;
803 	camt = 0;
804 	k = stone(class, slen[0], member, klist);
805 	free((void *)member);
806 	free((void *)class);
807 
808 	J = (int *)ralloc(J, (len[0] + 2) * sizeof (int));
809 	unravel(klist[k]);
810 	free((char *)clist);
811 	free((char *)klist);
812 
813 	ixold = (off_t *)ralloc(ixold, (len[0] + 2) * sizeof (off_t));
814 	ixnew = (off_t *)ralloc(ixnew, (len[1] + 2) * sizeof (off_t));
815 	check();
816 	output();
817 
818 	return (anychange);
819 }
820 
821 static void
print_dstatus()822 print_dstatus()
823 {
824 	if (opt == D_CONTEXT && anychange == 0)
825 		(void) printf(gettext("No differences encountered\n"));
826 	else if (opt == D_BRIEF && anychange != 0)
827 		(void) printf(gettext("Files %s and %s differ\n"),
828 		    file1, file2);
829 }
830 
831 #ifndef	DO_SPAWN_DIFF
832 static int
calldiffreg(f1,f2)833 calldiffreg(f1, f2)
834 	int	f1;
835 	int	f2;
836 {
837 	int	result = anychange;
838 	int	ret;
839 	char	*in1 = input_file1;
840 	char	*in2 = input_file2;
841 
842 	input_file1 = file1;
843 	input_file2 = file2;
844 
845 	if ((input[0] = fdopen(f1, "rb")) == NULL) {
846 		(void) fprintf(stderr, "diff: ");
847 		perror(file1);
848 		status = 2;
849 		done();
850 	}
851 	if ((input[1] = fdopen(f2, "rb")) == NULL) {
852 		(void) fprintf(stderr, "diff: ");
853 		perror(file1);
854 		status = 2;
855 		done();
856 	}
857 
858 	initbuf(input[0], 0, (off_t)0);
859 	initbuf(input[1], 1, (off_t)0);
860 
861 #ifdef	HAVE_SETVBUF
862 	setvbuf(input[0], NULL, _IOFBF, 32*1024);
863 	setvbuf(input[1], NULL, _IOFBF, 32*1024);
864 #endif
865 	rewind(input[0]);
866 	rewind(input[1]);
867 
868 	ret = diffreg();
869 	print_dstatus();
870 	fclose(input[0]);
871 	fclose(input[1]);
872 
873 	input_file1 = in1;
874 	input_file2 = in2;
875 	if (ret > result)
876 		result = ret;
877 	return (result);
878 }
879 #endif
880 
881 static int
stone(a,n,b,c)882 stone(a, n, b, c)
883 	int	*a;
884 	int	n;
885 	int	*b;
886 	int	*c;
887 {
888 	int i, k, y;
889 	int j, l;
890 	int oldc, tc;
891 	int oldl;
892 
893 	k = 0;
894 	c[0] = newcand(0, 0, 0);
895 	for (i = 1; i <= n; i++) {
896 		j = a[i];
897 		if (j == 0)
898 			continue;
899 		y = -b[j];
900 		oldl = 0;
901 		oldc = c[0];
902 		do {
903 			if (y <= clist[oldc].y)
904 				continue;
905 			l = search(c, k, y);
906 			if (l != oldl+1)
907 				oldc = c[l-1];
908 			if (l <= k) {
909 				if (clist[c[l]].y <= y)
910 					continue;
911 				tc = c[l];
912 				c[l] = newcand(i, y, oldc);
913 				oldc = tc;
914 				oldl = l;
915 			} else {
916 				c[l] = newcand(i, y, oldc);
917 				k++;
918 				break;
919 			}
920 		} while ((y = b[++j]) > 0);
921 	}
922 	return (k);
923 }
924 
925 static int
newcand(x,y,pred)926 newcand(x, y, pred)
927 	int	x;
928 	int	y;
929 	int	pred;
930 {
931 	struct cand *q;
932 
933 	if (clen >= camt) {
934 		camt += 64;
935 		clist = (struct cand *)ralloc((void *)clist,
936 		    camt * sizeof (cand));
937 	}
938 	q = clist + clen;
939 	q->x = x;
940 	q->y = y;
941 	q->pred = pred;
942 	return (clen++);
943 }
944 
945 static int
search(c,k,y)946 search(c, k, y)
947 	int	*c;
948 	int	k;
949 	int	y;
950 {
951 	int i, j, l;
952 	int t;
953 
954 	if (clist[c[k]].y < y)	/* quick look for typical case */
955 		return (k + 1);
956 	i = 0;
957 	j = k+1;
958 	while ((l = (i + j) / 2) > i) {
959 		t = clist[c[l]].y;
960 		if (t > y)
961 			j = l;
962 		else if (t < y)
963 			i = l;
964 		else
965 			return (l);
966 	}
967 	return (l + 1);
968 }
969 
970 static void
unravel(p)971 unravel(p)
972 	int	p;
973 {
974 	int i;
975 	struct cand *q;
976 
977 	for (i = 0; i <= len[0]; i++)
978 		J[i] = i <= pref ? i :
979 			i > len[0] - suff ? i + len[1] - len[0]:
980 			0;
981 	for (q = clist + p; q->y != 0; q = clist + q->pred)
982 		J[q->x + pref] = q->y + pref;
983 }
984 
/*
 * check does double duty:
 * 1. ferret out any fortuitous correspondences due to confounding by
 * hashing (which result in "jackpot")
 * 2. collect random access indexes to the two files
 *
 * Both files are read again from the start.  For every line i of file0
 * that J[] claims to match line J[i] of file1, the two lines are compared
 * character by character (honoring -b, -w and -i); if they turn out to
 * differ, the match is broken by setting J[i] = 0.
 *
 * While reading, ixold[i] and ixnew[j] are set to the number of bytes
 * consumed from the respective file up to the end of that line.
 */

static void
check()
{
	wint_t	c, d;		/* current character from file0 / file1 */
	int i, j;		/* current line in file0 / file1 */
	/* int jackpot; */
	int	mlen;		/* byte length of the character just read */
	off_t ctold, ctnew;	/* bytes consumed so far from file0 / file1 */

	resetbuf(0);
	resetbuf(1);

	j = 1;
	ixold[0] = ixnew[0] = 0;
	/* jackpot = 0; */

	/*
	 * ctold and ctnew are byte positions within the file (suitable for
	 * lseek()).  After we get a character with getbufwchar(), instead
	 * of just incrementing the byte position by 1, we add the number of
	 * bytes the character actually occupies, which getbufwchar() hands
	 * back in mlen.  skipline() does the same.
	 */
	ctold = ctnew = 0;
	for (i = 1; i <= len[0]; i++) {
		if (J[i] == 0) {
			/* unmatched line in file0: only record its offset */
			ixold[i] = ctold += skipline(0);
			continue;
		}
		/* skip the lines of file1 in front of the claimed mate */
		while (j < J[i]) {
			ixnew[j] = ctnew += skipline(1);
			j++;
		}
		if (bflag || wflag || iflag) {
			for (;;) {
				c = getbufwchar(0, &mlen);
				ctold += mlen;
				d = getbufwchar(1, &mlen);
				ctnew += mlen;

				if (bflag && iswspace(c) && iswspace(d)) {
					/*
					 * -b: white space on both sides;
					 * consume the rest of each run but
					 * stop at newline or end of file.
					 */
					while (iswspace(c)) {
						if (c == '\n' || c == WEOF)
							break;

						c = getbufwchar(0, &mlen);
						ctold += mlen;
					}
					while (iswspace(d)) {
						if (d == '\n' || d == WEOF)
							break;

						d = getbufwchar(1, &mlen);
						ctnew += mlen;
					}
				} else if (wflag) {
					/* -w: skip white space except newline */
					while (iswspace(c) && c != '\n') {
						c = getbufwchar(0, &mlen);
						ctold += mlen;
					}
					while (iswspace(d) && d != '\n') {
						d = getbufwchar(1, &mlen);
						ctnew += mlen;
					}
				}
				if (c == WEOF || d == WEOF) {
					if (c != d) {
						/*
						 * Only one file ended: break
						 * the match and consume the
						 * rest of the other line.
						 */
						/* jackpot++; */
						J[i] = 0;
						if (c != '\n' && c != WEOF)
							ctold += skipline(0);
						if (d != '\n' && d != WEOF)
							ctnew += skipline(1);
						break;
					}
					break;
				} else {
					/* CHRTRAN() folds case when -i is set */
					if (CHRTRAN(c) != CHRTRAN(d)) {
						/* jackpot++; */
						J[i] = 0;
						if (c != '\n')
							ctold += skipline(0);
						if (d != '\n')
							ctnew += skipline(1);
						break;
					}
					if (c == '\n')
						break;
				}
			}
		} else {
			/* plain comparison: characters must be identical */
			for (;;) {
				c = getbufwchar(0, &mlen);
				ctold += mlen;
				d = getbufwchar(1, &mlen);
				ctnew += mlen;
				if (c != d) {
					/* jackpot++; */
					J[i] = 0;
					if (c != '\n' && c != WEOF)
						ctold += skipline(0);
					if (d != '\n' && d != WEOF)
						ctnew += skipline(1);
					break;
				}
				if (c == '\n' || c == WEOF)
					break;
			}
		}
		ixold[i] = ctold;
		ixnew[j] = ctnew;
		j++;
	}
	/* record the offsets of the remaining lines of file1 */
	for (; j <= len[1]; j++) {
		ixnew[j] = ctnew += skipline(1);
	}

/*	if(jackpot)			*/
/*		fprintf(stderr, "diff: jackpot\n");	*/
}
1112 
/*
 * skipline - read from file f up to and including the next newline (or
 * up to end of file).
 *
 * Returns 1 plus the sum of the byte lengths reported by getbufwchar()
 * for the characters in front of the newline/WEOF.
 */
static int
skipline(f)
	int	f;
{
	int	nbytes = 1;
	int	mlen;
	wint_t	wc;

	for (;;) {
		wc = getbufwchar(f, &mlen);
		if (wc == '\n' || wc == WEOF)
			break;
		nbytes += mlen;
	}
	return (nbytes);
}
1126 
/*
 * output - walk the match vector J and report every range of lines that
 * is not part of a run of consecutive matches through change().
 *
 * For all formats except D_EDIT the ranges are visited from the start of
 * the file to the end, for D_EDIT from the end to the start.  With
 * D_IFDEF the rest of file0 is copied out through wcput() afterwards;
 * for context diffs that saw a change, dump_context_vec() is called at
 * the end.
 */
static void
output()
{
	int m;			/* number of lines in file0 */
	wint_t	wc;
	int i0, i1, j1;		/* i0..i1: range in file0, j0..j1: in file1 */
	int j0;
	int	mlen;

	resetbuf(0);
	resetbuf(1);

	m = len[0];
	/* sentinels in front of the first and behind the last line */
	J[0] = 0;
	J[m + 1] = len[1] + 1;
	if (opt != D_EDIT) {
		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
			/* skip lines that continue a run of matches */
			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
				i0++;
			j0 = J[i0 - 1] + 1;
			i1 = i0 - 1;
			/* extend the range over the unmatched lines */
			while (i1 < m && J[i1 + 1] == 0)
				i1++;
			j1 = J[i1 + 1] - 1;
			/*
			 * Store j1 so that the scan in the next round sees
			 * line i1 + 1 as the continuation of a run.
			 */
			J[i1] = j1;
			change(i0, i1, j0, j1);
		}
	} else {
		/* same as above, but walking backwards through the file */
		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
				i0--;
			j0 = J[i0 + 1] - 1;
			i1 = i0 + 1;
			while (i1 > 1 && J[i1 - 1] == 0)
				i1--;
			j1 = J[i1 - 1] + 1;
			J[i1] = j1;
			change(i1, i0, j1, j0);
		}
	}
	/* file0 has no lines: the loops above did not run at all */
	if (m == 0)
		change(1, 0, 1, len[1]);
	if (opt == D_IFDEF) {
		/* copy what is left of file0 until end of file */
		for (;;) {
			wc = getbufwchar(0, &mlen);
			if (wc == WEOF)
				return;
			(void) wcput(wc);
		}
	}
	if (anychange && opt == D_CONTEXT)
		dump_context_vec();
}
1180 
1181 
1182 /*
1183  * indicate that there is a difference between lines a and b of the from file
1184  * to get to lines c to d of the to file.
 * If a is greater than b then there are no lines in the from file involved
1186  * and this means that there were lines appended (beginning at b).
1187  * If c is greater than d then there are lines missing from the to file.
1188  */
1189 static void
change(a,b,c,d)1190 change(a, b, c, d)
1191 	int	a;
1192 	int	b;
1193 	int	c;
1194 	int	d;
1195 {
1196 	char	time_buf[BUFSIZ];
1197 	char	*dcmsg;
1198 	char	*p;
1199 
1200 #if	BUFSIZ < 40	/* 36 should be sufficient */
1201 	error time buffer too small
1202 #endif
1203 	if (opt != D_IFDEF && a > b && c > d)
1204 		return;
1205 	if (Bflag) {
1206 		int	i;
1207 
1208 		/*
1209 		 * On a UNIX system, enpty lines only contain a single "\n" and
1210 		 * thus cause a file offset difference of 1 to the next line.
1211 		 */
1212 		if (a > b) {	/* Inserted */
1213 			for (i = c; i <= d; i++) {
1214 				if ((ixnew[i] - ixnew[i - 1]) != 1)
1215 					goto show;
1216 			}
1217 		}
1218 		if (c > d) {	/* Deleted */
1219 			for (i = a; i <= b; i++) {
1220 				if ((ixold[i] - ixold[i - 1]) != 1)
1221 					goto show;
1222 			}
1223 		}
1224 		return;
1225 	}
1226 show:
1227 	if (anychange == 0) {
1228 		anychange = 1;
1229 		if (opt == D_CONTEXT) {
1230 			/*
1231 			 * TRANSLATION_NOTE_FOR_DC
1232 			 * This message is the format of file
1233 			 * timestamps written with the -C and
1234 			 * -c options.
1235 			 * %a -- locale's abbreviated weekday name
1236 			 * %b -- locale's abbreviated month name
1237 			 * %e -- day of month [1,31]
1238 			 * %T -- Time as %H:%M:%S
1239 			 * %Y -- Year, including the century
1240 			 */
1241 			if (uflag) {
1242 #if	!defined(_FOUND_STAT_NSECS_)
1243 				dcmsg = "%Y-%m-%d %H:%M:%S %z";
1244 #else
1245 				dcmsg = "%Y-%m-%d %H:%M:%S.000000000 %z";
1246 #endif
1247 			} else {
1248 				dcmsg = dcgettext(NULL, "%a %b %e %T %Y",
1249 								LC_TIME);
1250 			}
1251 			cf_time(time_buf, sizeof (time_buf),
1252 						dcmsg, &stb1.st_mtime);
1253 
1254 #if	defined(_FOUND_STAT_NSECS_)
1255 			/*
1256 			 * Be careful here: in the German locale, the string
1257 			 * contains "So. " for "Sonntag".
1258 			 */
1259 			if ((p = strchr(time_buf, '.')) != NULL &&
1260 			    p[1] == '0') {
1261 				long	ns = stat_mnsecs(&stb1);
1262 				if (ns < 0)
1263 					ns = 0;
1264 				sprintf(++p, "%9.9ld", ns);
1265 				p[9] = ' ';	/* '\0' from sprintf() */
1266 			}
1267 #endif
1268 			if (uflag)
1269 				(void) printf("--- %s	%s\n", input_file1,
1270 				    time_buf);
1271 			else
1272 				(void) printf("*** %s	%s\n", input_file1,
1273 				    time_buf);
1274 			cf_time(time_buf, sizeof (time_buf),
1275 						dcmsg, &stb2.st_mtime);
1276 
1277 #if	defined(_FOUND_STAT_NSECS_)
1278 			/*
1279 			 * Be careful here: in the German locale, the string
1280 			 * contains "So. " for "Sonntag".
1281 			 */
1282 			if ((p = strchr(time_buf, '.')) != NULL &&
1283 			    p[1] == '0') {
1284 				long	ns = stat_mnsecs(&stb2);
1285 				if (ns < 0)
1286 					ns = 0;
1287 				sprintf(++p, "%9.9ld", ns);
1288 				p[9] = ' ';	/* '\0' from sprintf() */
1289 			}
1290 #endif
1291 			if (uflag)
1292 				(void) printf("+++ %s	%s\n", input_file2,
1293 				    time_buf);
1294 			else
1295 				(void) printf("--- %s	%s\n", input_file2,
1296 				    time_buf);
1297 
1298 			if (context_vec_start == NULL) {
1299 				context_vec_start = (struct context_vec *)
1300 					    malloc(MAX_CONTEXT *
1301 					    sizeof (struct context_vec));
1302 			}
1303 			if (context_vec_start == NULL)
1304 				error(gettext(NO_MEM_ERR));
1305 
1306 			context_vec_end = context_vec_start + (MAX_CONTEXT - 1);
1307 			context_vec_ptr = context_vec_start - 1;
1308 		}
1309 	}
1310 
1311 	if (opt == D_CONTEXT) {
1312 		/*
1313 		 * if this new change is within 'context' lines of
1314 		 * the previous change, just add it to the change
1315 		 * record.  If the record is full or if this
1316 		 * change is more than 'context' lines from the previous
1317 		 * change, dump the record, reset it & add the new change.
1318 		 */
1319 		if (context_vec_ptr >= context_vec_end ||
1320 		    (context_vec_ptr >= context_vec_start &&
1321 		    a > (context_vec_ptr->b + 2 * context) &&
1322 		    c > (context_vec_ptr->d + 2 * context)))
1323 			dump_context_vec();
1324 
1325 		context_vec_ptr++;
1326 		context_vec_ptr->a = a;
1327 		context_vec_ptr->b = b;
1328 		context_vec_ptr->c = c;
1329 		context_vec_ptr->d = d;
1330 		return;
1331 	}
1332 
1333 	switch (opt) {
1334 	case D_BRIEF:
1335 		return;
1336 	case D_NORMAL:
1337 	case D_EDIT:
1338 		range(a, b, ",");
1339 		(void) putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1340 		if (opt == D_NORMAL) range(c, d, ",");
1341 		(void) printf("\n");
1342 		break;
1343 	case D_REVERSE:
1344 		(void) putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1345 		range(a, b, " ");
1346 		(void) printf("\n");
1347 		break;
1348 	case D_NREVERSE:
1349 		if (a > b) {
1350 			(void) printf("a%d %d\n", b, d - c + 1);
1351 		} else {
1352 			(void) printf("d%d %d\n", a, b - a + 1);
1353 			if (!(c > d))
1354 				/* add changed lines */
1355 				(void) printf("a%d %d\n", b, d - c + 1);
1356 		}
1357 		break;
1358 	}
1359 	if (opt == D_NORMAL || opt == D_IFDEF) {
1360 		fetch(ixold, a, b, 0, "< ", 1);
1361 		if (a <= b && c <= d && opt == D_NORMAL)
1362 			(void) prints("---\n");
1363 	}
1364 	fetch(ixnew, c, d, 1, opt == D_NORMAL?"> ":empty, 0);
1365 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
1366 		(void) prints(".\n");
1367 	if (inifdef) {
1368 		(void) fprintf(stdout, "#endif /* %s */\n", endifname);
1369 		inifdef = 0;
1370 	}
1371 }
1372 
/*
 * Print a line range: the smaller of both numbers, followed by the
 * separator and b if the range covers more than one line (a < b).
 */
static void
range(a, b, separator)
	int	a;
	int	b;
	char	*separator;
{
	int	first = a;

	if (a > b)
		first = b;
	(void) printf("%d", first);

	if (a >= b)
		return;
	(void) printf("%s%d", separator, b);
}
1384 
/*
 * Print the lines a to b of the input file with the index "filen".
 *
 * f		line offset table of that file, line i starts at f[i - 1]
 * a, b		first and last line to print, no line is printed if a > b
 * filen	index into input[] and parameter for the buffered readers
 * s		prefix printed in front of every line (not with D_IFDEF)
 * oldfile	non-zero if the lines are taken from the old file
 *
 * With D_IFDEF and oldfile set, the text between the current read position
 * and the hunk is copied to the output first, and the printed lines are
 * preceded by a matching #ifdef, #ifndef or #else line.
 */
static void
fetch(f, a, b, filen, s, oldfile)
	off_t	*f;
	int	a;
	int	b;
	int	filen;
	char	*s;
	int	oldfile;
{
	int i;
	int col;
	int nc;
	int mlen = 0;
	wint_t	ch;
	FILE	*lb;

	lb = input[filen];
	/*
	 * When doing #ifdef's, copy down to current line
	 * if this is the first file, so that stuff makes it to output.
	 */
	if (opt == D_IFDEF && oldfile) {
		off_t curpos = ftellbuf(filen);
		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
		/* nc is the number of bytes between curpos and that offset */
		nc = f[(a > b) ? b : (a - 1) ] - curpos;
		/* mlen is set by getbufwchar() and advances the byte count */
		for (i = 0; i < nc; i += mlen) {
			ch = getbufwchar(filen, &mlen);
			if (ch == WEOF) {
				(void) putchar('\n');
				break;
			} else {
				(void) wcput(ch);
			}
		}
	}
	/* Empty range: there are no lines to print */
	if (a > b)
		return;
	if (opt == D_IFDEF) {
		/* oneflag: exactly one of ifdef1 and ifdef2 is non-empty */
		int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
		if (inifdef) {
			/* A section is already open, switch to its #else */
			(void) fprintf(stdout, "#else /* %s%s */\n",
			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
		} else {
			if (oneflag) {
				/* There was only one ifdef given */
				endifname = ifdef2;
				if (oldfile)
					(void) fprintf(stdout,
					    "#ifndef %s\n", endifname);
				else
					(void) fprintf(stdout,
					    "#ifdef %s\n", endifname);
			} else {
				endifname = oldfile ? ifdef1 : ifdef2;
				(void) fprintf(stdout,
					"#ifdef %s\n", endifname);
			}
		}
		/* Non-zero marks an open section: 2 for old, 1 for new lines */
		inifdef = 1 + oldfile;
	}

	for (i = a; i <= b; i++) {
		wint_t	lastch;

		if (ftellbuf(filen) != f[i - 1]) {
			/*
			 * Only seek in case we are not at the expected offset.
			 * This is marginally slower with small files but 50%
			 * faster with huge files.
			 */
			(void) fseek(lb, f[i - 1], SEEK_SET);
			initbuf(lb, filen, f[i - 1]);
		}
		if (opt != D_IFDEF)
			(void) prints(s);
		col = 0;
		lastch = '\0';
		/* Copy one line, the newline itself is printed further down */
		while ((ch = getbufwchar(filen, &mlen)) != '\n' && ch != WEOF) {
			if (ch == '\t' && tflag) {
				/* Expand the tab with spaces to a multiple of 8 */
				do
					(void) putchar(' ');
				while (++col & 7);
			} else {
				(void) wcput(ch);
				/* Remember the character printed in column 0 */
				if (col++ == 0)
					lastch = ch;
			}
		}
		/*
		 * When creating an "ed" script, we cannot directly append a
		 * line that contains only ".\n". We rather enter "..\n", leave
		 * append mode, substitute ".." by "." and re-enter append mode.
		 */
		if (opt == D_EDIT && col == 1 && lastch == '.' && ch == '\n') {
			char	*cp = ".\n.\ns/.//\na";

			while (*cp)
				(void) wcput(*cp++);
		}
		(void) putchar('\n');
	}
}
1487 
/*
 * Read the next line of a file and return its hash value.
 *
 * hashing has the effect of
 * arranging line in 7-bit bytes and then
 * summing 1-s complement in 16-bit hunks
 *
 * f		stream that is read with getc() in the byte oriented cases
 * filen	file index that is read with getbufwchar() in all other cases
 * str		file name, only used in the "missing newline" warning
 *
 * Returns the hash sum of the line. Returns 0 if the end of the file is
 * reached before a character of the line was added to the sum.
 */

static int
readhash(f, filen, str)
	FILE	*f;
	int	filen;
	char	*str;
{
	long sum;
	unsigned int	shift;
	int space;
	int t;
	wint_t	wt;
	int	mlen;

	/* Start with 1, so an empty line gives 1 and not the EOF value 0 */
	sum = 1;
	space = 0;
	if (!bflag && !wflag) {
		/* White space is significant: every character is hashed */
		if (iflag) {
			if (mbcurmax == 1) {
				/* In this case, diff doesn't have to take */
				/* care of multibyte characters. */
				for (shift = 0; (t = getc(f)) != '\n';
					shift += 7) {
					if (t == EOF) {
						/* shift != 0: line has text */
						if (shift) {
							(void) fprintf(stderr,
	gettext("Warning: missing newline at end of file %s\n"), str);
							break;
						} else {
							return (0);
						}
					}
					/* Upper case is hashed as lower case */
					sum += (isupper(t) ? tolower(t) : t) <<
						(shift &= HALFMASK);
				}
			} else {
				/* In this case, diff needs to take care of */
				/* multibyte characters. */
				for (shift = 0;
				(wt = getbufwchar(filen, &mlen)) != '\n';
					shift += 7) {
					if (wt == WEOF) {
						if (shift) {
							(void) fprintf(stderr,
	gettext("Warning: missing newline at end of file %s\n"), str);
							break;
						} else {
							return (0);
						}
					}
					/* NOTE(review): NCCHRTRAN presumably */
					/* folds the case - confirm in diff.h */
					sum += NCCHRTRAN(wt) <<
						(shift &= HALFMASK);
				}
			}
		} else {
			/* In this case, diff doesn't have to take care of */
			/* multibyte characters. */
			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
				if (t == EOF) {
					if (shift) {
						(void) fprintf(stderr,
	gettext("Warning: missing newline at end of file %s\n"), str);
						break;
					} else {
						return (0);
					}
				}
				sum += (long)t << (shift &= HALFMASK);
			}
		}
	} else {
		/* In this case, diff needs to take care of */
		/* multibyte characters. */
		for (shift = 0; ; ) {
			wt = getbufwchar(filen, &mlen);

			/* White space is only counted and never hashed */
			if (wt != '\n' && iswspace(wt)) {
				space++;
				continue;
			} else {
				switch (wt) {
				case WEOF:
					if (shift) {
						(void) fprintf(stderr,
	gettext("Warning: missing newline at end of file %s\n"), str);
						break;
					} else {
						return (0);
					}
				default:
					/*
					 * Without wflag, preceding white space
					 * advances the shift once, regardless
					 * of how many characters were skipped.
					 */
					if (space && !wflag) {
						shift += 7;
						space = 0;
					}
					sum += CHRTRAN(wt) <<
						(shift &= HALFMASK);
					shift += 7;
					continue;
				case '\n':
					break;
				}
			}
			/* Only reached at end of line or end of file */
			break;
		}
	}
	return (sum);
}
1600 
1601 
/*
 * dump accumulated "context" diff changes
 *
 * Prints the changes stored from context_vec_start to context_vec_ptr as one
 * hunk, surrounded by up to "context" unchanged lines. With uflag set, the
 * unified format is written, otherwise the context format. The vector is
 * marked empty (context_vec_ptr = context_vec_start - 1) on return. Nothing
 * is done if the vector is empty already.
 */
static void
dump_context_vec()
{
	int	a, b, c, d;
	char	ch;
	struct	context_vec *cvp = context_vec_start;
	int	lowa, upb, lowc, upd;
	int	do_output;

	if (cvp > context_vec_ptr)
		return;

	/*
	 * Make GCC quiet. We would not need to initialize "b" and "d", as
	 * from here on cvp <= context_vec_ptr is guaranteed and thus the two
	 * loops are always entered at least once.
	 */
	b = d = 0;

	/*
	 * Line range of the hunk in both files: from the first change to the
	 * last change plus the context lines, limited to the existing lines.
	 */
	lowa = max(1, cvp->a - context);
	upb  = min(len[0], context_vec_ptr->b + context);
	lowc = max(1, cvp->c - context);
	upd  = min(len[1], context_vec_ptr->d + context);

	if (uflag) {
		/*
		 * The POSIX standard likes to see 0,0 for an empty range at
		 * the beginning of a file. We use
		 * 	1,0 for "diff -Nu file /dev/null"
		 * and
		 * 	0,0 for "diff -Nu file non-existent"
		 * which matches the behavior of patch(1) that removes files
		 * only if the range was specified by 0,0.
		 */
		/* "a" and "b" are used as start line corrections here */
		a = b = 0;
		if (file1ok == 0)
			a = 1;
		if (file2ok == 0)
			b = 1;
		(void) printf("@@ -%d,%d +%d,%d @@",
		    lowa > upb ? upb : 		/* Needed for -U0 */
		    lowa - a, upb - lowa + 1,
#ifdef	__symmetric_low__			/* orthogonal but wrong */
		    lowc > upd ? upd :
#endif
		    lowc - b, upd - lowc + 1);
		if (pflag) {
			char	*f;

			/* Append the text match_function() returns, if any */
			f = match_function(ixold, lowa-1, 0);
			if (f != NULL)
				(void) printf(" %s", f);
		}
		(void) printf("\n");
	} else {
		(void) printf("***************");
		if (pflag) {
			char	*f;

			/* Append the text match_function() returns, if any */
			f = match_function(ixold, lowa-1, 0);
			if (f != NULL)
				(void) printf(" %s", f);
		}
		(void) printf("\n*** ");
		range(lowa, upb, ",");
		(void) printf(" ****\n");
	}

	/*
	 * output changes to the "old" file.  The first loop suppresses
	 * output if there were no changes to the "old" file (we'll see
	 * the "old" lines as context in the "new" list).
	 */
	if (uflag) {
		do_output = 1;
	} else {
		for (do_output = 0; cvp <= context_vec_ptr; cvp++) {
			if (cvp->a <= cvp->b) {
				/* Found old lines: rewind for the output loop */
				cvp = context_vec_start;
				do_output++;
				break;
			}
		}
	}

	if (do_output) {
		while (cvp <= context_vec_ptr) {
			a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;

			/* 'c' = changed, 'd' = deleted, 'a' = appended */
			if (a <= b && c <= d)
				ch = 'c';
			else
				ch = (a <= b) ? 'd' : 'a';

			if (ch == 'a') {
				/* The last argument should not affect */
				/* the behavior of fetch() */
				fetch(ixold, lowa, b, 0, uflag ? " " : "  ", 1);
				if (uflag)
					fetch(ixnew, c, d, 1, "+", 0);
			} else if (ch == 'd') {
				fetch(ixold, lowa, a - 1, 0, uflag ? " " :
				    "  ", 1);
				fetch(ixold, a, b, 0, uflag ? "-" : "- ", 1);
			} else {
				/* The last argument should not affect */
				/* the behavior of fetch() */
				fetch(ixold, lowa, a-1, 0, uflag ? " " : "  ",
				    1);
				if (uflag) {
					fetch(ixold, a, b, 0, "-", 1);
					fetch(ixnew, c, d, 1, "+", 0);
				} else {
					fetch(ixold, a, b, 0, "! ", 1);
				}
			}
			/* The next context lines start behind this change */
			lowa = b + 1;
			cvp++;
		}
		/* The last argument should not affect the behavior */
		/* of fetch() */
		/* Trailing context behind the last change */
		fetch(ixold, b+1, upb, 0, uflag ? " " : "  ", 1);
	}

	if (uflag) {
		/* The unified format is complete here, empty the vector */
		context_vec_ptr = context_vec_start - 1;
		return;
	}

	/* output changes to the "new" file */
	(void) printf("--- ");
	range(lowc, upd, ",");
	(void) printf(" ----\n");

	/* As above: only print lines if the "new" file has changed lines */
	do_output = 0;
	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) {
		if (cvp->c <= cvp->d) {
			cvp = context_vec_start;
			do_output++;
			break;
		}
	}

	if (do_output) {
		while (cvp <= context_vec_ptr) {
			a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;

			if (a <= b && c <= d)
				ch = 'c';
			else
				ch = (a <= b) ? 'd' : 'a';

			if (ch == 'd') {
				/* The last argument should not affect */
				/* the behavior of fetch() */
				fetch(ixnew, lowc, d, 1, "  ", 0);
			} else {
				/* The last argument should not affect */
				/* the behavior of fetch() */
				fetch(ixnew, lowc, c - 1, 1, "  ", 0);
				fetch(ixnew, c, d, 1,
				    ch == 'c' ? "! " : "+ ", 0);
			}
			lowc = d + 1;
			cvp++;
		}
		/* The last argument should not affect the behavior */
		/* of fetch() */
		/* Trailing context behind the last change */
		fetch(ixnew, d + 1, upd, 1, "  ", 0);
	}
	/* Mark the vector as empty */
	context_vec_ptr = context_vec_start - 1;
}
1775 
1776 
1777 
1778 /*
1779  * diff - directory comparison
1780  */
1781 int	header;
1782 char	title[2 * BUFSIZ], *etitle;
1783 
1784 static void
diffdir(argv,pdirs)1785 diffdir(argv, pdirs)
1786 	char		**argv;
1787 	struct pdirs	*pdirs;
1788 {
1789 	struct dir *d1, *d2;
1790 	struct dir *dir1, *dir2;
1791 	int i;
1792 	int cmp;
1793 	int result, dirstatus;
1794 
1795 	if (opt == D_IFDEF)
1796 		error(gettext("cannot specify -D with directories"));
1797 
1798 	if (opt == D_EDIT && (sflag || lflag)) {
1799 		(void) fprintf(stderr, "diff: ");
1800 		(void) fprintf(stderr, gettext(
1801 			"warning: should not give -s or -l with -e\n"));
1802 	}
1803 	dirstatus = 0;
1804 	title[0] = 0;
1805 	(void) strlcpy(title, "diff ", sizeof (title));
1806 	for (i = 1; diffargv[i + 2]; i++) {
1807 		if (strcmp(diffargv[i], "-") == 0) {
1808 			continue;	/* Skip -S and its argument */
1809 		}
1810 		(void) strlcat(title, diffargv[i], sizeof (title));
1811 		(void) strlcat(title, " ", sizeof (title));
1812 	}
1813 	for (etitle = title; *etitle; etitle++)
1814 		;
1815 	setfile(&file1, &efile1, file1);
1816 	setfile(&file2, &efile2, file2);
1817 	argv[0] = file1;
1818 	argv[1] = file2;
1819 	dir1 = setupdir(file1);
1820 	dir2 = setupdir(file2);
1821 	d1 = dir1; d2 = dir2;
1822 	while (d1->d_entry != 0 || d2->d_entry != 0) {
1823 		if (d1->d_entry && useless(d1->d_entry)) {
1824 			d1++;
1825 			continue;
1826 		}
1827 		if (d2->d_entry && useless(d2->d_entry)) {
1828 			d2++;
1829 			continue;
1830 		}
1831 		if (d1->d_entry == 0)
1832 			cmp = 1;
1833 		else if (d2->d_entry == 0)
1834 			cmp = -1;
1835 		else
1836 			cmp = strcmp(d1->d_entry, d2->d_entry);
1837 
1838 		file1ok = file2ok = 1;
1839 		if (cmp < 0) {
1840 			if (Nflag) {
1841 				result = compare(d1);
1842 				if (result > dirstatus)
1843 					dirstatus = result;
1844 			} else if (lflag) {
1845 				d1->d_flags |= ONLY;
1846 			} else if (opt == D_NORMAL || opt == D_CONTEXT ||
1847 			    opt == D_BRIEF) {
1848 				only(d1, 1);
1849 			}
1850 			if (d1->d_entry)
1851 				d1++;
1852 			if (!Nflag && dirstatus == 0)
1853 				dirstatus = 1;
1854 		} else if (cmp == 0) {
1855 			result = compare(d1);
1856 			if (result > dirstatus)
1857 				dirstatus = result;
1858 			if (d1->d_entry)
1859 				d1++;
1860 			if (d2->d_entry)
1861 				d2++;
1862 		} else {
1863 			if (Nflag) {
1864 				result = compare(d2);
1865 				if (result > dirstatus)
1866 					dirstatus = result;
1867 				if (d2->d_flags & DIRECT)
1868 					d2->d_flags |= XDIRECT;
1869 			} else if (lflag) {
1870 				d2->d_flags |= ONLY;
1871 			} else if (opt == D_NORMAL || opt == D_CONTEXT) {
1872 				only(d2, 2);
1873 			}
1874 			if (d2->d_entry)
1875 				d2++;
1876 			if (!Nflag && dirstatus == 0)
1877 				dirstatus = 1;
1878 		}
1879 	}
1880 	if (lflag) {
1881 		scanpr(dir1, ONLY,
1882 			gettext("Only in %.*s"), file1, efile1, 0, 0);
1883 		scanpr(dir2, ONLY,
1884 			gettext("Only in %.*s"), file2, efile2, 0, 0);
1885 		scanpr(dir1, SAME,
1886 		    gettext("Common identical files in %.*s and %.*s"),
1887 		    file1, efile1, file2, efile2);
1888 		scanpr(dir1, DIFFER,
1889 		    gettext("Binary files which differ in %.*s and %.*s"),
1890 		    file1, efile1, file2, efile2);
1891 		scanpr(dir1, DIRECT,
1892 		    gettext("Common subdirectories of %.*s and %.*s"),
1893 		    file1, efile1, file2, efile2);
1894 	}
1895 	if (rflag) {
1896 		struct pdirs	pdir;
1897 
1898 		pdir.p_last = pdirs;
1899 
1900 		if (header && lflag)
1901 			(void) printf("\f");
1902 		for (d1 = dir1; d1->d_entry; d1++) {
1903 			if ((d1->d_flags & DIRECT) == 0)
1904 				continue;
1905 			(void) strcpy(efile1, d1->d_entry);
1906 			(void) strcpy(efile2, d1->d_entry);
1907 			pdir.p_dev1 = d1->d_dev1;
1908 			pdir.p_dev2 = d1->d_dev2;
1909 			pdir.p_ino1 = d1->d_ino1;
1910 			pdir.p_ino2 = d1->d_ino2;
1911 			calldiffdir(argv, &pdir);
1912 		}
1913 		if (Nflag) {
1914 			for (d2 = dir2; d2->d_entry; d2++) {
1915 				if ((d2->d_flags & XDIRECT) == 0)
1916 						continue;
1917 				(void) strcpy(efile1, d2->d_entry);
1918 				(void) strcpy(efile2, d2->d_entry);
1919 				pdir.p_dev1 = d2->d_dev1;
1920 				pdir.p_dev2 = d2->d_dev2;
1921 				pdir.p_ino1 = d2->d_ino1;
1922 				pdir.p_ino2 = d2->d_ino2;
1923 				calldiffdir(argv, &pdir);
1924 			}
1925 		}
1926 	}
1927 	for (d1 = dir1; d1->d_entry; d1++)
1928 		free(d1->d_entry);
1929 	for (d2 = dir2; d2->d_entry; d2++)
1930 		free(d2->d_entry);
1931 	free(dir1);
1932 	free(dir2);
1933 	if (dirstatus > status)
1934 		status = dirstatus;
1935 }
1936 
1937 static void
calldiffdir(argv,pdirs)1938 calldiffdir(argv, pdirs)
1939 	char		**argv;
1940 	struct pdirs	*pdirs;
1941 {
1942 	char	*f1 = file1;
1943 	char	*f2 = file2;
1944 	char	*ef1 = efile1;
1945 	char	*ef2 = efile2;
1946 	struct pdirs	*pd = pdirs->p_last;
1947 
1948 #ifndef	DIRCMP_DEBUG
1949 	/*
1950 	 * We do not need to compare identical directories.
1951 	 */
1952 	if (pdirs->p_dev1 == pdirs->p_dev2 &&
1953 	    pdirs->p_ino1 == pdirs->p_ino2)
1954 		return;
1955 #endif
1956 	while (pd) {
1957 		if (pdirs->p_dev1 == pd->p_dev1 &&
1958 		    pdirs->p_dev2 == pd->p_dev2 &&
1959 		    pdirs->p_ino1 == pd->p_ino1 &&
1960 		    pdirs->p_ino2 == pd->p_ino2)
1961 			return;
1962 		pd = pd->p_last;
1963 	}
1964 
1965 	diffdir(argv, pdirs);
1966 	free(file1);
1967 	free(file2);
1968 	file1 = f1;
1969 	file2 = f2;
1970 	efile1 = ef1;
1971 	efile2 = ef2;
1972 	argv[0] = file1;
1973 	argv[1] = file2;
1974 }
1975 
/*
 * Create a path prefix for the directory "filen".
 *
 * fpp		receives a newly allocated string that holds "filen/"
 * epp		receives a pointer to the terminating null byte of that
 *		string, i.e. the place where an entry name may be appended
 * filen	name of the directory
 *
 * The allocation holds the name, the '/', the null byte and BUFSIZ more
 * bytes behind the '/' for the entry names the callers copy to *epp.
 * The program exits with status 1 if no memory is available.
 */
static void
setfile(fpp, epp, filen)
	char	**fpp;
	char	**epp;
	char	*filen;
{
	size_t	len = strlen(filen);
	char	*cp;

	/*
	 * The size depends on the length of the name. A fixed size of BUFSIZ
	 * is too small for names with BUFSIZ - 1 or more characters.
	 */
	cp = (char *)malloc(len + 2 + BUFSIZ);
	*fpp = cp;
	if (cp == 0) {
		(void) fprintf(stderr, "diff: ");
		(void) fprintf(stderr, gettext("out of memory\n"));
		exit(1);
	}
	(void) strcpy(cp, filen);
	cp += len;
	*cp++ = '/';
	*cp = 0;
	*epp = cp;
}
1997 
1998 static void
scanpr(dp,test,titlen,file1n,efile1n,file2n,efile2n)1999 scanpr(dp, test, titlen, file1n, efile1n, file2n, efile2n)
2000 	struct dir	*dp;
2001 	int		test;
2002 	char		*titlen;
2003 	char		*file1n;
2004 	char		*efile1n;
2005 	char		*file2n;
2006 	char		*efile2n;
2007 {
2008 	int titled = 0;
2009 
2010 	for (; dp->d_entry; dp++) {
2011 		if ((dp->d_flags & test) == 0)
2012 			continue;
2013 		if (titled == 0) {
2014 			if (header == 0)
2015 				header = 1;
2016 			else
2017 				(void) printf("\n");
2018 			(void) printf(titlen,
2019 			    efile1n - file1n - 1, file1n,
2020 			    efile2n - file2n - 1, file2n);
2021 			(void) printf(":\n");
2022 			titled = 1;
2023 		}
2024 		(void) printf("\t%s\n", dp->d_entry);
2025 	}
2026 }
2027 
2028 static void
only(dp,which)2029 only(dp, which)
2030 	struct dir	*dp;
2031 	int		which;
2032 {
2033 	char *filen = which == 1 ? file1 : file2;
2034 	char *efilen = which == 1 ? efile1 : efile2;
2035 
2036 	(void) printf(gettext("Only in %.*s: %s\n"),
2037 	    (int)(efilen - filen - 1), filen,
2038 	    dp->d_entry);
2039 }
2040 
2041 static struct dir *
setupdir(cp)2042 setupdir(cp)
2043 	char	*cp;
2044 {
2045 	struct dir *dp = 0, *ep;
2046 	struct dirent *rp;
2047 	int nitems;
2048 	int	dplen;
2049 	int size;
2050 	DIR *dirp;
2051 
2052 	dirp = opendir(cp);
2053 	if (Nflag && dirp == NULL) {
2054 		dp = (struct dir *)malloc(sizeof (struct dir));
2055 		if (dp == 0)
2056 			error(gettext(NO_MEM_ERR));
2057 		dp[0].d_entry = 0;		/* delimiter */
2058 		return (dp);
2059 	}
2060 	if (dirp == NULL) {
2061 		(void) fprintf(stderr, "diff: ");
2062 		perror(cp);
2063 		status = 2;
2064 		done();
2065 	}
2066 	nitems = 0;
2067 	dplen = 0;
2068 	dp = (struct dir *)malloc(sizeof (struct dir));
2069 	if (dp == 0)
2070 		error(gettext(NO_MEM_ERR));
2071 
2072 	while ((rp = readdir(dirp)) != NULL) {
2073 		ep = &dp[nitems++];
2074 		ep->d_entry = 0;
2075 		ep->d_flags = 0;
2076 		ep->d_dev1 = 0;
2077 		ep->d_dev2 = 0;
2078 		ep->d_ino1 = 0;
2079 		ep->d_ino2 = 0;
2080 		size = strlen(rp->d_name);
2081 		if (size > 0) {
2082 			ep->d_entry = (char *)malloc(size + 1);
2083 			if (ep->d_entry == 0)
2084 				error(gettext(NO_MEM_ERR));
2085 
2086 			(void) strcpy(ep->d_entry, rp->d_name);
2087 		}
2088 		if (nitems >= dplen) {
2089 			dplen += 64;
2090 			dp = (struct dir *)realloc((char *)dp,
2091 				(dplen + 1) * sizeof (struct dir));
2092 		}
2093 		if (dp == 0)
2094 			error(gettext(NO_MEM_ERR));
2095 	}
2096 	dp[nitems].d_entry = 0;		/* delimiter */
2097 	(void) closedir(dirp);
2098 	qsort(dp, nitems, sizeof (struct dir),
2099 		(int (*) __PR((const void *, const void *)))entcmp);
2100 	return (dp);
2101 }
2102 
2103 static int
entcmp(d1,d2)2104 entcmp(d1, d2)
2105 	struct dir	*d1;
2106 	struct dir	*d2;
2107 {
2108 	return (strcmp(d1->d_entry, d2->d_entry));
2109 }
2110 
/*
 * Compare the entry dp->d_entry in both directories.
 *
 * The name is appended to the two directory prefixes (at efile1 and
 * efile2), so that file1 and file2 are the path names to compare.
 * With Nflag, a file that does not exist is treated like an empty file
 * (nulldev is opened instead) and file1ok or file2ok is set to 0.
 *
 * Depending on the options, the result is printed or stored in dp->d_flags
 * (DIRECT, SAME, DIFFER).
 *
 * Returns 0 if the files are identical or both are directories, 1 if they
 * differ and 2 on errors. For differing non-binary files, the result of
 * calldiff() or calldiffreg() is returned.
 */
static int
compare(dp)
	struct dir	*dp;
{
	int i, j;
	int f1 = -1, f2 = -1;
	mode_t fmt1, fmt2;
	struct stat statb1, statb2;
	char buf1[D_BUFSIZ], buf2[D_BUFSIZ];
	int result = 0;

	(void) strcpy(efile1, dp->d_entry);
	(void) strcpy(efile2, dp->d_entry);

	if (stat(file1, &statb1) == -1) {
		if (errno == ENOENT && Nflag) {
			/* A mode of 0 matches none of the file types below */
			statb1.st_mode = 0;
			statb1.st_dev = 0;
			statb1.st_ino = 0;
			result = 1;
			file1ok = 0;
		} else {
			(void) fprintf(stderr, "diff: ");
			perror(file1);
			return (2);
		}
	}
	if (stat(file2, &statb2) == -1) {
		if (errno == ENOENT && Nflag) {
			statb2.st_mode = 0;
			statb2.st_dev = 0;
			statb2.st_ino = 0;
			result = 1;
			file2ok = 0;
		} else {
			(void) fprintf(stderr, "diff: ");
			perror(file2);
			return (2);
		}
	}

	fmt1 = statb1.st_mode & S_IFMT;
	fmt2 = statb2.st_mode & S_IFMT;

	/* Only plain files and the stand-in for missing files are opened */
	if (fmt1 == S_IFREG) {
		f1 = open(file1, O_RDONLY|O_BINARY);
	} else if (file1ok == 0) {
		f1 = open(nulldev, O_RDONLY|O_BINARY);
	}
	if ((fmt1 == S_IFREG || file1ok == 0) && f1 < 0) {
		(void) fprintf(stderr, "diff: ");
		perror(file1);
		return (2);
	}

	if (fmt2 == S_IFREG) {
		f2 = open(file2, O_RDONLY|O_BINARY);
	} else if (file2ok == 0) {
		f2 = open(nulldev, O_RDONLY|O_BINARY);
	}
	if ((fmt2 == S_IFREG || file2ok == 0) && f2 < 0) {
		(void) fprintf(stderr, "diff: ");
		perror(file2);
		(void) close(f1);
		return (2);
	}

	if (result) {
		/*
		 * One side is missing: a directory on the other side makes
		 * this a directory pair, a plain file on the other side is
		 * compared against the empty stand-in.
		 */
		if (fmt1 == S_IFDIR || fmt2 == S_IFDIR)
			fmt1 = fmt2 = S_IFDIR;
		if (fmt1 == S_IFREG || fmt2 == S_IFREG)
			goto notsame;
	}

	if (fmt1 != S_IFREG || fmt2 != S_IFREG) {
		/* At least one side is not a plain file */
		if (fmt1 == fmt2) {
			switch (fmt1) {

			case S_IFDIR:
				/* Remember the pair for the recursion */
				dp->d_flags = DIRECT;
				dp->d_dev1 = statb1.st_dev;
				dp->d_dev2 = statb2.st_dev;
				dp->d_ino1 = statb1.st_ino;
				dp->d_ino2 = statb2.st_ino;
				if (lflag || opt == D_EDIT)
					goto closem;
				if (Nflag && rflag)
					goto closem;
				(void) printf(gettext(
				    "Common subdirectories: %s and %s\n"),
				    file1, file2);
				goto closem;

#if	defined(S_IFCHR) || defined(S_IFBLK)
			case S_IFCHR:
#ifdef	S_IFBLK
			case S_IFBLK:
#endif
				/* Device files are the same if st_rdev matches */
				if (statb1.st_rdev == statb2.st_rdev)
					goto same;
				(void) printf(gettext(
				    "Special files %s and %s differ\n"),
				    file1, file2);
				break;
#endif

#ifdef	S_IFLNK
			case S_IFLNK:
				/* Symbolic links are compared by their content */
				if ((i = readlink(file1, buf1,
							D_BUFSIZ)) == -1) {
					(void) fprintf(stderr, gettext(
					    "diff: cannot read link\n"));
					return (2);
				}

				if ((j = readlink(file2, buf2,
							D_BUFSIZ)) == -1) {
					(void) fprintf(stderr, gettext(
					    "diff: cannot read link\n"));
					return (2);
				}

				if (i == j) {
					if (strncmp(buf1, buf2, i) == 0)
						goto same;
				}

				(void) printf(gettext(
				    "Symbolic links %s and %s differ\n"),
				    file1, file2);
				break;
#endif

#ifdef	S_IFIFO
			case S_IFIFO:
				/* Named pipes are the same if st_ino matches */
				if (statb1.st_ino == statb2.st_ino)
					goto same;
				(void) printf(gettext(
				    "Named pipes %s and %s differ\n"),
				    file1, file2);
				break;
#endif
			}
		} else {
			/* The file types of both sides differ */
			if (lflag) {
				dp->d_flags |= DIFFER;
			} else if (opt == D_NORMAL || opt == D_CONTEXT) {
/*
 * TRANSLATION_NOTE
 * The second and fourth parameters will take the gettext'ed string
 * of one of the following:
 * a directory
 * a character special file
 * a block special file
 * a plain file
 * a named pipe
 * a socket
 * a door
 * an event port
 * an unknown type
 */
				(void) printf(
gettext("File %s is %s while file %s is %s\n"),
					file1, pfiletype(fmt1),
					file2, pfiletype(fmt2));
			}
		}
		(void) close(f1); (void) close(f2);
		return (1);
	}
	/* Two plain files: files of different size cannot be identical */
	if (statb1.st_size != statb2.st_size)
		goto notsame;
	/* Compare the content block by block until both files end */
	for (;;) {
		i = read(f1, buf1, D_BUFSIZ);
		j = read(f2, buf2, D_BUFSIZ);
		if (i < 0 || j < 0) {
			(void) fprintf(stderr, "diff: ");
			(void) fprintf(stderr, gettext("Error reading "));
			perror(i < 0 ? file1: file2);
			(void) close(f1); (void) close(f2);
			return (2);
		}
		if (i != j)
			goto notsame;
		if (i == 0 && j == 0)
			goto same;
#ifdef	HAVE_MEMCMP
		if (memcmp(buf1, buf2, i))
			goto notsame;
#else
		for (j = 0; j < i; j++)
			if (buf1[j] != buf2[j])
				goto notsame;
#endif
	}
same:
	/* Identical files are only reported if sflag is set */
	if (sflag == 0)
		goto closem;
	if (lflag)
		dp->d_flags = SAME;
	else
		(void) printf(gettext("Files %s and %s are identical\n"),
			file1, file2);

closem:
	/*
	 * Check values, it may be directories that we did not open
	 * and thus f1/f2 may be -1.
	 */
	if (f1 >= 0)
		(void) close(f1);
	if (f2 >= 0)
		(void) close(f2);
	return (0);

notsame:
	/* Without aflag, binary files are only reported as different */
	if (!aflag &&
	    (binary(f1) || binary(f2))) {
		if (lflag)
			dp->d_flags |= DIFFER;
		else if (opt == D_NORMAL || opt == D_CONTEXT)
			(void) printf(
				gettext("Binary files %s and %s differ\n"),
			    file1, file2);
		(void) close(f1); (void) close(f2);
		return (1);
	}
#ifdef	DO_SPAWN_DIFF
	(void) close(f1); (void) close(f2);
#endif
	anychange = 1;
	if (lflag) {
#ifndef	DO_SPAWN_DIFF
		(void) close(f1); (void) close(f2);
#endif
		/* Passing the title makes calldiff() send the output to pr */
		result = calldiff(title);
	} else {
		if (opt == D_EDIT)
			(void) printf("ed - %s << '-*-END-*-'\n", dp->d_entry);
		else
			(void) printf("%s%s %s\n", title, file1, file2);
#ifdef	DO_SPAWN_DIFF
		result = calldiff((char *)0);
#else
		/* Compare in this process, using the open descriptors */
		/* NOTE(review): f1 and f2 are not closed on this path, */
		/* presumably calldiffreg() does that - confirm */
		stb1 = statb1;
		stb2 = statb2;
		result = calldiffreg(f1, f2);
#endif
		if (opt == D_EDIT)
			(void) printf("w\nq\n-*-END-*-\n");
	}
	return (result);
}
2364 
2365 char	*prargs[] = { "pr", "-h", 0, 0, 0 };
2366 
/*
 * Run a new diff process with the arguments in diffargv.
 *
 * wantpr	if not NULL, a "pr" process is started as well and the output
 *		of diff is sent to it through a pipe. wantpr is used as the
 *		argument of the "-h" option of pr, and "file1 file2" is
 *		written to etitle before.
 *
 * Returns the exit status of the diff process, or 2 if that process did
 * not exit normally.
 */
static int
calldiff(wantpr)
	char	*wantpr;
{
	const char	*exname;
	pid_t pid;
	int diffstatus, pv[2];

	prargs[2] = wantpr;
	/* Flush now, so pending output is in front of the child's output */
	(void) fflush(stdout);
	if (wantpr) {
		/* NOTE(review): sprintf() is not limited to the space left */
		/* behind etitle, and the result of pipe() is not checked */
		(void) sprintf(etitle, "%s %s", file1, file2);
		(void) pipe(pv);
		pid = vfork();
		if (pid == (pid_t)-1)
			error(gettext(NO_PROCS_ERR));

		if (pid == 0) {
			/* Child: read the pipe as standard input and run pr */
#ifdef	set_child_standard_fds	/* VMS */
			set_child_standard_fds(pv[0],
						STDOUT_FILENO,
						STDERR_FILENO);
#ifdef	F_SETFD
			fcntl(pv[1], F_SETFD, 1);
#endif
#else			/* ! VMS, below is the code for UNIX */
			(void) dup2(pv[0], STDIN_FILENO);
			(void) close(pv[0]);
			(void) close(pv[1]);
#endif
			/* Try the name without its first 4 characters first */
			(void) execv(pr+4, prargs);
			(void) execv(pr, prargs);
			/* Only reached if both execv() calls failed */
			perror(pr);
#ifdef	HAVE_VFORK
			didvfork = 1;
#endif
			status = 2;
			done();
		}
	}
#ifdef	HAVE_GETEXECNAME
	exname = getexecname();
#else
	exname = getexecpath();
#endif
	pid = vfork();
	if (pid == (pid_t)-1)
		error(gettext(NO_PROCS_ERR));

	if (pid == 0) {
		/* Child: write to the pipe if pr is used, then run diff */
		if (wantpr) {
			(void) dup2(pv[1], STDOUT_FILENO);
			(void) close(pv[0]);
			(void) close(pv[1]);
		}
		/* Try the path of this program first, then the fallbacks */
		(void) execv(exname, diffargv);
		(void) execv(diff+4, diffargv);
		(void) execv(diff, diffargv);
		/* Only reached if all execv() calls failed */
		perror(diff);
#ifdef	HAVE_VFORK
		didvfork = 1;
#endif
		status = 2;
		done();
	}
	if (wantpr) {
		/* The parent does not use the pipe itself */
		(void) close(pv[0]);
		(void) close(pv[1]);
	}
	/* Wait for the diff process, its status is the result */
	while (wait(&diffstatus) != pid)
		continue;
	/* Wait for all other children until wait() fails */
	while (wait((int *)0) != (pid_t)-1)
		continue;
#ifndef	HAVE_GETEXECNAME
	/* The result of getexecpath() is freed by the caller */
	if (exname)
		free((char *)exname);
#endif
	if (WIFEXITED(diffstatus))
		return (WEXITSTATUS(diffstatus));
	else
		return (2);
}
2449 
/*
 * Map a file type (the S_IFMT part of st_mode) to a translated,
 * human readable description such as "a directory".
 * Types that are not known on this platform yield "an unknown type".
 */
#ifdef	PROTOTYPES
static char *
pfiletype(mode_t fmt)
#else
static char *
pfiletype(fmt)
	mode_t	fmt;
#endif
{
/*
 * TRANSLATION_NOTE
 * The following 9 messages will be used in the second and
 * the fourth parameters of the message
 * "File %s is %s while file %s is %s\n"
 */
	if (fmt == S_IFDIR)
		return (gettext("a directory"));
#ifdef	S_IFCHR
	if (fmt == S_IFCHR)
		return (gettext("a character special file"));
#endif
#ifdef	S_IFBLK
	if (fmt == S_IFBLK)
		return (gettext("a block special file"));
#endif
	if (fmt == S_IFREG)
		return (gettext("a plain file"));
#ifdef	S_IFIFO
	if (fmt == S_IFIFO)
		return (gettext("a named pipe"));
#endif
#ifdef	S_IFSOCK
	if (fmt == S_IFSOCK)
		return (gettext("a socket"));
#endif
#ifdef	S_IFDOOR
	if (fmt == S_IFDOOR)
		return (gettext("a door"));
#endif
#if defined(S_IFPORT) && S_IFPORT != S_IFIFO	/* Do not use it on Ultrix */
	if (fmt == S_IFPORT)
		return (gettext("an event port"));
#endif
	return (gettext("an unknown type"));
}
2507 
2508 static int
binary(f)2509 binary(f)
2510 	int	f;
2511 {
2512 	char buf[D_BUFSIZ];
2513 	int cnt;
2514 
2515 	if (f < 0)
2516 		return (0);
2517 	(void) lseek(f, (off_t)0, SEEK_SET);
2518 	cnt = read(f, buf, D_BUFSIZ);
2519 	if (cnt < 0)
2520 		return (1);
2521 	return (isbinary(buf, cnt));
2522 }
2523 
2524 static int
filebinary(f)2525 filebinary(f)
2526 	FILE	*f;
2527 {
2528 	char buf[D_BUFSIZ];
2529 	int cnt;
2530 
2531 	(void) fseek(f, (off_t)0, SEEK_SET);
2532 	cnt = fread(buf, 1, D_BUFSIZ, f);
2533 	if (ferror(f))
2534 		return (1);
2535 	return (isbinary(buf, cnt));
2536 }
2537 
2538 
/*
 * Decide whether the first cnt bytes of buf belong to a "binary" file.
 *
 * A file counts as binary as soon as it contains a null character ("diff"
 * doesn't handle them correctly, and neither do many other UNIX
 * text-processing commands).
 * Characters with their 8th bit set do NOT make a file binary; they may be
 * legitimate text characters, or parts of same.
 *
 * Returns 1 if a null byte was found, 0 otherwise (also for cnt <= 0).
 */
static int
isbinary(buf, cnt)
	char	*buf;
	int	cnt;
{
	int	idx;

	for (idx = 0; idx < cnt; idx++) {
		if (buf[idx] == '\0')
			return (1);
	}
	return (0);
}
2559 
2560 
2561 /*
2562  * THIS IS CRUDE.
2563  */
2564 static int
useless(cp)2565 useless(cp)
2566 	char	*cp;
2567 {
2568 
2569 	if (cp[0] == '.') {
2570 		if (cp[1] == '\0')
2571 			return (1);	/* directory "." */
2572 		if (cp[1] == '.' && cp[2] == '\0')
2573 			return (1);	/* directory ".." */
2574 	}
2575 	if (start && strcmp(start, cp) > 0)
2576 		return (1);
2577 	return (0);
2578 }
2579 
2580 
2581 static void
sort(a,n)2582 sort(a, n)				/* shellsort CACM #201 */
2583 	struct line	*a;
2584 	int		n;
2585 {
2586 	struct line w;
2587 	int j, m;
2588 	struct line *ai;
2589 	struct line *aim;
2590 	int k;
2591 
2592 	for (j = 1, m = 0; j <= n; j *= 2)
2593 		m = 2 * j - 1;
2594 	for (m /= 2; m != 0; m /= 2) {
2595 		k = n - m;
2596 		for (j = 1; j <= k; j++) {
2597 			for (ai = &a[j]; ai > a; ai -= m) {
2598 				aim = &ai[m];
2599 				if (aim < ai)
2600 					break;	/* wraparound */
2601 				if (aim->value > ai[0].value ||
2602 				    (aim->value == ai[0].value &&
2603 				    aim->serial > ai[0].serial))
2604 					break;
2605 				w.value = ai[0].value;
2606 				ai[0].value = aim->value;
2607 				aim->value = w.value;
2608 				w.serial = ai[0].serial;
2609 				ai[0].serial = aim->serial;
2610 				aim->serial = w.serial;
2611 			}
2612 		}
2613 	}
2614 }
2615 
2616 static void
unsort(f,l,b)2617 unsort(f, l, b)
2618 	struct line	*f;
2619 	int		l;
2620 	int		*b;
2621 {
2622 	int *a;
2623 	int i;
2624 
2625 	a = (int *)talloc((l + 1) * sizeof (int));
2626 	for (i = 1; i <= l; i++)
2627 		a[f[i].serial] = f[i].value;
2628 	for (i = 1; i <= l; i++)
2629 		b[i] = a[i];
2630 	free((char *)a);
2631 }
2632 
2633 static void
filename(pa1,pa2,st,ifile)2634 filename(pa1, pa2, st, ifile)
2635 	char		**pa1;
2636 	char		**pa2;
2637 	struct stat	*st;
2638 	char		**ifile;
2639 {
2640 	char *a1, *b1, *a2;
2641 
2642 	a1 = *pa1;
2643 	a2 = *pa2;
2644 
2645 	if (*ifile)
2646 		free(*ifile);
2647 
2648 	if (strcmp(*pa1, "-") == 0)
2649 		*ifile = strdup("-");
2650 	else
2651 		*ifile = strdup(*pa1);
2652 
2653 	if (*ifile == (char *)NULL) {
2654 		(void) fprintf(stderr, gettext(
2655 			"no more memory - try again later\n"));
2656 		status = 2;
2657 		done();
2658 	}
2659 
2660 	if ((st->st_mode & S_IFMT) == S_IFDIR) {
2661 		b1 = *pa1 = (char *)malloc(PATH_MAX);
2662 		if (b1 == (char *)NULL) {
2663 			(void) fprintf(stderr, gettext(
2664 				"no more memory - try again later\n"));
2665 			status = 2;
2666 			done();
2667 		}
2668 		while ((*b1++ = *a1++) != '\0')
2669 			;
2670 		b1[-1] = '/';
2671 		a1 = b1;
2672 		while ((*a1++ = *a2++) != '\0')
2673 			if (*a2 && *a2 != '/' && a2[-1] == '/')
2674 				a1 = b1;
2675 		free(*ifile);
2676 		*ifile = strdup(*pa1);
2677 
2678 		if (*ifile == (char *)NULL) {
2679 			(void) fprintf(stderr, gettext(
2680 				"no more memory - try again later\n"));
2681 			status = 2;
2682 			done();
2683 		}
2684 
2685 		if (stat(*pa1, st) < 0) {
2686 			(void) fprintf(stderr, "diff: ");
2687 			perror(*pa1);
2688 			status = 2;
2689 			done();
2690 		}
2691 #if  defined(S_IFBLK)
2692 	} else if ((st->st_mode & S_IFMT) != S_IFREG &&
2693 		    (st->st_mode & S_IFMT) != S_IFBLK) {
2694 #else
2695 	} else if ((st->st_mode & S_IFMT) != S_IFREG) {
2696 #endif
2697 		*pa1 = copytemp(a1);
2698 	} else if (a1[0] == '-' && a1[1] == 0) {
2699 		*pa1 = copytemp(a1);	/* hack! */
2700 		if (stat(*pa1, st) < 0) {
2701 			(void) fprintf(stderr, "diff: ");
2702 			perror(*pa1);
2703 			status = 2;
2704 			done();
2705 		}
2706 	}
2707 }
2708 
2709 static char *
copytemp(fn)2710 copytemp(fn)
2711 	char	*fn;
2712 {
2713 	int ifd, ofd;	/* input and output file descriptors */
2714 	int i;
2715 	char template[13];	/* template for temp file name */
2716 	char buf[D_BUFSIZ];
2717 
2718 	/*
2719 	 * a "-" file is interpreted as fd 0 for pre-/dev/fd systems
2720 	 * ... let's hope this goes away soon!
2721 	 */
2722 	if ((ifd = (strcmp(fn, "-") ? open(fn, O_RDONLY|O_BINARY) : 0)) < 0) {
2723 		(void) fprintf(stderr, "diff: ");
2724 		(void) fprintf(stderr, gettext("cannot open %s\n"), fn);
2725 		done();
2726 	}
2727 #if O_BINARY
2728 	if (ifd == STDIN_FILENO)
2729 		setmode(ifd, O_BINARY);
2730 #endif
2731 #ifdef	SIGHUP
2732 	(void) signal(SIGHUP, (void (*) __PR((int)))done);
2733 #endif
2734 #ifdef	SIGINT
2735 	(void) signal(SIGINT, (void (*) __PR((int)))done);
2736 #endif
2737 #ifdef	SIGPIPE
2738 	(void) signal(SIGPIPE, (void (*) __PR((int)))done);
2739 #endif
2740 #ifdef	SIGTERM
2741 	(void) signal(SIGTERM, (void (*) __PR((int)))done);
2742 #endif
2743 	(void) strcpy(template, "/tmp/dXXXXXX");
2744 	if ((ofd = mkstemp(template)) < 0) {
2745 		(void) fprintf(stderr, "diff: ");
2746 		(void) fprintf(stderr, gettext("cannot create %s\n"), template);
2747 		done();
2748 	}
2749 #if O_BINARY
2750 	setmode(ofd, O_BINARY);
2751 #endif
2752 	(void) strcpy(tempfile[whichtemp++], template);
2753 	while ((i = read(ifd, buf, D_BUFSIZ)) > 0)
2754 		if (write(ofd, buf, i) != i) {
2755 			(void) fprintf(stderr, "diff: ");
2756 			(void) fprintf(stderr,
2757 				gettext("write failed %s\n"), template);
2758 			done();
2759 		}
2760 	(void) close(ifd); (void) close(ofd);
2761 	return (tempfile[whichtemp-1]);
2762 }
2763 
2764 static int
prepare(i,arg)2765 prepare(i, arg)
2766 	int	i;
2767 	char	*arg;
2768 {
2769 	struct line *p;
2770 	int j, h;
2771 	size_t	isize;
2772 
2773 	/*
2774 	 * Average line length is aprox. 35 chars.
2775 	 */
2776 	isize = (i == 0 ? stb1.st_size : stb2.st_size) / 35;
2777 	if (isize > 1000000)
2778 		isize = 1000000;
2779 	if (isize < 64)
2780 		isize = 64;
2781 
2782 	if (fseek(input[i], (off_t)0, SEEK_SET) < 0) {
2783 		perror(arg);
2784 		return (1);
2785 	}
2786 	p = (struct line *)talloc((isize + 3) * sizeof (line));
2787 	for (j = 0; (h = readhash(input[i], i, arg)) != 0; ) {
2788 		if (j >= isize) {
2789 			isize += 64;
2790 			p = (struct line *)ralloc((void *)p,
2791 					(isize + 3) * sizeof (line));
2792 		}
2793 		p[++j].value = h;
2794 	}
2795 	len[i] = j;
2796 	file[i] = p;
2797 	return (0);
2798 }
2799 
2800 static void
prune()2801 prune()
2802 {
2803 	int i, j;
2804 
2805 	/*
2806 	 * Compute the unmodified common prefix, the # of common lines at the
2807 	 * the beginning of both files.
2808 	 */
2809 	for (pref = 0; pref < len[0] && pref < len[1] &&
2810 	    file[0][pref + 1].value == file[1][pref + 1].value;
2811 	    pref++)
2812 		;
2813 
2814 	/*
2815 	 * Compute the unmodified common suffix, the # of common lines at the
2816 	 * the end of both files.
2817 	 */
2818 	for (suff = 0; (suff < len[0] - pref) &&
2819 	    (suff < len[1] - pref) &&
2820 	    (file[0][len[0] - suff].value ==
2821 	    file[1][len[1] - suff].value);
2822 	    suff++)
2823 		;
2824 
2825 	/*
2826 	 * The folowing change is supposed (by Sun Microsystems) to fix
2827 	 * a problem with diff3 that is claimed to silently mismerge files.
2828 	 * If we however enable this code, the stone algorithm to find the
2829 	 * longest identical subsequences does not always work and a resync
2830 	 * may occur with a single common line already.
2831 	 * Even though this looks like a bug, we leave the code for now
2832 	 * as it helps to get the same output as from the simpler udiff(1).
2833 	 */
2834 #if 1
2835 	/* decremnt suff by 2 iff suff >= 2, ensure that suff is never < 0 */
2836 	if (suff >= 2)
2837 		suff -= 2;
2838 #endif
2839 
2840 	for (j = 0; j < 2; j++) {
2841 		sfile[j] = file[j] + pref;
2842 		slen[j] = len[j] - pref - suff;
2843 		for (i = 0; i <= slen[j]; i++)
2844 			sfile[j][i].serial = i;
2845 	}
2846 }
2847 
2848 static void
equiv(a,n,b,m,c)2849 equiv(a, n, b, m, c)
2850 	struct line	*a;
2851 	int		n;
2852 	struct line	*b;
2853 	int		m;
2854 	int		*c;
2855 {
2856 	int i, j;
2857 	i = j = 1;
2858 	while (i <= n && j <= m) {
2859 		if (a[i].value < b[j].value)
2860 			a[i++].value = 0;
2861 		else if (a[i].value == b[j].value)
2862 			a[i++].value = j;
2863 		else
2864 			j++;
2865 	}
2866 	while (i <= n)
2867 		a[i++].value = 0;
2868 	b[m+1].value = 0;	j = 0;
2869 	while (++j <= m) {
2870 		c[j] = -b[j].serial;
2871 		while (b[j + 1].value == b[j].value) {
2872 			j++;
2873 			c[j] = b[j].serial;
2874 		}
2875 	}
2876 	c[j] = -1;
2877 }
2878 
2879 static void
done()2880 done()
2881 {
2882 	if (whichtemp) (void) unlink(tempfile[0]);
2883 	if (whichtemp == 2) (void) unlink(tempfile[1]);
2884 	if (didvfork)
2885 		_exit(status);
2886 	exit(status);
2887 }
2888 
/*
 * Report that the files are too big to be compared and terminate
 * through done().
 *
 * The translated message is written with fputs(), so that it is never
 * interpreted as a printf format string.
 */
static void
noroom()
{
	(void) fputs("diff: ", stderr);
	(void) fputs(gettext("files too big, try -h\n"), stderr);
	done();
}
2896 
/*
 * Print "diff: " followed by the message s and a newline on stderr,
 * then terminate through done().
 */
static void
error(s)
	const char *s;
{
	(void) fputs("diff: ", stderr);
	(void) fputs(s, stderr);
	(void) putc('\n', stderr);
	done();
}
2906 
2907 static void
usage()2908 usage()
2909 {
2910 	(void) fprintf(stderr, gettext(
2911 	"usage: diff [-abBiNptw] [-c | -e | -f | -h | -n | -q | -u] \
2912 file1 file2\n\
2913        diff [-abBiNptw] [-C number | -U number] file1 file2\n\
2914        diff [-abBiNptw] [-D string] file1 file2\n\
2915        diff [-abBiNptw] [-c | -e | -f | -h | -n | -q | -u] [-l] [-r] \
2916 [-s] [-S name] directory1 directory2\n"));
2917 	status = 2;
2918 	done();
2919 }
2920 
#define	NW	1024			/* bytes requested per fread() */

/*
 * Read buffer used to fetch (multibyte) characters from one input file.
 */
struct buff	{
	FILE	*iop;			/* stream the data comes from */
	char	buf[NW + MB_LEN_MAX];	/* data area */
	char	*ptr;			/* next byte to hand out from buf */
	int	buffered;		/* bytes available at ptr, 0: empty */
	off_t	offset;			/* offset in the file */
};

/* One buffer per input file, indexed by the file number (0 or 1). */
static struct buff bufwchar[2];

/*
 *	Attach the I/O stream iop to the buff structure of file number
 *	filen, mark that buffer as empty and record the given file offset.
 */
static void
initbuf(iop, filen, offset)
	FILE	*iop;
	int	filen;
	off_t	offset;
{
	struct buff	*bp = &bufwchar[filen];

	bp->iop = iop;
	bp->offset = offset;
	bp->buffered = 0;
	bp->ptr = NULL;
}
2947 
2948 /*
2949  * 	Reset a buff structure, and rewind the associated file.
2950  */
2951 static void
resetbuf(filen)2952 resetbuf(filen)
2953 	int	filen;
2954 {
2955 	bufwchar[filen].ptr = NULL;
2956 	bufwchar[filen].buffered = bufwchar[filen].offset = 0;
2957 	rewind(bufwchar[filen].iop);
2958 }
2959 
2960 
2961 /*
2962  *	Returns the current offset in the file
2963  */
2964 static off_t
ftellbuf(filen)2965 ftellbuf(filen)
2966 	int	filen;
2967 {
2968 	return (bufwchar[filen].offset);
2969 }
2970 
/*
 * Write the wide character wc to stdout in its multibyte form.
 *
 * If wc cannot be converted, its low 8 bits are written as a single byte.
 * Returns the character written (wc, or wc & 0xff in the fallback case),
 * or WEOF if the conversion unexpectedly produced no bytes.
 */
static wint_t
wcput(wc)
	wint_t	wc;
{
	char	mbs[MB_LEN_MAX];
	int	nbytes;
	int	k;

	nbytes = wctomb(mbs, (wchar_t)wc);
	if (nbytes == 0) {
		/* this should not happen */
		return (WEOF);
	}
	if (nbytes < 0) {
		/* not convertible: emit the low byte as is */
		(void) putc((int)(wc & 0xff), stdout);
		return (wc & 0xff);
	}
	for (k = 0; k < nbytes; k++)
		(void) putc((unsigned char)mbs[k], stdout);
	return (wc);
}
2994 
2995 /*
2996  *	Reads one wide-character from the file associated with filen.
2997  *	If multibyte locales, the input is buffered.
2998  *
2999  *	Input:	filen	the file number (0 or 1)
3000  *	Output:	*blen	number of bytes to make wide-character
3001  *	Return:			wide-character
3002  */
3003 static wint_t
getbufwchar(filen,blen)3004 getbufwchar(filen, blen)
3005 	int	filen;
3006 	int	*blen;
3007 {
3008 
3009 	int	i, num, chlen;
3010 	wchar_t	wc;
3011 	size_t	mxlen;
3012 
3013 	if (mbcurmax == 1) {
3014 		/* If sigle byte locale, use getc() */
3015 		int	ch;
3016 
3017 		ch = getc(bufwchar[filen].iop);
3018 		bufwchar[filen].offset++;
3019 		*blen = 1;
3020 
3021 		if (isascii(ch) || (ch == EOF)) {
3022 			return ((wint_t)ch);
3023 		} else {
3024 			wchar_t	wch;
3025 			char	str[2];
3026 
3027 			str[0] = (char)ch;
3028 			str[1] = '\0';
3029 			if (mbtowc(&wch, str, 1) > 0) {
3030 				return ((wint_t)wch);
3031 			} else {
3032 				return ((wint_t)ch);
3033 			}
3034 		}
3035 	} else {
3036 		mxlen = mbcurmax;
3037 	}
3038 
3039 	if (bufwchar[filen].buffered == 0) {
3040 		/* Not buffered */
3041 		bufwchar[filen].ptr = &(bufwchar[filen].buf[MB_LEN_MAX]);
3042 		num = fread((void *)bufwchar[filen].ptr,
3043 			sizeof (char), NW, bufwchar[filen].iop);
3044 		if (ferror(bufwchar[filen].iop)) {
3045 			(void) fprintf(stderr, "diff: ");
3046 			(void) fprintf(stderr, gettext("Error reading "));
3047 			perror((filen == 0) ? file1 : file2);
3048 			status = 2;
3049 			done();
3050 		}
3051 		if (num == 0)
3052 			return (WEOF);
3053 		bufwchar[filen].buffered = num;
3054 	}
3055 
3056 	if (bufwchar[filen].buffered < mbcurmax) {
3057 		for (i = 0; i < bufwchar[filen].buffered; i++) {
3058 			bufwchar[filen].buf[MB_LEN_MAX -
3059 				(bufwchar[filen].buffered - i)] =
3060 				*(bufwchar[filen].ptr + i);
3061 		}
3062 		bufwchar[filen].ptr = &(bufwchar[filen].buf[MB_LEN_MAX]);
3063 		num = fread((void *)bufwchar[filen].ptr,
3064 			sizeof (char), NW, bufwchar[filen].iop);
3065 		if (ferror(bufwchar[filen].iop)) {
3066 			(void) fprintf(stderr, "diff: ");
3067 			(void) fprintf(stderr, gettext("Error reading "));
3068 			perror((filen == 0) ? file1 : file2);
3069 			status = 2;
3070 			done();
3071 		}
3072 		bufwchar[filen].ptr = &(bufwchar[filen].buf[MB_LEN_MAX -
3073 				bufwchar[filen].buffered]);
3074 		bufwchar[filen].buffered += num;
3075 		if (bufwchar[filen].buffered < mbcurmax) {
3076 			mxlen = bufwchar[filen].buffered;
3077 		}
3078 	}
3079 
3080 	chlen = mbtowc(&wc, bufwchar[filen].ptr, mxlen);
3081 	if (chlen <= 0) {
3082 		(bufwchar[filen].buffered)--;
3083 		*blen = 1;
3084 		(bufwchar[filen].offset)++;
3085 		wc = (wchar_t)((unsigned char)*bufwchar[filen].ptr++);
3086 		return ((wint_t)wc);
3087 	} else {
3088 		bufwchar[filen].buffered -= chlen;
3089 		bufwchar[filen].ptr += chlen;
3090 		bufwchar[filen].offset += chlen;
3091 		*blen = chlen;
3092 		return ((wint_t)wc);
3093 	}
3094 }
3095 
#if !defined(HAVE_CFTIME) && defined(HAVE_STRFTIME)
/*
 * Compute the offset of local time from GMT, in seconds, for the
 * time *clk.  Positive values mean that local time is ahead of GMT.
 */
static time_t
gmtoff(clk)
	const time_t	*clk;
{
	struct tm	lt;
	struct tm	gt;
	int		dsec, dmin, dhour, dday, dyear;

	/* Copy the results: both calls may use the same static storage. */
	lt = *localtime(clk);
	gt = *gmtime(clk);

	dsec  = lt.tm_sec  - gt.tm_sec;
	dmin  = lt.tm_min  - gt.tm_min;
	dhour = lt.tm_hour - gt.tm_hour;
	dday  = lt.tm_yday - gt.tm_yday;
	dyear = lt.tm_year - gt.tm_year;
	if (dyear)			/* Hit new-year limit	*/
		dday = dyear;		/* yday = +-1	*/

	return ((time_t)(dsec + 60 *
		    (dmin + 60 *
			(dhour + 24 * dday))));
}
#endif
3123 
/*
 * Store the number n (0..99) as two decimal digits at p and advance p.
 * DO2() additionally appends the character c.
 * Both expand to several statements, so do not use them as the
 * unbraced body of an if/else/loop.
 */
#define	DO2(p, n, c)	*(p)++ = ((char)((n)/10) + '0'); *(p)++ = \
					((char)((n)%10) + '0'); *(p)++ = (c);

#define	DO2_(p, n)	*(p)++ = ((char)((n)/10) + '0'); *(p)++ = \
					((char)((n)%10) + '0');

/*
 * Format the time *clk according to fmt into the buffer s, which holds
 * maxsize bytes.
 *
 * Depending on the platform this uses cftime(), strftime() or, as a last
 * resort, the fixed ctime() format.
 *
 * In the strftime() case a format that ends in 'z' gets extra care:
 * if the text after the last space of the result does not start with a
 * sign, it is replaced by a numeric "+hhmm"/"-hhmm" offset, provided
 * that fits into s.  If the time cannot be converted or formatted,
 * s is set to the empty string.
 */
static void
cf_time(s, maxsize, fmt, clk)
	char		*s;
	size_t		maxsize;
	char		*fmt;
	const time_t	*clk;
{
#ifdef	HAVE_CFTIME
	cftime(s, fmt, clk);
#else
#ifdef	HAVE_STRFTIME
	struct	tm	*tp = localtime(clk);
	char		*p;
	size_t		flen;
	int		z;
	int		n;

	/*
	 * strftime() must not be called with a NULL tm pointer, and a
	 * return value of 0 leaves the contents of s indeterminate.
	 */
	if (tp == NULL || strftime(s, maxsize, fmt, tp) == 0) {
		if (maxsize > 0)
			s[0] = '\0';
		return;
	}
	/*
	 * HP/UX implements %z as %Z and we need to correct this...
	 */
	flen = strlen(fmt);
	if (flen == 0 || fmt[flen - 1] != 'z')
		return;
	p = strrchr(s, ' ');
	if (p == NULL)
		return;
	p++;
	if (*p == '+' || *p == '-')
		return;			/* already numeric */
	/* Sign, four digits and the null byte must fit behind the space. */
	if ((size_t)(p - s) + 6 > maxsize)
		return;

	z = gmtoff(clk) / 60;	/* seconds -> minutes */
	if (z < 0) {
		*p++ = '-';
		z = -z;
	} else {
		*p++ = '+';
	}
	n = z / 60;
	DO2_(p, n);
	n = z % 60;
	DO2(p, n, 0);
#else
	/*
	 * This is not the correct time format, but we are not on a POSIX
	 * platform and need to do the best we can.
	 */
	strlcpy(s, ctime(clk), maxsize);
	if (maxsize > 24)
		s[24] = '\0';
#endif
#endif
}
3181 
3182 /*
3183  * The next function has been imported from OpenBSD.
3184  * Original author is: Otto Moerbeek <otto@drijf.net>
3185  */
3186 #define	begins_with(s, pre)	(strncmp(s, pre, sizeof (pre)-1) == 0)
3187 #define	C			(char *)
3188 
3189 static char *
match_function(f,pos,filen)3190 match_function(f, pos, filen)
3191 	const off_t	*f;
3192 	int		pos;
3193 	int		filen;
3194 {
3195 	unsigned char	buf[FUNCTION_CONTEXT_SIZE];
3196 	size_t		nc;
3197 	int		last = lastline;
3198 	char		*state = NULL;
3199 	FILE		*fp = bufwchar[filen].iop;
3200 	off_t		off;
3201 
3202 	off = ftellbuf(filen);
3203 	lastline = pos;
3204 	while (pos > last) {
3205 		fseek(fp, f[pos - 1], SEEK_SET);
3206 		nc = f[pos] - f[pos - 1];
3207 		if (nc >= sizeof (buf))
3208 			nc = sizeof (buf) - 1;
3209 		nc = fread(buf, 1, nc, fp);
3210 		if (nc > 0) {
3211 			buf[nc] = '\0';
3212 			buf[strcspn(C buf, "\n")] = '\0';
3213 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
3214 				if (begins_with(C buf, "private:")) {
3215 					if (!state)
3216 						state = " (private)";
3217 				} else if (begins_with(C buf, "protected:")) {
3218 					if (!state)
3219 						state = " (protected)";
3220 				} else if (begins_with(C buf, "public:")) {
3221 					if (!state)
3222 						state = " (public)";
3223 				} else {
3224 					strlcpy(lastbuf, C buf,
3225 					    sizeof (lastbuf));
3226 
3227 					if (state)
3228 						strlcat(lastbuf, state,
3229 						    sizeof (lastbuf));
3230 					lastmatchline = pos;
3231 					initbuf(fp, filen, off);
3232 					return (lastbuf);
3233 				}
3234 			}
3235 		}
3236 		pos--;
3237 	}
3238 	initbuf(fp, filen, off);
3239 	return (lastmatchline > 0 ? lastbuf : NULL);
3240 }
3241