1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.proprietary.c%
6 */
7
8 #ifndef lint
9 static char sccsid[] = "@(#)diffreg.c 8.1 (Berkeley) 06/06/93";
10 #endif /* not lint */
11
12 #include "diff.h"
13 #include "pathnames.h"
14 /*
15 * diff - compare two files.
16 */
17
18 /*
19 * Uses an algorithm due to Harold Stone, which finds
20 * a pair of longest identical subsequences in the two
21 * files.
22 *
23 * The major goal is to generate the match vector J.
24 * J[i] is the index of the line in file1 corresponding
25 * to line i file0. J[i] = 0 if there is no
26 * such line in file1.
27 *
28 * Lines are hashed so as to work in core. All potential
29 * matches are located by sorting the lines of each file
30 * on the hash (called ``value''). In particular, this
31 * collects the equivalence classes in file1 together.
32 * Subroutine equiv replaces the value of each line in
33 * file0 by the index of the first element of its
34 * matching equivalence in (the reordered) file1.
35 * To save space equiv squeezes file1 into a single
36 * array member in which the equivalence classes
37 * are simply concatenated, except that their first
38 * members are flagged by changing sign.
39 *
40 * Next the indices that point into member are unsorted into
41 * array class according to the original order of file0.
42 *
43 * The cleverness lies in routine stone. This marches
44 * through the lines of file0, developing a vector klist
45 * of "k-candidates". At step i a k-candidate is a matched
46 * pair of lines x,y (x in file0 y in file1) such that
47 * there is a common subsequence of length k
48 * between the first i lines of file0 and the first y
49 * lines of file1, but there is no such subsequence for
50 * any smaller y. x is the earliest possible mate to y
51 * that occurs in such a subsequence.
52 *
53 * Whenever any of the members of the equivalence class of
54 * lines in file1 matable to a line in file0 has serial number
55 * less than the y of some k-candidate, that k-candidate
56 * with the smallest such y is replaced. The new
57 * k-candidate is chained (via pred) to the current
58 * k-1 candidate so that the actual subsequence can
59 * be recovered. When a member has serial number greater
60 * that the y of all k-candidates, the klist is extended.
61 * At the end, the longest subsequence is pulled out
62 * and placed in the array J by unravel
63 *
64 * With J in hand, the matches there recorded are
65 * check'ed against reality to assure that no spurious
66 * matches have crept in due to hashing. If they have,
67 * they are broken, and "jackpot" is recorded--a harmless
68 * matter except that a true match for a spuriously
69 * mated line may now be unnecessarily reported as a change.
70 *
71 * Much of the complexity of the program comes simply
72 * from trying to minimize core utilization and
73 * maximize the range of doable problems by dynamically
74 * allocating what is needed and reusing what is not.
75 * The core requirements for problems larger than somewhat
76 * are (in words) 2*length(file0) + length(file1) +
77 * 3*(number of k-candidates installed), typically about
78 * 6n words for files of length n.
79 */
80
81 #define prints(s) fputs(s,stdout)
82
83 FILE *input[2];
84 FILE *fopen();
85
86 struct cand {
87 int x;
88 int y;
89 int pred;
90 } cand;
91 struct line {
92 int serial;
93 int value;
94 } *file[2], line;
95 int len[2];
96 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */
97 int slen[2];
98 int pref, suff; /* length of prefix and suffix */
99 int *class; /* will be overlaid on file[0] */
100 int *member; /* will be overlaid on file[1] */
101 int *klist; /* will be overlaid on file[0] after class */
102 struct cand *clist; /* merely a free storage pot for candidates */
103 int clen = 0;
104 int *J; /* will be overlaid on class */
105 long *ixold; /* will be overlaid on klist */
106 long *ixnew; /* will be overlaid on file[1] */
107 char *chrtran; /* translation table for case-folding */
108
109 /* chrtran points to one of 2 translation tables:
110 * cup2low if folding upper to lower case
111 * clow2low if not folding case
112 */
113 char clow2low[256] = {
114 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
115 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
116 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
117 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
118 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
119 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
120 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
121 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
122 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
123 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
124 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
125 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
126 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
127 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
128 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
129 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
130 };
131
132 char cup2low[256] = {
133 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
134 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
135 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
136 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
137 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
138 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
139 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
140 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
141 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
142 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
143 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
144 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
145 0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
146 0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
147 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
148 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
149 };
150
diffreg()151 diffreg()
152 {
153 register int i, j;
154 FILE *f1, *f2;
155 char buf1[BUFSIZ], buf2[BUFSIZ];
156
157 if (hflag) {
158 diffargv[0] = "diffh";
159 execv(diffh, diffargv);
160 fprintf(stderr, "diff: ");
161 perror(diffh);
162 done();
163 }
164 chrtran = (iflag? cup2low : clow2low);
165 if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
166 file1 = splice(file1, file2);
167 if (stat(file1, &stb1) < 0) {
168 fprintf(stderr, "diff: ");
169 perror(file1);
170 done();
171 }
172 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
173 file2 = splice(file2, file1);
174 if (stat(file2, &stb2) < 0) {
175 fprintf(stderr, "diff: ");
176 perror(file2);
177 done();
178 }
179 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
180 if (!strcmp(file2, "-")) {
181 fprintf(stderr, "diff: can't specify - -\n");
182 done();
183 }
184 file1 = copytemp();
185 if (stat(file1, &stb1) < 0) {
186 fprintf(stderr, "diff: ");
187 perror(file1);
188 done();
189 }
190 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
191 file2 = copytemp();
192 if (stat(file2, &stb2) < 0) {
193 fprintf(stderr, "diff: ");
194 perror(file2);
195 done();
196 }
197 }
198 if ((f1 = fopen(file1, "r")) == NULL) {
199 fprintf(stderr, "diff: ");
200 perror(file1);
201 done();
202 }
203 if ((f2 = fopen(file2, "r")) == NULL) {
204 fprintf(stderr, "diff: ");
205 perror(file2);
206 fclose(f1);
207 done();
208 }
209 if (stb1.st_size != stb2.st_size)
210 goto notsame;
211 for (;;) {
212 i = fread(buf1, 1, BUFSIZ, f1);
213 j = fread(buf2, 1, BUFSIZ, f2);
214 if (i < 0 || j < 0 || i != j)
215 goto notsame;
216 if (i == 0 && j == 0) {
217 fclose(f1);
218 fclose(f2);
219 status = 0; /* files don't differ */
220 goto same;
221 }
222 for (j = 0; j < i; j++)
223 if (buf1[j] != buf2[j])
224 goto notsame;
225 }
226 notsame:
227 /*
228 * Files certainly differ at this point; set status accordingly
229 */
230 status = 1;
231 if (!asciifile(f1) || !asciifile(f2)) {
232 printf("Binary files %s and %s differ\n", file1, file2);
233 fclose(f1);
234 fclose(f2);
235 done();
236 }
237 prepare(0, f1);
238 prepare(1, f2);
239 fclose(f1);
240 fclose(f2);
241 prune();
242 sort(sfile[0],slen[0]);
243 sort(sfile[1],slen[1]);
244
245 member = (int *)file[1];
246 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
247 member = (int *)ralloc((char *)member,(slen[1]+2)*sizeof(int));
248
249 class = (int *)file[0];
250 unsort(sfile[0], slen[0], class);
251 class = (int *)ralloc((char *)class,(slen[0]+2)*sizeof(int));
252
253 klist = (int *)talloc((slen[0]+2)*sizeof(int));
254 clist = (struct cand *)talloc(sizeof(cand));
255 i = stone(class, slen[0], member, klist);
256 free((char *)member);
257 free((char *)class);
258
259 J = (int *)talloc((len[0]+2)*sizeof(int));
260 unravel(klist[i]);
261 free((char *)clist);
262 free((char *)klist);
263
264 ixold = (long *)talloc((len[0]+2)*sizeof(long));
265 ixnew = (long *)talloc((len[1]+2)*sizeof(long));
266 check();
267 output();
268 status = anychange;
269 same:
270 if (opt == D_CONTEXT && anychange == 0)
271 printf("No differences encountered\n");
272 done();
273 }
274
275 char tempfile[] = _PATH_TMP;
276
277 char *
copytemp()278 copytemp()
279 {
280 char buf[BUFSIZ];
281 register int i, f;
282
283 signal(SIGHUP,done);
284 signal(SIGINT,done);
285 signal(SIGPIPE,done);
286 signal(SIGTERM,done);
287 mktemp(tempfile);
288 f = creat(tempfile,0600);
289 if (f < 0) {
290 fprintf(stderr, "diff: ");
291 perror(tempfile);
292 done();
293 }
294 while ((i = read(0,buf,BUFSIZ)) > 0)
295 if (write(f,buf,i) != i) {
296 fprintf(stderr, "diff: ");
297 perror(tempfile);
298 done();
299 }
300 close(f);
301 return (tempfile);
302 }
303
304 char *
splice(dir,file)305 splice(dir, file)
306 char *dir, *file;
307 {
308 char *tail;
309 char buf[BUFSIZ];
310
311 if (!strcmp(file, "-")) {
312 fprintf(stderr, "diff: can't specify - with other arg directory\n");
313 done();
314 }
315 tail = rindex(file, '/');
316 if (tail == 0)
317 tail = file;
318 else
319 tail++;
320 (void)sprintf(buf, "%s/%s", dir, tail);
321 return (savestr(buf));
322 }
323
prepare(i,fd)324 prepare(i, fd)
325 int i;
326 FILE *fd;
327 {
328 register struct line *p;
329 register j,h;
330
331 fseek(fd, (long)0, 0);
332 p = (struct line *)talloc(3*sizeof(line));
333 for(j=0; h=readhash(fd);) {
334 p = (struct line *)ralloc((char *)p,(++j+3)*sizeof(line));
335 p[j].value = h;
336 }
337 len[i] = j;
338 file[i] = p;
339 }
340
prune()341 prune()
342 {
343 register i,j;
344 for(pref=0;pref<len[0]&&pref<len[1]&&
345 file[0][pref+1].value==file[1][pref+1].value;
346 pref++ ) ;
347 for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
348 file[0][len[0]-suff].value==file[1][len[1]-suff].value;
349 suff++) ;
350 for(j=0;j<2;j++) {
351 sfile[j] = file[j]+pref;
352 slen[j] = len[j]-pref-suff;
353 for(i=0;i<=slen[j];i++)
354 sfile[j][i].serial = i;
355 }
356 }
357
358 equiv(a,n,b,m,c)
359 struct line *a, *b;
360 int *c;
361 {
362 register int i, j;
363 i = j = 1;
364 while(i<=n && j<=m) {
365 if(a[i].value <b[j].value)
366 a[i++].value = 0;
367 else if(a[i].value == b[j].value)
368 a[i++].value = j;
369 else
370 j++;
371 }
372 while(i <= n)
373 a[i++].value = 0;
374 b[m+1].value = 0;
375 j = 0;
376 while(++j <= m) {
377 c[j] = -b[j].serial;
378 while(b[j+1].value == b[j].value) {
379 j++;
380 c[j] = b[j].serial;
381 }
382 }
383 c[j] = -1;
384 }
385
stone(a,n,b,c)386 stone(a,n,b,c)
387 int *a;
388 int *b;
389 register int *c;
390 {
391 register int i, k,y;
392 int j, l;
393 int oldc, tc;
394 int oldl;
395 k = 0;
396 c[0] = newcand(0,0,0);
397 for(i=1; i<=n; i++) {
398 j = a[i];
399 if(j==0)
400 continue;
401 y = -b[j];
402 oldl = 0;
403 oldc = c[0];
404 do {
405 if(y <= clist[oldc].y)
406 continue;
407 l = search(c, k, y);
408 if(l!=oldl+1)
409 oldc = c[l-1];
410 if(l<=k) {
411 if(clist[c[l]].y <= y)
412 continue;
413 tc = c[l];
414 c[l] = newcand(i,y,oldc);
415 oldc = tc;
416 oldl = l;
417 } else {
418 c[l] = newcand(i,y,oldc);
419 k++;
420 break;
421 }
422 } while((y=b[++j]) > 0);
423 }
424 return(k);
425 }
426
newcand(x,y,pred)427 newcand(x,y,pred)
428 {
429 register struct cand *q;
430 clist = (struct cand *)ralloc((char *)clist,++clen*sizeof(cand));
431 q = clist + clen -1;
432 q->x = x;
433 q->y = y;
434 q->pred = pred;
435 return(clen-1);
436 }
437
search(c,k,y)438 search(c, k, y)
439 int *c;
440 {
441 register int i, j, l;
442 int t;
443 if(clist[c[k]].y<y) /*quick look for typical case*/
444 return(k+1);
445 i = 0;
446 j = k+1;
447 while (1) {
448 l = i + j;
449 if ((l >>= 1) <= i)
450 break;
451 t = clist[c[l]].y;
452 if(t > y)
453 j = l;
454 else if(t < y)
455 i = l;
456 else
457 return(l);
458 }
459 return(l+1);
460 }
461
unravel(p)462 unravel(p)
463 {
464 register int i;
465 register struct cand *q;
466 for(i=0; i<=len[0]; i++)
467 J[i] = i<=pref ? i:
468 i>len[0]-suff ? i+len[1]-len[0]:
469 0;
470 for(q=clist+p;q->y!=0;q=clist+q->pred)
471 J[q->x+pref] = q->y+pref;
472 }
473
474 /* check does double duty:
475 1. ferret out any fortuitous correspondences due
476 to confounding by hashing (which result in "jackpot")
477 2. collect random access indexes to the two files */
478
check()479 check()
480 {
481 register int i, j;
482 int jackpot;
483 long ctold, ctnew;
484 register int c,d;
485
486 if ((input[0] = fopen(file1,"r")) == NULL) {
487 perror(file1);
488 done();
489 }
490 if ((input[1] = fopen(file2,"r")) == NULL) {
491 perror(file2);
492 done();
493 }
494 j = 1;
495 ixold[0] = ixnew[0] = 0;
496 jackpot = 0;
497 ctold = ctnew = 0;
498 for(i=1;i<=len[0];i++) {
499 if(J[i]==0) {
500 ixold[i] = ctold += skipline(0);
501 continue;
502 }
503 while(j<J[i]) {
504 ixnew[j] = ctnew += skipline(1);
505 j++;
506 }
507 if(bflag || wflag || iflag) {
508 for(;;) {
509 c = getc(input[0]);
510 d = getc(input[1]);
511 ctold++;
512 ctnew++;
513 if(bflag && isspace(c) && isspace(d)) {
514 do {
515 if(c=='\n')
516 break;
517 ctold++;
518 } while(isspace(c=getc(input[0])));
519 do {
520 if(d=='\n')
521 break;
522 ctnew++;
523 } while(isspace(d=getc(input[1])));
524 } else if ( wflag ) {
525 while( isspace(c) && c!='\n' ) {
526 c=getc(input[0]);
527 ctold++;
528 }
529 while( isspace(d) && d!='\n' ) {
530 d=getc(input[1]);
531 ctnew++;
532 }
533 }
534 if(chrtran[c] != chrtran[d]) {
535 jackpot++;
536 J[i] = 0;
537 if(c!='\n')
538 ctold += skipline(0);
539 if(d!='\n')
540 ctnew += skipline(1);
541 break;
542 }
543 if(c=='\n')
544 break;
545 }
546 } else {
547 for(;;) {
548 ctold++;
549 ctnew++;
550 if((c=getc(input[0])) != (d=getc(input[1]))) {
551 /* jackpot++; */
552 J[i] = 0;
553 if(c!='\n')
554 ctold += skipline(0);
555 if(d!='\n')
556 ctnew += skipline(1);
557 break;
558 }
559 if(c=='\n')
560 break;
561 }
562 }
563 ixold[i] = ctold;
564 ixnew[j] = ctnew;
565 j++;
566 }
567 for(;j<=len[1];j++) {
568 ixnew[j] = ctnew += skipline(1);
569 }
570 fclose(input[0]);
571 fclose(input[1]);
572 /*
573 if(jackpot)
574 fprintf(stderr, "jackpot\n");
575 */
576 }
577
578 sort(a,n) /*shellsort CACM #201*/
579 struct line *a;
580 {
581 struct line w;
582 register int j,m;
583 struct line *ai;
584 register struct line *aim;
585 int k;
586
587 if (n == 0)
588 return;
589 for(j=1;j<=n;j*= 2)
590 m = 2*j - 1;
591 for(m/=2;m!=0;m/=2) {
592 k = n-m;
593 for(j=1;j<=k;j++) {
594 for(ai = &a[j]; ai > a; ai -= m) {
595 aim = &ai[m];
596 if(aim < ai)
597 break; /*wraparound*/
598 if(aim->value > ai[0].value ||
599 aim->value == ai[0].value &&
600 aim->serial > ai[0].serial)
601 break;
602 w.value = ai[0].value;
603 ai[0].value = aim->value;
604 aim->value = w.value;
605 w.serial = ai[0].serial;
606 ai[0].serial = aim->serial;
607 aim->serial = w.serial;
608 }
609 }
610 }
611 }
612
613 unsort(f, l, b)
614 struct line *f;
615 int *b;
616 {
617 register int *a;
618 register int i;
619 a = (int *)talloc((l+1)*sizeof(int));
620 for(i=1;i<=l;i++)
621 a[f[i].serial] = f[i].value;
622 for(i=1;i<=l;i++)
623 b[i] = a[i];
624 free((char *)a);
625 }
626
skipline(f)627 skipline(f)
628 {
629 register i, c;
630
631 for(i=1;(c=getc(input[f]))!='\n';i++)
632 if (c < 0)
633 return(i);
634 return(i);
635 }
636
output()637 output()
638 {
639 int m;
640 register int i0, i1, j1;
641 int j0;
642 input[0] = fopen(file1,"r");
643 input[1] = fopen(file2,"r");
644 m = len[0];
645 J[0] = 0;
646 J[m+1] = len[1]+1;
647 if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) {
648 while(i0<=m&&J[i0]==J[i0-1]+1) i0++;
649 j0 = J[i0-1]+1;
650 i1 = i0-1;
651 while(i1<m&&J[i1+1]==0) i1++;
652 j1 = J[i1+1]-1;
653 J[i1] = j1;
654 change(i0,i1,j0,j1);
655 } else for(i0=m;i0>=1;i0=i1-1) {
656 while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--;
657 j0 = J[i0+1]-1;
658 i1 = i0+1;
659 while(i1>1&&J[i1-1]==0) i1--;
660 j1 = J[i1-1]+1;
661 J[i1] = j1;
662 change(i1,i0,j1,j0);
663 }
664 if(m==0)
665 change(1,0,1,len[1]);
666 if (opt==D_IFDEF) {
667 for (;;) {
668 #define c i0
669 c = getc(input[0]);
670 if (c < 0)
671 return;
672 putchar(c);
673 }
674 #undef c
675 }
676 if (anychange && opt == D_CONTEXT)
677 dump_context_vec();
678 }
679
680 /*
681 * The following struct is used to record change information when
682 * doing a "context" diff. (see routine "change" to understand the
683 * highly mneumonic field names)
684 */
685 struct context_vec {
686 int a; /* start line in old file */
687 int b; /* end line in old file */
688 int c; /* start line in new file */
689 int d; /* end line in new file */
690 };
691
692 struct context_vec *context_vec_start,
693 *context_vec_end,
694 *context_vec_ptr;
695
696 #define MAX_CONTEXT 128
697
698 /* indicate that there is a difference between lines a and b of the from file
699 to get to lines c to d of the to file.
700 If a is greater then b then there are no lines in the from file involved
701 and this means that there were lines appended (beginning at b).
702 If c is greater than d then there are lines missing from the to file.
703 */
change(a,b,c,d)704 change(a,b,c,d)
705 {
706 int lowa,upb,lowc,upd;
707 struct stat stbuf;
708
709 if (opt != D_IFDEF && a>b && c>d)
710 return;
711 if (anychange == 0) {
712 anychange = 1;
713 if(opt == D_CONTEXT) {
714 printf("*** %s ", file1);
715 stat(file1, &stbuf);
716 printf("%s--- %s ",
717 ctime(&stbuf.st_mtime), file2);
718 stat(file2, &stbuf);
719 printf("%s", ctime(&stbuf.st_mtime));
720
721 context_vec_start = (struct context_vec *)
722 malloc(MAX_CONTEXT *
723 sizeof(struct context_vec));
724 context_vec_end = context_vec_start + MAX_CONTEXT;
725 context_vec_ptr = context_vec_start - 1;
726 }
727 }
728 if(opt == D_CONTEXT) {
729 /*
730 * if this new change is within 'context' lines of
731 * the previous change, just add it to the change
732 * record. If the record is full or if this
733 * change is more than 'context' lines from the previous
734 * change, dump the record, reset it & add the new change.
735 */
736 if ( context_vec_ptr >= context_vec_end ||
737 ( context_vec_ptr >= context_vec_start &&
738 a > (context_vec_ptr->b + 2*context) &&
739 c > (context_vec_ptr->d + 2*context) ) )
740 dump_context_vec();
741
742 context_vec_ptr++;
743 context_vec_ptr->a = a;
744 context_vec_ptr->b = b;
745 context_vec_ptr->c = c;
746 context_vec_ptr->d = d;
747 return;
748 }
749 switch (opt) {
750
751 case D_NORMAL:
752 case D_EDIT:
753 range(a,b,",");
754 putchar(a>b?'a':c>d?'d':'c');
755 if(opt==D_NORMAL)
756 range(c,d,",");
757 putchar('\n');
758 break;
759 case D_REVERSE:
760 putchar(a>b?'a':c>d?'d':'c');
761 range(a,b," ");
762 putchar('\n');
763 break;
764 case D_NREVERSE:
765 if (a>b)
766 printf("a%d %d\n",b,d-c+1);
767 else {
768 printf("d%d %d\n",a,b-a+1);
769 if (!(c>d))
770 /* add changed lines */
771 printf("a%d %d\n",b, d-c+1);
772 }
773 break;
774 }
775 if(opt == D_NORMAL || opt == D_IFDEF) {
776 fetch(ixold,a,b,input[0],"< ", 1);
777 if(a<=b&&c<=d && opt == D_NORMAL)
778 prints("---\n");
779 }
780 fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0);
781 if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d)
782 prints(".\n");
783 if (inifdef) {
784 fprintf(stdout, "#endif /* %s */\n", endifname);
785 inifdef = 0;
786 }
787 }
788
range(a,b,separator)789 range(a,b,separator)
790 char *separator;
791 {
792 printf("%d", a>b?b:a);
793 if(a<b) {
794 printf("%s%d", separator, b);
795 }
796 }
797
fetch(f,a,b,lb,s,oldfile)798 fetch(f,a,b,lb,s,oldfile)
799 long *f;
800 FILE *lb;
801 char *s;
802 {
803 register int i, j;
804 register int c;
805 register int col;
806 register int nc;
807 int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0');
808
809 /*
810 * When doing #ifdef's, copy down to current line
811 * if this is the first file, so that stuff makes it to output.
812 */
813 if (opt == D_IFDEF && oldfile){
814 long curpos = ftell(lb);
815 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
816 nc = f[a>b? b : a-1 ] - curpos;
817 for (i = 0; i < nc; i++)
818 putchar(getc(lb));
819 }
820 if (a > b)
821 return;
822 if (opt == D_IFDEF) {
823 if (inifdef)
824 fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile==1 ? "!" : "", ifdef2);
825 else {
826 if (oneflag) {
827 /* There was only one ifdef given */
828 endifname = ifdef2;
829 if (oldfile)
830 fprintf(stdout, "#ifndef %s\n", endifname);
831 else
832 fprintf(stdout, "#ifdef %s\n", endifname);
833 }
834 else {
835 endifname = oldfile ? ifdef1 : ifdef2;
836 fprintf(stdout, "#ifdef %s\n", endifname);
837 }
838 }
839 inifdef = 1+oldfile;
840 }
841
842 for(i=a;i<=b;i++) {
843 fseek(lb,f[i-1],0);
844 nc = f[i]-f[i-1];
845 if (opt != D_IFDEF)
846 prints(s);
847 col = 0;
848 for(j=0;j<nc;j++) {
849 c = getc(lb);
850 if (c == '\t' && tflag)
851 do
852 putchar(' ');
853 while (++col & 7);
854 else {
855 putchar(c);
856 col++;
857 }
858 }
859 }
860
861 if (inifdef && !wantelses) {
862 fprintf(stdout, "#endif /* %s */\n", endifname);
863 inifdef = 0;
864 }
865 }
866
867 #define POW2 /* define only if HALFLONG is 2**n */
868 #define HALFLONG 16
869 #define low(x) (x&((1L<<HALFLONG)-1))
870 #define high(x) (x>>HALFLONG)
871
872 /*
873 * hashing has the effect of
874 * arranging line in 7-bit bytes and then
875 * summing 1-s complement in 16-bit hunks
876 */
readhash(f)877 readhash(f)
878 register FILE *f;
879 {
880 register long sum;
881 register unsigned shift;
882 register t;
883 register space;
884
885 sum = 1;
886 space = 0;
887 if(!bflag && !wflag) {
888 if(iflag)
889 for(shift=0;(t=getc(f))!='\n';shift+=7) {
890 if(t==-1)
891 return(0);
892 sum += (long)chrtran[t] << (shift
893 #ifdef POW2
894 &= HALFLONG - 1);
895 #else
896 %= HALFLONG);
897 #endif
898 }
899 else
900 for(shift=0;(t=getc(f))!='\n';shift+=7) {
901 if(t==-1)
902 return(0);
903 sum += (long)t << (shift
904 #ifdef POW2
905 &= HALFLONG - 1);
906 #else
907 %= HALFLONG);
908 #endif
909 }
910 } else {
911 for(shift=0;;) {
912 switch(t=getc(f)) {
913 case -1:
914 return(0);
915 case '\t':
916 case ' ':
917 space++;
918 continue;
919 default:
920 if(space && !wflag) {
921 shift += 7;
922 space = 0;
923 }
924 sum += (long)chrtran[t] << (shift
925 #ifdef POW2
926 &= HALFLONG - 1);
927 #else
928 %= HALFLONG);
929 #endif
930 shift += 7;
931 continue;
932 case '\n':
933 break;
934 }
935 break;
936 }
937 }
938 sum = low(sum) + high(sum);
939 return((short)low(sum) + (short)high(sum));
940 }
941
942 #include <a.out.h>
943
asciifile(f)944 asciifile(f)
945 FILE *f;
946 {
947 char buf[BUFSIZ];
948 register int cnt;
949 register char *cp;
950
951 fseek(f, (long)0, 0);
952 cnt = fread(buf, 1, BUFSIZ, f);
953 if (cnt >= sizeof (struct exec)) {
954 struct exec hdr;
955 hdr = *(struct exec *)buf;
956 if (!N_BADMAG(hdr))
957 return (0);
958 }
959 cp = buf;
960 while (--cnt >= 0)
961 if (*cp++ & 0200)
962 return (0);
963 return (1);
964 }
965
966
967 /* dump accumulated "context" diff changes */
dump_context_vec()968 dump_context_vec()
969 {
970 register int a, b, c, d;
971 register char ch;
972 register struct context_vec *cvp = context_vec_start;
973 register int lowa, upb, lowc, upd;
974 register int do_output;
975
976 if ( cvp > context_vec_ptr )
977 return;
978
979 lowa = max(1, cvp->a - context);
980 upb = min(len[0], context_vec_ptr->b + context);
981 lowc = max(1, cvp->c - context);
982 upd = min(len[1], context_vec_ptr->d + context);
983
984 printf("***************\n*** ");
985 range(lowa,upb,",");
986 printf(" ****\n");
987
988 /*
989 * output changes to the "old" file. The first loop suppresses
990 * output if there were no changes to the "old" file (we'll see
991 * the "old" lines as context in the "new" list).
992 */
993 do_output = 0;
994 for ( ; cvp <= context_vec_ptr; cvp++)
995 if (cvp->a <= cvp->b) {
996 cvp = context_vec_start;
997 do_output++;
998 break;
999 }
1000
1001 if ( do_output ) {
1002 while (cvp <= context_vec_ptr) {
1003 a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;
1004
1005 if (a <= b && c <= d)
1006 ch = 'c';
1007 else
1008 ch = (a <= b) ? 'd' : 'a';
1009
1010 if (ch == 'a')
1011 fetch(ixold,lowa,b,input[0]," ");
1012 else {
1013 fetch(ixold,lowa,a-1,input[0]," ");
1014 fetch(ixold,a,b,input[0],ch == 'c' ? "! " : "- ");
1015 }
1016 lowa = b + 1;
1017 cvp++;
1018 }
1019 fetch(ixold, b+1, upb, input[0], " ");
1020 }
1021
1022 /* output changes to the "new" file */
1023 printf("--- ");
1024 range(lowc,upd,",");
1025 printf(" ----\n");
1026
1027 do_output = 0;
1028 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1029 if (cvp->c <= cvp->d) {
1030 cvp = context_vec_start;
1031 do_output++;
1032 break;
1033 }
1034
1035 if (do_output) {
1036 while (cvp <= context_vec_ptr) {
1037 a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d;
1038
1039 if (a <= b && c <= d)
1040 ch = 'c';
1041 else
1042 ch = (a <= b) ? 'd' : 'a';
1043
1044 if (ch == 'd')
1045 fetch(ixnew,lowc,d,input[1]," ");
1046 else {
1047 fetch(ixnew,lowc,c-1,input[1]," ");
1048 fetch(ixnew,c,d,input[1],ch == 'c' ? "! " : "+ ");
1049 }
1050 lowc = d + 1;
1051 cvp++;
1052 }
1053 fetch(ixnew, d+1, upd, input[1], " ");
1054 }
1055
1056 context_vec_ptr = context_vec_start - 1;
1057 }
1058