1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (C) Caldera International Inc. 2001-2002. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code and documentation must retain the above 13 * copyright notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed or owned by Caldera 20 * International, Inc. 21 * 4. Neither the name of Caldera International, Inc. nor the names of other 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 26 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 27 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 28 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 29 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 30 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 31 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 32 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 34 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 /*- 39 * Copyright (c) 1991, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 67 */ 68 69 #include <sys/cdefs.h> 70 __FBSDID("$FreeBSD$"); 71 72 #include <sys/capsicum.h> 73 #include <sys/procdesc.h> 74 #include <sys/stat.h> 75 #include <sys/types.h> 76 #include <sys/event.h> 77 #include <sys/wait.h> 78 79 #include <capsicum_helpers.h> 80 #include <ctype.h> 81 #include <err.h> 82 #include <errno.h> 83 #include <fcntl.h> 84 #include <paths.h> 85 #include <regex.h> 86 #include <stddef.h> 87 #include <stdint.h> 88 #include <stdio.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <unistd.h> 92 #include <limits.h> 93 #include <signal.h> 94 95 #include "diff.h" 96 #include "xmalloc.h" 97 98 #define _PATH_PR "/usr/bin/pr" 99 100 /* 101 * diff - compare two files. 102 */ 103 104 /* 105 * Uses an algorithm due to Harold Stone, which finds 106 * a pair of longest identical subsequences in the two 107 * files. 108 * 109 * The major goal is to generate the match vector J. 110 * J[i] is the index of the line in file1 corresponding 111 * to line i file0. J[i] = 0 if there is no 112 * such line in file1. 113 * 114 * Lines are hashed so as to work in core. All potential 115 * matches are located by sorting the lines of each file 116 * on the hash (called ``value''). In particular, this 117 * collects the equivalence classes in file1 together. 118 * Subroutine equiv replaces the value of each line in 119 * file0 by the index of the first element of its 120 * matching equivalence in (the reordered) file1. 121 * To save space equiv squeezes file1 into a single 122 * array member in which the equivalence classes 123 * are simply concatenated, except that their first 124 * members are flagged by changing sign. 125 * 126 * Next the indices that point into member are unsorted into 127 * array class according to the original order of file0. 128 * 129 * The cleverness lies in routine stone. This marches 130 * through the lines of file0, developing a vector klist 131 * of "k-candidates". At step i a k-candidate is a matched 132 * pair of lines x,y (x in file0 y in file1) such that 133 * there is a common subsequence of length k 134 * between the first i lines of file0 and the first y 135 * lines of file1, but there is no such subsequence for 136 * any smaller y. x is the earliest possible mate to y 137 * that occurs in such a subsequence. 138 * 139 * Whenever any of the members of the equivalence class of 140 * lines in file1 matable to a line in file0 has serial number 141 * less than the y of some k-candidate, that k-candidate 142 * with the smallest such y is replaced. The new 143 * k-candidate is chained (via pred) to the current 144 * k-1 candidate so that the actual subsequence can 145 * be recovered. When a member has serial number greater 146 * that the y of all k-candidates, the klist is extended. 147 * At the end, the longest subsequence is pulled out 148 * and placed in the array J by unravel 149 * 150 * With J in hand, the matches there recorded are 151 * check'ed against reality to assure that no spurious 152 * matches have crept in due to hashing. If they have, 153 * they are broken, and "jackpot" is recorded--a harmless 154 * matter except that a true match for a spuriously 155 * mated line may now be unnecessarily reported as a change. 156 * 157 * Much of the complexity of the program comes simply 158 * from trying to minimize core utilization and 159 * maximize the range of doable problems by dynamically 160 * allocating what is needed and reusing what is not. 161 * The core requirements for problems larger than somewhat 162 * are (in words) 2*length(file0) + length(file1) + 163 * 3*(number of k-candidates installed), typically about 164 * 6n words for files of length n. 165 */ 166 167 struct cand { 168 int x; 169 int y; 170 int pred; 171 }; 172 173 static struct line { 174 int serial; 175 int value; 176 } *file[2]; 177 178 /* 179 * The following struct is used to record change information when 180 * doing a "context" or "unified" diff. (see routine "change" to 181 * understand the highly mnemonic field names) 182 */ 183 struct context_vec { 184 int a; /* start line in old file */ 185 int b; /* end line in old file */ 186 int c; /* start line in new file */ 187 int d; /* end line in new file */ 188 }; 189 190 #define diff_output printf 191 static FILE *opentemp(const char *); 192 static void output(char *, FILE *, char *, FILE *, int); 193 static void check(FILE *, FILE *, int); 194 static void range(int, int, const char *); 195 static void uni_range(int, int); 196 static void dump_context_vec(FILE *, FILE *, int); 197 static void dump_unified_vec(FILE *, FILE *, int); 198 static void prepare(int, FILE *, size_t, int); 199 static void prune(void); 200 static void equiv(struct line *, int, struct line *, int, int *); 201 static void unravel(int); 202 static void unsort(struct line *, int, int *); 203 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 204 static void sort(struct line *, int); 205 static void print_header(const char *, const char *); 206 static int ignoreline(char *); 207 static int asciifile(FILE *); 208 static int fetch(long *, int, int, FILE *, int, int, int); 209 static int newcand(int, int, int); 210 static int search(int *, int, int); 211 static int skipline(FILE *); 212 static int isqrt(int); 213 static int stone(int *, int, int *, int *, int); 214 static int readhash(FILE *, int); 215 static int files_differ(FILE *, FILE *, int); 216 static char *match_function(const long *, int, FILE *); 217 static char *preadline(int, size_t, off_t); 218 219 static int *J; /* will be overlaid on class */ 220 static int *class; /* will be overlaid on file[0] */ 221 static int *klist; /* will be overlaid on file[0] after class */ 222 static int *member; /* will be overlaid on file[1] */ 223 static int clen; 224 static int inifdef; /* whether or not we are in a #ifdef block */ 225 static int len[2]; 226 static int pref, suff; /* length of prefix and suffix */ 227 static int slen[2]; 228 static int anychange; 229 static long *ixnew; /* will be overlaid on file[1] */ 230 static long *ixold; /* will be overlaid on klist */ 231 static struct cand *clist; /* merely a free storage pot for candidates */ 232 static int clistlen; /* the length of clist */ 233 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 234 static u_char *chrtran; /* translation table for case-folding */ 235 static struct context_vec *context_vec_start; 236 static struct context_vec *context_vec_end; 237 static struct context_vec *context_vec_ptr; 238 239 #define FUNCTION_CONTEXT_SIZE 55 240 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 241 static int lastline; 242 static int lastmatchline; 243 244 245 /* 246 * chrtran points to one of 2 translation tables: cup2low if folding upper to 247 * lower case clow2low if not folding case 248 */ 249 static u_char clow2low[256] = { 250 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 251 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 252 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 253 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 254 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 255 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 256 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 257 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 258 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 259 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 260 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 261 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 262 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 263 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 264 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 265 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 266 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 267 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 268 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 269 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 270 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 271 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 272 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 273 0xfd, 0xfe, 0xff 274 }; 275 276 static u_char cup2low[256] = { 277 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 278 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 279 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 280 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 281 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 282 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 283 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 284 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 285 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 286 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 287 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 288 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 289 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 290 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 291 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 292 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 293 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 294 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 295 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 296 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 297 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 298 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 299 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 300 0xfd, 0xfe, 0xff 301 }; 302 303 int 304 diffreg(char *file1, char *file2, int flags, int capsicum) 305 { 306 FILE *f1, *f2; 307 int i, rval; 308 int ostdout = -1; 309 int pr_pd, kq; 310 struct kevent *e; 311 cap_rights_t rights_ro; 312 313 e = NULL; 314 kq = -1; 315 f1 = f2 = NULL; 316 rval = D_SAME; 317 anychange = 0; 318 lastline = 0; 319 lastmatchline = 0; 320 context_vec_ptr = context_vec_start - 1; 321 if (flags & D_IGNORECASE) 322 chrtran = cup2low; 323 else 324 chrtran = clow2low; 325 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 326 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 327 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 328 goto closem; 329 330 if (flags & D_EMPTY1) 331 f1 = fopen(_PATH_DEVNULL, "r"); 332 else { 333 if (!S_ISREG(stb1.st_mode)) { 334 if ((f1 = opentemp(file1)) == NULL || 335 fstat(fileno(f1), &stb1) < 0) { 336 warn("%s", file1); 337 status |= 2; 338 goto closem; 339 } 340 } else if (strcmp(file1, "-") == 0) 341 f1 = stdin; 342 else 343 f1 = fopen(file1, "r"); 344 } 345 if (f1 == NULL) { 346 warn("%s", file1); 347 status |= 2; 348 goto closem; 349 } 350 351 if (flags & D_EMPTY2) 352 f2 = fopen(_PATH_DEVNULL, "r"); 353 else { 354 if (!S_ISREG(stb2.st_mode)) { 355 if ((f2 = opentemp(file2)) == NULL || 356 fstat(fileno(f2), &stb2) < 0) { 357 warn("%s", file2); 358 status |= 2; 359 goto closem; 360 } 361 } else if (strcmp(file2, "-") == 0) 362 f2 = stdin; 363 else 364 f2 = fopen(file2, "r"); 365 } 366 if (f2 == NULL) { 367 warn("%s", file2); 368 status |= 2; 369 goto closem; 370 } 371 372 if (lflag) { 373 /* redirect stdout to pr */ 374 int pfd[2]; 375 pid_t pid; 376 char *header; 377 378 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 379 signal(SIGPIPE, SIG_IGN); 380 fflush(stdout); 381 rewind(stdout); 382 pipe(pfd); 383 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 384 case -1: 385 status |= 2; 386 free(header); 387 err(2, "No more processes"); 388 case 0: 389 /* child */ 390 if (pfd[0] != STDIN_FILENO) { 391 dup2(pfd[0], STDIN_FILENO); 392 close(pfd[0]); 393 } 394 close(pfd[1]); 395 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 396 _exit(127); 397 default: 398 399 /* parent */ 400 if (pfd[1] != STDOUT_FILENO) { 401 ostdout = dup(STDOUT_FILENO); 402 dup2(pfd[1], STDOUT_FILENO); 403 close(pfd[1]); 404 } 405 close(pfd[0]); 406 rewind(stdout); 407 free(header); 408 kq = kqueue(); 409 if (kq == -1) 410 err(2, "kqueue"); 411 e = xmalloc(sizeof(struct kevent)); 412 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 413 NULL); 414 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 415 err(2, "kevent"); 416 } 417 } 418 419 if (capsicum) { 420 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 421 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 422 && errno != ENOSYS) 423 err(2, "unable to limit rights on: %s", file1); 424 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 425 errno != ENOSYS) 426 err(2, "unable to limit rights on: %s", file2); 427 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 428 /* stding has already been limited */ 429 if (caph_limit_stderr() == -1) 430 err(2, "unable to limit stderr"); 431 if (caph_limit_stdout() == -1) 432 err(2, "unable to limit stdout"); 433 } else if (caph_limit_stdio() == -1) 434 err(2, "unable to limit stdio"); 435 436 caph_cache_catpages(); 437 caph_cache_tzdata(); 438 if (cap_enter() < 0 && errno != ENOSYS) 439 err(2, "unable to enter capability mode"); 440 } 441 442 switch (files_differ(f1, f2, flags)) { 443 case 0: 444 goto closem; 445 case 1: 446 break; 447 default: 448 /* error */ 449 status |= 2; 450 goto closem; 451 } 452 453 if ((flags & D_FORCEASCII) == 0 && 454 (!asciifile(f1) || !asciifile(f2))) { 455 rval = D_BINARY; 456 status |= 1; 457 goto closem; 458 } 459 prepare(0, f1, stb1.st_size, flags); 460 prepare(1, f2, stb2.st_size, flags); 461 462 prune(); 463 sort(sfile[0], slen[0]); 464 sort(sfile[1], slen[1]); 465 466 member = (int *)file[1]; 467 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 468 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 469 470 class = (int *)file[0]; 471 unsort(sfile[0], slen[0], class); 472 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 473 474 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 475 clen = 0; 476 clistlen = 100; 477 clist = xcalloc(clistlen, sizeof(*clist)); 478 i = stone(class, slen[0], member, klist, flags); 479 free(member); 480 free(class); 481 482 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 483 unravel(klist[i]); 484 free(clist); 485 free(klist); 486 487 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 488 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 489 check(f1, f2, flags); 490 output(file1, f1, file2, f2, flags); 491 if (ostdout != -1 && e != NULL) { 492 /* close the pipe to pr and restore stdout */ 493 int wstatus; 494 495 fflush(stdout); 496 if (ostdout != STDOUT_FILENO) { 497 close(STDOUT_FILENO); 498 dup2(ostdout, STDOUT_FILENO); 499 close(ostdout); 500 } 501 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 502 err(2, "kevent"); 503 wstatus = e[0].data; 504 close(kq); 505 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 506 errx(2, "pr exited abnormally"); 507 else if (WIFSIGNALED(wstatus)) 508 errx(2, "pr killed by signal %d", 509 WTERMSIG(wstatus)); 510 } 511 512 closem: 513 if (anychange) { 514 status |= 1; 515 if (rval == D_SAME) 516 rval = D_DIFFER; 517 } 518 if (f1 != NULL) 519 fclose(f1); 520 if (f2 != NULL) 521 fclose(f2); 522 523 return (rval); 524 } 525 526 /* 527 * Check to see if the given files differ. 528 * Returns 0 if they are the same, 1 if different, and -1 on error. 529 * XXX - could use code from cmp(1) [faster] 530 */ 531 static int 532 files_differ(FILE *f1, FILE *f2, int flags) 533 { 534 char buf1[BUFSIZ], buf2[BUFSIZ]; 535 size_t i, j; 536 537 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 538 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 539 return (1); 540 for (;;) { 541 i = fread(buf1, 1, sizeof(buf1), f1); 542 j = fread(buf2, 1, sizeof(buf2), f2); 543 if ((!i && ferror(f1)) || (!j && ferror(f2))) 544 return (-1); 545 if (i != j) 546 return (1); 547 if (i == 0) 548 return (0); 549 if (memcmp(buf1, buf2, i) != 0) 550 return (1); 551 } 552 } 553 554 static FILE * 555 opentemp(const char *f) 556 { 557 char buf[BUFSIZ], tempfile[PATH_MAX]; 558 ssize_t nread; 559 int ifd, ofd; 560 561 if (strcmp(f, "-") == 0) 562 ifd = STDIN_FILENO; 563 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 564 return (NULL); 565 566 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 567 568 if ((ofd = mkstemp(tempfile)) < 0) { 569 close(ifd); 570 return (NULL); 571 } 572 unlink(tempfile); 573 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 574 if (write(ofd, buf, nread) != nread) { 575 close(ifd); 576 close(ofd); 577 return (NULL); 578 } 579 } 580 close(ifd); 581 lseek(ofd, (off_t)0, SEEK_SET); 582 return (fdopen(ofd, "r")); 583 } 584 585 char * 586 splice(char *dir, char *path) 587 { 588 char *tail, *buf; 589 size_t dirlen; 590 591 dirlen = strlen(dir); 592 while (dirlen != 0 && dir[dirlen - 1] == '/') 593 dirlen--; 594 if ((tail = strrchr(path, '/')) == NULL) 595 tail = path; 596 else 597 tail++; 598 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 599 return (buf); 600 } 601 602 static void 603 prepare(int i, FILE *fd, size_t filesize, int flags) 604 { 605 struct line *p; 606 int h; 607 size_t sz, j; 608 609 rewind(fd); 610 611 sz = MIN(filesize, SIZE_MAX) / 25; 612 if (sz < 100) 613 sz = 100; 614 615 p = xcalloc(sz + 3, sizeof(*p)); 616 for (j = 0; (h = readhash(fd, flags));) { 617 if (j == sz) { 618 sz = sz * 3 / 2; 619 p = xreallocarray(p, sz + 3, sizeof(*p)); 620 } 621 p[++j].value = h; 622 } 623 len[i] = j; 624 file[i] = p; 625 } 626 627 static void 628 prune(void) 629 { 630 int i, j; 631 632 for (pref = 0; pref < len[0] && pref < len[1] && 633 file[0][pref + 1].value == file[1][pref + 1].value; 634 pref++) 635 ; 636 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 637 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 638 suff++) 639 ; 640 for (j = 0; j < 2; j++) { 641 sfile[j] = file[j] + pref; 642 slen[j] = len[j] - pref - suff; 643 for (i = 0; i <= slen[j]; i++) 644 sfile[j][i].serial = i; 645 } 646 } 647 648 static void 649 equiv(struct line *a, int n, struct line *b, int m, int *c) 650 { 651 int i, j; 652 653 i = j = 1; 654 while (i <= n && j <= m) { 655 if (a[i].value < b[j].value) 656 a[i++].value = 0; 657 else if (a[i].value == b[j].value) 658 a[i++].value = j; 659 else 660 j++; 661 } 662 while (i <= n) 663 a[i++].value = 0; 664 b[m + 1].value = 0; 665 j = 0; 666 while (++j <= m) { 667 c[j] = -b[j].serial; 668 while (b[j + 1].value == b[j].value) { 669 j++; 670 c[j] = b[j].serial; 671 } 672 } 673 c[j] = -1; 674 } 675 676 /* Code taken from ping.c */ 677 static int 678 isqrt(int n) 679 { 680 int y, x = 1; 681 682 if (n == 0) 683 return (0); 684 685 do { /* newton was a stinker */ 686 y = x; 687 x = n / x; 688 x += y; 689 x /= 2; 690 } while ((x - y) > 1 || (x - y) < -1); 691 692 return (x); 693 } 694 695 static int 696 stone(int *a, int n, int *b, int *c, int flags) 697 { 698 int i, k, y, j, l; 699 int oldc, tc, oldl, sq; 700 u_int numtries, bound; 701 702 if (flags & D_MINIMAL) 703 bound = UINT_MAX; 704 else { 705 sq = isqrt(n); 706 bound = MAX(256, sq); 707 } 708 709 k = 0; 710 c[0] = newcand(0, 0, 0); 711 for (i = 1; i <= n; i++) { 712 j = a[i]; 713 if (j == 0) 714 continue; 715 y = -b[j]; 716 oldl = 0; 717 oldc = c[0]; 718 numtries = 0; 719 do { 720 if (y <= clist[oldc].y) 721 continue; 722 l = search(c, k, y); 723 if (l != oldl + 1) 724 oldc = c[l - 1]; 725 if (l <= k) { 726 if (clist[c[l]].y <= y) 727 continue; 728 tc = c[l]; 729 c[l] = newcand(i, y, oldc); 730 oldc = tc; 731 oldl = l; 732 numtries++; 733 } else { 734 c[l] = newcand(i, y, oldc); 735 k++; 736 break; 737 } 738 } while ((y = b[++j]) > 0 && numtries < bound); 739 } 740 return (k); 741 } 742 743 static int 744 newcand(int x, int y, int pred) 745 { 746 struct cand *q; 747 748 if (clen == clistlen) { 749 clistlen = clistlen * 11 / 10; 750 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 751 } 752 q = clist + clen; 753 q->x = x; 754 q->y = y; 755 q->pred = pred; 756 return (clen++); 757 } 758 759 static int 760 search(int *c, int k, int y) 761 { 762 int i, j, l, t; 763 764 if (clist[c[k]].y < y) /* quick look for typical case */ 765 return (k + 1); 766 i = 0; 767 j = k + 1; 768 for (;;) { 769 l = (i + j) / 2; 770 if (l <= i) 771 break; 772 t = clist[c[l]].y; 773 if (t > y) 774 j = l; 775 else if (t < y) 776 i = l; 777 else 778 return (l); 779 } 780 return (l + 1); 781 } 782 783 static void 784 unravel(int p) 785 { 786 struct cand *q; 787 int i; 788 789 for (i = 0; i <= len[0]; i++) 790 J[i] = i <= pref ? i : 791 i > len[0] - suff ? i + len[1] - len[0] : 0; 792 for (q = clist + p; q->y != 0; q = clist + q->pred) 793 J[q->x + pref] = q->y + pref; 794 } 795 796 /* 797 * Check does double duty: 798 * 1. ferret out any fortuitous correspondences due 799 * to confounding by hashing (which result in "jackpot") 800 * 2. collect random access indexes to the two files 801 */ 802 static void 803 check(FILE *f1, FILE *f2, int flags) 804 { 805 int i, j, jackpot, c, d; 806 long ctold, ctnew; 807 808 rewind(f1); 809 rewind(f2); 810 j = 1; 811 ixold[0] = ixnew[0] = 0; 812 jackpot = 0; 813 ctold = ctnew = 0; 814 for (i = 1; i <= len[0]; i++) { 815 if (J[i] == 0) { 816 ixold[i] = ctold += skipline(f1); 817 continue; 818 } 819 while (j < J[i]) { 820 ixnew[j] = ctnew += skipline(f2); 821 j++; 822 } 823 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 824 for (;;) { 825 c = getc(f1); 826 d = getc(f2); 827 /* 828 * GNU diff ignores a missing newline 829 * in one file for -b or -w. 830 */ 831 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 832 if (c == EOF && d == '\n') { 833 ctnew++; 834 break; 835 } else if (c == '\n' && d == EOF) { 836 ctold++; 837 break; 838 } 839 } 840 ctold++; 841 ctnew++; 842 if (flags & D_STRIPCR) { 843 if (c == '\r') { 844 if ((c = getc(f1)) == '\n') { 845 ctnew++; 846 break; 847 } 848 } 849 if (d == '\r') { 850 if ((d = getc(f2)) == '\n') { 851 ctold++; 852 break; 853 } 854 } 855 } 856 if ((flags & D_FOLDBLANKS) && isspace(c) && 857 isspace(d)) { 858 do { 859 if (c == '\n') 860 break; 861 ctold++; 862 } while (isspace(c = getc(f1))); 863 do { 864 if (d == '\n') 865 break; 866 ctnew++; 867 } while (isspace(d = getc(f2))); 868 } else if ((flags & D_IGNOREBLANKS)) { 869 while (isspace(c) && c != '\n') { 870 c = getc(f1); 871 ctold++; 872 } 873 while (isspace(d) && d != '\n') { 874 d = getc(f2); 875 ctnew++; 876 } 877 } 878 if (chrtran[c] != chrtran[d]) { 879 jackpot++; 880 J[i] = 0; 881 if (c != '\n' && c != EOF) 882 ctold += skipline(f1); 883 if (d != '\n' && c != EOF) 884 ctnew += skipline(f2); 885 break; 886 } 887 if (c == '\n' || c == EOF) 888 break; 889 } 890 } else { 891 for (;;) { 892 ctold++; 893 ctnew++; 894 if ((c = getc(f1)) != (d = getc(f2))) { 895 /* jackpot++; */ 896 J[i] = 0; 897 if (c != '\n' && c != EOF) 898 ctold += skipline(f1); 899 if (d != '\n' && c != EOF) 900 ctnew += skipline(f2); 901 break; 902 } 903 if (c == '\n' || c == EOF) 904 break; 905 } 906 } 907 ixold[i] = ctold; 908 ixnew[j] = ctnew; 909 j++; 910 } 911 for (; j <= len[1]; j++) { 912 ixnew[j] = ctnew += skipline(f2); 913 } 914 /* 915 * if (jackpot) 916 * fprintf(stderr, "jackpot\n"); 917 */ 918 } 919 920 /* shellsort CACM #201 */ 921 static void 922 sort(struct line *a, int n) 923 { 924 struct line *ai, *aim, w; 925 int j, m = 0, k; 926 927 if (n == 0) 928 return; 929 for (j = 1; j <= n; j *= 2) 930 m = 2 * j - 1; 931 for (m /= 2; m != 0; m /= 2) { 932 k = n - m; 933 for (j = 1; j <= k; j++) { 934 for (ai = &a[j]; ai > a; ai -= m) { 935 aim = &ai[m]; 936 if (aim < ai) 937 break; /* wraparound */ 938 if (aim->value > ai[0].value || 939 (aim->value == ai[0].value && 940 aim->serial > ai[0].serial)) 941 break; 942 w.value = ai[0].value; 943 ai[0].value = aim->value; 944 aim->value = w.value; 945 w.serial = ai[0].serial; 946 ai[0].serial = aim->serial; 947 aim->serial = w.serial; 948 } 949 } 950 } 951 } 952 953 static void 954 unsort(struct line *f, int l, int *b) 955 { 956 int *a, i; 957 958 a = xcalloc(l + 1, sizeof(*a)); 959 for (i = 1; i <= l; i++) 960 a[f[i].serial] = f[i].value; 961 for (i = 1; i <= l; i++) 962 b[i] = a[i]; 963 free(a); 964 } 965 966 static int 967 skipline(FILE *f) 968 { 969 int i, c; 970 971 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 972 continue; 973 return (i); 974 } 975 976 static void 977 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 978 { 979 int m, i0, i1, j0, j1; 980 981 rewind(f1); 982 rewind(f2); 983 m = len[0]; 984 J[0] = 0; 985 J[m + 1] = len[1] + 1; 986 if (diff_format != D_EDIT) { 987 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 988 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 989 i0++; 990 j0 = J[i0 - 1] + 1; 991 i1 = i0 - 1; 992 while (i1 < m && J[i1 + 1] == 0) 993 i1++; 994 j1 = J[i1 + 1] - 1; 995 J[i1] = j1; 996 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 997 } 998 } else { 999 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 1000 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 1001 i0--; 1002 j0 = J[i0 + 1] - 1; 1003 i1 = i0 + 1; 1004 while (i1 > 1 && J[i1 - 1] == 0) 1005 i1--; 1006 j1 = J[i1 - 1] + 1; 1007 J[i1] = j1; 1008 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1009 } 1010 } 1011 if (m == 0) 1012 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1013 if (diff_format == D_IFDEF || diff_format == D_GFORMAT) { 1014 for (;;) { 1015 #define c i0 1016 if ((c = getc(f1)) == EOF) 1017 return; 1018 diff_output("%c", c); 1019 } 1020 #undef c 1021 } 1022 if (anychange != 0) { 1023 if (diff_format == D_CONTEXT) 1024 dump_context_vec(f1, f2, flags); 1025 else if (diff_format == D_UNIFIED) 1026 dump_unified_vec(f1, f2, flags); 1027 } 1028 } 1029 1030 static void 1031 range(int a, int b, const char *separator) 1032 { 1033 diff_output("%d", a > b ? b : a); 1034 if (a < b) 1035 diff_output("%s%d", separator, b); 1036 } 1037 1038 static void 1039 uni_range(int a, int b) 1040 { 1041 if (a < b) 1042 diff_output("%d,%d", a, b - a + 1); 1043 else if (a == b) 1044 diff_output("%d", b); 1045 else 1046 diff_output("%d,0", b); 1047 } 1048 1049 static char * 1050 preadline(int fd, size_t rlen, off_t off) 1051 { 1052 char *line; 1053 ssize_t nr; 1054 1055 line = xmalloc(rlen + 1); 1056 if ((nr = pread(fd, line, rlen, off)) < 0) 1057 err(2, "preadline"); 1058 if (nr > 0 && line[nr-1] == '\n') 1059 nr--; 1060 line[nr] = '\0'; 1061 return (line); 1062 } 1063 1064 static int 1065 ignoreline(char *line) 1066 { 1067 int ret; 1068 1069 ret = regexec(&ignore_re, line, 0, NULL, 0); 1070 free(line); 1071 return (ret == 0); /* if it matched, it should be ignored. */ 1072 } 1073 1074 /* 1075 * Indicate that there is a difference between lines a and b of the from file 1076 * to get to lines c to d of the to file. If a is greater then b then there 1077 * are no lines in the from file involved and this means that there were 1078 * lines appended (beginning at b). If c is greater than d then there are 1079 * lines missing from the to file. 1080 */ 1081 static void 1082 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1083 int *pflags) 1084 { 1085 static size_t max_context = 64; 1086 long curpos; 1087 int i, nc, f; 1088 const char *walk; 1089 1090 restart: 1091 if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) && 1092 a > b && c > d) 1093 return; 1094 if (ignore_pats != NULL) { 1095 char *line; 1096 /* 1097 * All lines in the change, insert, or delete must 1098 * match an ignore pattern for the change to be 1099 * ignored. 1100 */ 1101 if (a <= b) { /* Changes and deletes. */ 1102 for (i = a; i <= b; i++) { 1103 line = preadline(fileno(f1), 1104 ixold[i] - ixold[i - 1], ixold[i - 1]); 1105 if (!ignoreline(line)) 1106 goto proceed; 1107 } 1108 } 1109 if (a > b || c <= d) { /* Changes and inserts. */ 1110 for (i = c; i <= d; i++) { 1111 line = preadline(fileno(f2), 1112 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1113 if (!ignoreline(line)) 1114 goto proceed; 1115 } 1116 } 1117 return; 1118 } 1119 proceed: 1120 if (*pflags & D_HEADER && diff_format != D_BRIEF) { 1121 diff_output("%s %s %s\n", diffargs, file1, file2); 1122 *pflags &= ~D_HEADER; 1123 } 1124 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1125 /* 1126 * Allocate change records as needed. 1127 */ 1128 if (context_vec_ptr == context_vec_end - 1) { 1129 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1130 max_context <<= 1; 1131 context_vec_start = xreallocarray(context_vec_start, 1132 max_context, sizeof(*context_vec_start)); 1133 context_vec_end = context_vec_start + max_context; 1134 context_vec_ptr = context_vec_start + offset; 1135 } 1136 if (anychange == 0) { 1137 /* 1138 * Print the context/unidiff header first time through. 1139 */ 1140 print_header(file1, file2); 1141 anychange = 1; 1142 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1143 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1144 /* 1145 * If this change is more than 'diff_context' lines from the 1146 * previous change, dump the record and reset it. 1147 */ 1148 if (diff_format == D_CONTEXT) 1149 dump_context_vec(f1, f2, *pflags); 1150 else 1151 dump_unified_vec(f1, f2, *pflags); 1152 } 1153 context_vec_ptr++; 1154 context_vec_ptr->a = a; 1155 context_vec_ptr->b = b; 1156 context_vec_ptr->c = c; 1157 context_vec_ptr->d = d; 1158 return; 1159 } 1160 if (anychange == 0) 1161 anychange = 1; 1162 switch (diff_format) { 1163 case D_BRIEF: 1164 return; 1165 case D_NORMAL: 1166 case D_EDIT: 1167 range(a, b, ","); 1168 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1169 if (diff_format == D_NORMAL) 1170 range(c, d, ","); 1171 diff_output("\n"); 1172 break; 1173 case D_REVERSE: 1174 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1175 range(a, b, " "); 1176 diff_output("\n"); 1177 break; 1178 case D_NREVERSE: 1179 if (a > b) 1180 diff_output("a%d %d\n", b, d - c + 1); 1181 else { 1182 diff_output("d%d %d\n", a, b - a + 1); 1183 if (!(c > d)) 1184 /* add changed lines */ 1185 diff_output("a%d %d\n", b, d - c + 1); 1186 } 1187 break; 1188 } 1189 if (diff_format == D_GFORMAT) { 1190 curpos = ftell(f1); 1191 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1192 nc = ixold[a > b ? b : a - 1] - curpos; 1193 for (i = 0; i < nc; i++) 1194 diff_output("%c", getc(f1)); 1195 for (walk = group_format; *walk != '\0'; walk++) { 1196 if (*walk == '%') { 1197 walk++; 1198 switch (*walk) { 1199 case '<': 1200 fetch(ixold, a, b, f1, '<', 1, *pflags); 1201 break; 1202 case '>': 1203 fetch(ixnew, c, d, f2, '>', 0, *pflags); 1204 break; 1205 default: 1206 diff_output("%%%c", *walk); 1207 break; 1208 } 1209 continue; 1210 } 1211 diff_output("%c", *walk); 1212 } 1213 } 1214 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1215 fetch(ixold, a, b, f1, '<', 1, *pflags); 1216 if (a <= b && c <= d && diff_format == D_NORMAL) 1217 diff_output("---\n"); 1218 } 1219 f = 0; 1220 if (diff_format != D_GFORMAT) 1221 f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1222 if (f != 0 && diff_format == D_EDIT) { 1223 /* 1224 * A non-zero return value for D_EDIT indicates that the 1225 * last line printed was a bare dot (".") that has been 1226 * escaped as ".." to prevent ed(1) from misinterpreting 1227 * it. We have to add a substitute command to change this 1228 * back and restart where we left off. 1229 */ 1230 diff_output(".\n"); 1231 diff_output("%ds/.//\n", a + f - 1); 1232 b = a + f - 1; 1233 a = b + 1; 1234 c += f; 1235 goto restart; 1236 } 1237 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1238 diff_output(".\n"); 1239 if (inifdef) { 1240 diff_output("#endif /* %s */\n", ifdefname); 1241 inifdef = 0; 1242 } 1243 } 1244 1245 static int 1246 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1247 { 1248 int i, j, c, lastc, col, nc; 1249 int newcol; 1250 1251 /* 1252 * When doing #ifdef's, copy down to current line 1253 * if this is the first file, so that stuff makes it to output. 1254 */ 1255 if ((diff_format == D_IFDEF) && oldfile) { 1256 long curpos = ftell(lb); 1257 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1258 nc = f[a > b ? b : a - 1] - curpos; 1259 for (i = 0; i < nc; i++) 1260 diff_output("%c", getc(lb)); 1261 } 1262 if (a > b) 1263 return (0); 1264 if (diff_format == D_IFDEF) { 1265 if (inifdef) { 1266 diff_output("#else /* %s%s */\n", 1267 oldfile == 1 ? "!" : "", ifdefname); 1268 } else { 1269 if (oldfile) 1270 diff_output("#ifndef %s\n", ifdefname); 1271 else 1272 diff_output("#ifdef %s\n", ifdefname); 1273 } 1274 inifdef = 1 + oldfile; 1275 } 1276 for (i = a; i <= b; i++) { 1277 fseek(lb, f[i - 1], SEEK_SET); 1278 nc = f[i] - f[i - 1]; 1279 if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && 1280 ch != '\0') { 1281 diff_output("%c", ch); 1282 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1283 || diff_format == D_UNIFIED)) 1284 diff_output("\t"); 1285 else if (diff_format != D_UNIFIED) 1286 diff_output(" "); 1287 } 1288 col = 0; 1289 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1290 if ((c = getc(lb)) == EOF) { 1291 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1292 diff_format == D_NREVERSE) 1293 warnx("No newline at end of file"); 1294 else 1295 diff_output("\n\\ No newline at end of " 1296 "file\n"); 1297 return (0); 1298 } 1299 if (c == '\t' && (flags & D_EXPANDTABS)) { 1300 newcol = ((col/tabsize)+1)*tabsize; 1301 do { 1302 diff_output(" "); 1303 } while (++col < newcol); 1304 } else { 1305 if (diff_format == D_EDIT && j == 1 && c == '\n' 1306 && lastc == '.') { 1307 /* 1308 * Don't print a bare "." line 1309 * since that will confuse ed(1). 1310 * Print ".." instead and return, 1311 * giving the caller an offset 1312 * from which to restart. 1313 */ 1314 diff_output(".\n"); 1315 return (i - a + 1); 1316 } 1317 diff_output("%c", c); 1318 col++; 1319 } 1320 } 1321 } 1322 return (0); 1323 } 1324 1325 /* 1326 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1327 */ 1328 static int 1329 readhash(FILE *f, int flags) 1330 { 1331 int i, t, space; 1332 int sum; 1333 1334 sum = 1; 1335 space = 0; 1336 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1337 if (flags & D_IGNORECASE) 1338 for (i = 0; (t = getc(f)) != '\n'; i++) { 1339 if (flags & D_STRIPCR && t == '\r') { 1340 t = getc(f); 1341 if (t == '\n') 1342 break; 1343 ungetc(t, f); 1344 } 1345 if (t == EOF) { 1346 if (i == 0) 1347 return (0); 1348 break; 1349 } 1350 sum = sum * 127 + chrtran[t]; 1351 } 1352 else 1353 for (i = 0; (t = getc(f)) != '\n'; i++) { 1354 if (flags & D_STRIPCR && t == '\r') { 1355 t = getc(f); 1356 if (t == '\n') 1357 break; 1358 ungetc(t, f); 1359 } 1360 if (t == EOF) { 1361 if (i == 0) 1362 return (0); 1363 break; 1364 } 1365 sum = sum * 127 + t; 1366 } 1367 } else { 1368 for (i = 0;;) { 1369 switch (t = getc(f)) { 1370 case '\r': 1371 case '\t': 1372 case '\v': 1373 case '\f': 1374 case ' ': 1375 space++; 1376 continue; 1377 default: 1378 if (space && (flags & D_IGNOREBLANKS) == 0) { 1379 i++; 1380 space = 0; 1381 } 1382 sum = sum * 127 + chrtran[t]; 1383 i++; 1384 continue; 1385 case EOF: 1386 if (i == 0) 1387 return (0); 1388 /* FALLTHROUGH */ 1389 case '\n': 1390 break; 1391 } 1392 break; 1393 } 1394 } 1395 /* 1396 * There is a remote possibility that we end up with a zero sum. 1397 * Zero is used as an EOF marker, so return 1 instead. 1398 */ 1399 return (sum == 0 ? 1 : sum); 1400 } 1401 1402 static int 1403 asciifile(FILE *f) 1404 { 1405 unsigned char buf[BUFSIZ]; 1406 size_t cnt; 1407 1408 if (f == NULL) 1409 return (1); 1410 1411 rewind(f); 1412 cnt = fread(buf, 1, sizeof(buf), f); 1413 return (memchr(buf, '\0', cnt) == NULL); 1414 } 1415 1416 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1417 1418 static char * 1419 match_function(const long *f, int pos, FILE *fp) 1420 { 1421 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1422 size_t nc; 1423 int last = lastline; 1424 const char *state = NULL; 1425 1426 lastline = pos; 1427 while (pos > last) { 1428 fseek(fp, f[pos - 1], SEEK_SET); 1429 nc = f[pos] - f[pos - 1]; 1430 if (nc >= sizeof(buf)) 1431 nc = sizeof(buf) - 1; 1432 nc = fread(buf, 1, nc, fp); 1433 if (nc > 0) { 1434 buf[nc] = '\0'; 1435 buf[strcspn(buf, "\n")] = '\0'; 1436 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1437 if (begins_with(buf, "private:")) { 1438 if (!state) 1439 state = " (private)"; 1440 } else if (begins_with(buf, "protected:")) { 1441 if (!state) 1442 state = " (protected)"; 1443 } else if (begins_with(buf, "public:")) { 1444 if (!state) 1445 state = " (public)"; 1446 } else { 1447 strlcpy(lastbuf, buf, sizeof lastbuf); 1448 if (state) 1449 strlcat(lastbuf, state, 1450 sizeof lastbuf); 1451 lastmatchline = pos; 1452 return lastbuf; 1453 } 1454 } 1455 } 1456 pos--; 1457 } 1458 return lastmatchline > 0 ? lastbuf : NULL; 1459 } 1460 1461 /* dump accumulated "context" diff changes */ 1462 static void 1463 dump_context_vec(FILE *f1, FILE *f2, int flags) 1464 { 1465 struct context_vec *cvp = context_vec_start; 1466 int lowa, upb, lowc, upd, do_output; 1467 int a, b, c, d; 1468 char ch, *f; 1469 1470 if (context_vec_start > context_vec_ptr) 1471 return; 1472 1473 b = d = 0; /* gcc */ 1474 lowa = MAX(1, cvp->a - diff_context); 1475 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1476 lowc = MAX(1, cvp->c - diff_context); 1477 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1478 1479 diff_output("***************"); 1480 if ((flags & D_PROTOTYPE)) { 1481 f = match_function(ixold, lowa-1, f1); 1482 if (f != NULL) 1483 diff_output(" %s", f); 1484 } 1485 diff_output("\n*** "); 1486 range(lowa, upb, ","); 1487 diff_output(" ****\n"); 1488 1489 /* 1490 * Output changes to the "old" file. The first loop suppresses 1491 * output if there were no changes to the "old" file (we'll see 1492 * the "old" lines as context in the "new" list). 1493 */ 1494 do_output = 0; 1495 for (; cvp <= context_vec_ptr; cvp++) 1496 if (cvp->a <= cvp->b) { 1497 cvp = context_vec_start; 1498 do_output++; 1499 break; 1500 } 1501 if (do_output) { 1502 while (cvp <= context_vec_ptr) { 1503 a = cvp->a; 1504 b = cvp->b; 1505 c = cvp->c; 1506 d = cvp->d; 1507 1508 if (a <= b && c <= d) 1509 ch = 'c'; 1510 else 1511 ch = (a <= b) ? 'd' : 'a'; 1512 1513 if (ch == 'a') 1514 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1515 else { 1516 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1517 fetch(ixold, a, b, f1, 1518 ch == 'c' ? '!' : '-', 0, flags); 1519 } 1520 lowa = b + 1; 1521 cvp++; 1522 } 1523 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1524 } 1525 /* output changes to the "new" file */ 1526 diff_output("--- "); 1527 range(lowc, upd, ","); 1528 diff_output(" ----\n"); 1529 1530 do_output = 0; 1531 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1532 if (cvp->c <= cvp->d) { 1533 cvp = context_vec_start; 1534 do_output++; 1535 break; 1536 } 1537 if (do_output) { 1538 while (cvp <= context_vec_ptr) { 1539 a = cvp->a; 1540 b = cvp->b; 1541 c = cvp->c; 1542 d = cvp->d; 1543 1544 if (a <= b && c <= d) 1545 ch = 'c'; 1546 else 1547 ch = (a <= b) ? 'd' : 'a'; 1548 1549 if (ch == 'd') 1550 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1551 else { 1552 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1553 fetch(ixnew, c, d, f2, 1554 ch == 'c' ? '!' : '+', 0, flags); 1555 } 1556 lowc = d + 1; 1557 cvp++; 1558 } 1559 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1560 } 1561 context_vec_ptr = context_vec_start - 1; 1562 } 1563 1564 /* dump accumulated "unified" diff changes */ 1565 static void 1566 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1567 { 1568 struct context_vec *cvp = context_vec_start; 1569 int lowa, upb, lowc, upd; 1570 int a, b, c, d; 1571 char ch, *f; 1572 1573 if (context_vec_start > context_vec_ptr) 1574 return; 1575 1576 b = d = 0; /* gcc */ 1577 lowa = MAX(1, cvp->a - diff_context); 1578 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1579 lowc = MAX(1, cvp->c - diff_context); 1580 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1581 1582 diff_output("@@ -"); 1583 uni_range(lowa, upb); 1584 diff_output(" +"); 1585 uni_range(lowc, upd); 1586 diff_output(" @@"); 1587 if ((flags & D_PROTOTYPE)) { 1588 f = match_function(ixold, lowa-1, f1); 1589 if (f != NULL) 1590 diff_output(" %s", f); 1591 } 1592 diff_output("\n"); 1593 1594 /* 1595 * Output changes in "unified" diff format--the old and new lines 1596 * are printed together. 1597 */ 1598 for (; cvp <= context_vec_ptr; cvp++) { 1599 a = cvp->a; 1600 b = cvp->b; 1601 c = cvp->c; 1602 d = cvp->d; 1603 1604 /* 1605 * c: both new and old changes 1606 * d: only changes in the old file 1607 * a: only changes in the new file 1608 */ 1609 if (a <= b && c <= d) 1610 ch = 'c'; 1611 else 1612 ch = (a <= b) ? 'd' : 'a'; 1613 1614 switch (ch) { 1615 case 'c': 1616 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1617 fetch(ixold, a, b, f1, '-', 0, flags); 1618 fetch(ixnew, c, d, f2, '+', 0, flags); 1619 break; 1620 case 'd': 1621 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1622 fetch(ixold, a, b, f1, '-', 0, flags); 1623 break; 1624 case 'a': 1625 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1626 fetch(ixnew, c, d, f2, '+', 0, flags); 1627 break; 1628 } 1629 lowa = b + 1; 1630 lowc = d + 1; 1631 } 1632 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1633 1634 context_vec_ptr = context_vec_start - 1; 1635 } 1636 1637 static void 1638 print_header(const char *file1, const char *file2) 1639 { 1640 const char *time_format; 1641 char buf1[256]; 1642 char buf2[256]; 1643 char end1[10]; 1644 char end2[10]; 1645 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; 1646 int nsec1 = stb1.st_mtim.tv_nsec; 1647 int nsec2 = stb2.st_mtim.tv_nsec; 1648 1649 time_format = "%Y-%m-%d %H:%M:%S"; 1650 1651 if (cflag) 1652 time_format = "%c"; 1653 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); 1654 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); 1655 strftime(buf1, 256, time_format, tm_ptr1); 1656 strftime(buf2, 256, time_format, tm_ptr2); 1657 if (!cflag) { 1658 strftime(end1, 10, "%z", tm_ptr1); 1659 strftime(end2, 10, "%z", tm_ptr2); 1660 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1661 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1662 } 1663 if (label[0] != NULL) 1664 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1665 label[0]); 1666 else 1667 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1668 file1, buf1); 1669 if (label[1] != NULL) 1670 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1671 label[1]); 1672 else 1673 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1674 file2, buf2); 1675 } 1676