1 /******************************************************************************
2  * Copyright 1994-2019,2021 by Thomas E. Dickey                               *
3  * All Rights Reserved.                                                       *
4  *                                                                            *
5  * Permission to use, copy, modify, and distribute this software and its      *
6  * documentation for any purpose and without fee is hereby granted, provided  *
7  * that the above copyright notice appear in all copies and that both that    *
8  * copyright notice and this permission notice appear in supporting           *
9  * documentation, and that the name of the above listed copyright holder(s)   *
10  * not be used in advertising or publicity pertaining to distribution of the  *
11  * software without specific, written prior permission.                       *
12  *                                                                            *
13  * THE ABOVE LISTED COPYRIGHT HOLDER(S) DISCLAIM ALL WARRANTIES WITH REGARD   *
14  * TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND  *
15  * FITNESS, IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE  *
16  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES          *
17  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN      *
18  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR *
19  * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.                *
20  ******************************************************************************/
21 
#ifndef	NO_IDENT
/* RCS-style identification string for this revision; omitted if NO_IDENT is defined */
static const char *Id = "$Id: diffstat.c,v 1.64 2021/01/13 00:28:32 tom Exp $";
#endif
25 
26 /*
27  * Title:	diffstat.c
28  * Author:	T.E.Dickey
29  * Created:	02 Feb 1992
30  * Modified:
31  *		12 Jan 2021, check for git --binary diffs.
32  *		29 Nov 2019, eliminate fixed buffer when decoding range.
33  *		28 Nov 2019, use locale in computing filename column-width.
34  *			     improve parsing for git diffs.
35  *			     use terminal-width as default for -w to tty.
36  *			     minor fix in do_merging (Miloslaw Smyk).
37  *		27 Nov 2019, improve relative-pathname matching in count_lines()
38  *			     add a parsing-case for svn diff.
39  *			     quote filenames in -t/-T output.
40  *		24 Nov 2019, fix cppcheck-warnings about sscanf.
41  *		14 Aug 2018, revise -S/-D option to improve count of unmodified
42  *			     files.
43  *		14 Jan 2016, extend -S option to count unmodified files.
44  *			     add -T option to show values with histogram
45  *		06 Jul 2015, handle double-quotes, e.g., from diffutils 3.3
46  *			     when filenames have embedded spaces.
47  *		05 Jun 2014, add -E option to filter colordiff output.
48  *		28 Oct 2013, portability improvements for MinGW.
49  *		15 Apr 2013, modify to accommodate output of "diff -q", which
50  *			     tells only if the files are different.  Work
51  *			     around the equivalent ambiguous message introduced
52  *			     in diffutils 2.8.4 and finally removed for 3.0
53  *		11 Feb 2013, add -K option.  Use strtol() to provide error
54  *			     checking of optarg values.
55  *		10 Feb 2013, document -b, -C, -s option in usage (patch by
56  *			     Tim Waugh, Red Hat #852770).  Improve pathname
57  *			     merging.
58  *		02 Jun 2012, fix for svn diff with spaces in path (patch by
59  *			     Stuart Prescott, Debian #675465).
60  *		03 Jan 2012, Correct case for "xz" suffix in is_compressed()
61  *			     (patch from Frederic Culot in FreeBSD ports).  Add
62  *			     "-R" option.  Improve dequoting of filenames in
63  *			     headers.
64  *		10 Oct 2010, correct display of new files when -S/-D options
65  *			     are used.  Remove the temporary directory on
66  *			     error, introduced in 1.48+ (patch by Solar
67  *			     Designer).
68  *		19 Jul 2010, add missing "break" statement which left "-c"
69  *			     option falling-through into "-C".
70  *		16 Jul 2010, configure "xz" path explicitly, in case lzcat
71  *			     does not support xz format.  Add "-s" (summary)
72  *			     and "-C" (color) options.
73  *		15 Jul 2010, fix strict gcc warnings, e.g., using const.
74  *		10 Jan 2010, improve a case where filenames have embedded blanks
75  *			     (patch by Reinier Post).
76  *		07 Nov 2009, correct suffix-check for ".xz" files as
77  *			     command-line parameters rather than as piped
78  *			     input (report by Moritz Barsnick).
79  *		06 Oct 2009, fixes to build/run with MSYS or MinGW.  use
80  *			     $TMPDIR for path of temporary file used in
81  *			     decompression.  correct else-condition for
82  *			     detecting compression type (patch by Zach Hirsch).
83  *		31 Aug 2009, improve lzma support, add support for xz (patch by
84  *			     Eric Blake).  Add special case for no-newline
85  *			     message from some diff's (Ubuntu #269895).
86  *			     Improve configure check for getopt().
87  *		11 Aug 2009, Add logic to check standard input, decompress if
88  *			     possible.  Add -N option, to truncate long names.
89  *			     Add pack/pcat as a compression type.
90  *			     Add lzma/lzcat as a compression type.
91  *			     Allow overriding program paths with environment.
92  *		10 Aug 2009, modify to work with Perforce-style diffs (patch
93  *			     by Ed Schouten).
94  *		29 Mar 2009, modify to work with patch ".rej" files, which have
95  *			     no filename header (use the name of the ".rej"
96  *			     file if it is available).
97  *		29 Sep 2008, fix typo in usage message.
98  *		06 Aug 2008, add "-m", "-S" and "-D" options.
99  *		05 Aug 2008, add "-q" option to suppress 0-files-changed
100  *			     message (patch by Greg Norris).
101  *		04 Sep 2007, add "-b" option to suppress binary-files (patch
102  *			     by Greg Norris).
103  *		26 Aug 2007, add "-d" option to show debugging traces, rather
104  *			     than by defining DEBUG.  Add check after
105  *			     unified-diff chunk to avoid adding non-diff text
106  *			     (report by Adrian Bunk).  Quote pathname passed
107  *			     in command to gzip/uncompress.  Add a check for
108  *			     default-diff output without the "diff" command
109  *			     supplied to provide filename, mark as "unknown".
 *		16 Jul 2006, fix to avoid modifying data which is being used by
111  *			     tsearch() for ordering the binary tree (report by
112  *			     Adrian Bunk).
113  *		02 Jul 2006, do not ignore pathnames in /tmp/, since some tools
114  *			     create usable pathnames for both old/new files
115  *			     there (Debian #376086).  Correct ifdef for
116  *			     fgetc_unlocked().  Add configure check for
117  *			     compress, gzip and bzip2 programs that may be used
118  *			     to decompress files.
119  *		24 Aug 2005, update usage message for -l, -r changes.
120  *		15 Aug 2005, apply PLURAL() to num_files (Jean Delvare).
121  *			     add -l option (request by Michael Burian).
122  *			     Use fgetc_locked() if available.
123  *		14 Aug 2005, add -r2 option (rounding with adjustment to ensure
124  *			     that nonzero values always display a histogram
125  *			     bar), adapted from patch by Jean Delvare.  Extend
126  *			     the -f option (2=filled, 4=verbose).
127  *		12 Aug 2005, modify to use tsearch() for sorted lists.
128  *		11 Aug 2005, minor fixes to scaling of modified lines.  Add
129  *			     -r (round) option.
130  *		05 Aug 2005, add -t (table) option.
131  *		10 Apr 2005, change order of merging and prefix-stripping so
132  *			     stripping all prefixes, e.g., with -p9, will be
133  *			     sorted as expected (Patch by Jean Delvare
134  *			     <khali@linux-fr.org>).
135  *		10 Jan 2005, add support for '--help' and '--version' (Patch
136  *			     by Eric Blake <ebb9@byu.net>.)
137  *		16 Dec 2004, fix a different case for data beginning with "--"
138  *			     which was treated as a header line.
139  *		14 Dec 2004, Fix allocation problems.  Open files in binary
140  *			     mode for reading.  Getopt returns -1, not
141  *			     necessarily EOF.  Add const where useful.  Use
142  *			     NO_IDENT where necessary.  malloc() comes from
143  *			     <stdlib.h> in standard systems (Patch by Eric
144  *			     Blake <ebb9@byu.net>.)
145  *		08 Nov 2004, minor fix for resync of unified diffs checks for
146  *			     range (line beginning with '@' without header
147  *			     lines (successive lines beginning with "---" and
148  *			     "+++").  Fix a few problems reported by valgrind.
149  *		09 Nov 2003, modify check for lines beginning with '-' or '+'
150  *			     to treat only "---" in old-style diffs as a
151  *			     special case.
152  *		14 Feb 2003, modify check for filenames to allow for some cases
153  *			     of incomplete dates (the reported example omitted
154  *			     the day of the month).  Correct a typo in usage().
155  *			     Add -e, -h, -o options.
156  *		04 Jan 2003, improve tracking of chunks in unified diff, in
157  *			     case the original files contained a '+' or '-' in
158  *			     the first column (Debian #155000).  Add -v option
159  *			     (Debian #170947).  Modify to allocate buffers big
160  *			     enough for long input lines.  Do additional
161  *			     merging to handle unusual Index/diff constructs in
162  *			     recent makepatch script.
163  *		20 Aug 2002, add -u option to tell diffstat to preserve the
164  *			     order of filenames as given rather than sort them
165  *			     (request by H Peter Anvin <hpa@zytor.com>).  Add
166  *			     -k option for completeness.
167  *		09 Aug 2002, allow either '/' or '-' as delimiters in dates,
168  *			     to accommodate diffutils 2.8 (report by Rik van
169  *			     Riel <riel@conectiva.com.br>).
170  *		10 Oct 2001, add bzip2 (.bz2) suffix as suggested by
171  *			     Gregory T Norris <haphazard@socket.net> in Debian
172  *			     bug report #82969).
173  *			     add check for diff from RCS archive where the
174  *			     "diff" lines do not reference a filename.
175  *		29 Mar 2000, add -c option.  Check for compressed input, read
176  *			     via pipe.  Change to ANSI C.  Adapted change from
177  *			     Troy Engel to add option that displays a number
178  *			     only, rather than a histogram.
179  *		17 May 1998, handle Debian diff files, which do not contain
180  *			     dates on the header lines.
181  *		16 Jan 1998, accommodate patches w/o tabs in header lines (e.g.,
182  *			     from cut/paste).  Strip suffixes such as ".orig".
183  *		24 Mar 1996, corrected -p0 logic, more fixes in do_merging.
184  *		16 Mar 1996, corrected state-change for "Binary".  Added -p
185  *			     option.
186  *		17 Dec 1995, corrected matching algorithm in 'do_merging()'
187  *		11 Dec 1995, mods to accommodate diffs against /dev/null or
188  *			     /tmp/XXX (tempfiles).
189  *		06 May 1995, limit scaling -- only shrink-to-fit.
190  *		29 Apr 1995, recognize 'rcsdiff -u' format.
191  *		26 Dec 1994, strip common pathname-prefix.
192  *		13 Nov 1994, added '-n' option.  Corrected logic of 'match'.
193  *		17 Jun 1994, ifdef-<string.h>
194  *		12 Jun 1994, recognize unified diff, and output of makepatch.
195  *		04 Oct 1993, merge multiple diff-files, busy message when the
196  *			     output is piped to a file.
197  *
198  * Function:	this program reads the output of 'diff' and displays a histogram
199  *		of the insertions/deletions/modifications per-file.
200  */
201 
202 #if defined(HAVE_CONFIG_H)
203 #include <config.h>
204 #endif
205 
206 #if defined(WIN32) && !defined(HAVE_CONFIG_H)
207 #define HAVE_STDLIB_H
208 #define HAVE_STRING_H
209 #define HAVE_MALLOC_H
210 #define HAVE_GETOPT_H
211 #endif
212 
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
215 
216 #ifdef HAVE_STRING_H
217 #include <string.h>
218 #else
219 #include <strings.h>
220 #define strchr index
221 #define strrchr rindex
222 #endif
223 
224 #ifdef HAVE_STDLIB_H
225 #include <stdlib.h>
226 #else
227 extern int atoi(const char *);
228 #endif
229 
230 #ifdef HAVE_UNISTD_H
231 #include <unistd.h>
232 #else
233 extern int isatty(int);
234 #endif
235 
236 #ifdef HAVE_OPENDIR
237 #include <dirent.h>
238 #endif
239 
240 #ifdef HAVE_MALLOC_H
241 #include <malloc.h>
242 #endif
243 
244 #if defined(HAVE_SEARCH_H) && defined(HAVE_TSEARCH)
245 #include <search.h>
246 #else
247 #undef HAVE_TSEARCH
248 #endif
249 
250 #ifdef HAVE_MBSTOWCWIDTH
251 #include <locale.h>
252 #include <wchar.h>
253 #endif
254 
255 #ifdef HAVE_GETC_UNLOCKED
256 #define MY_GETC getc_unlocked
257 #else
258 #define MY_GETC getc
259 #endif
260 
261 #ifdef HAVE_GETOPT_H
262 #include <getopt.h>
263 #elif !defined(HAVE_GETOPT_HEADER)
264 extern int getopt(int, char *const *, const char *);
265 extern char *optarg;
266 extern int optind;
267 #endif
268 
269 #include <sys/types.h>
270 #include <sys/stat.h>
271 
272 #if defined(HAVE_TERMIOS_H) && defined(HAVE_TCGETATTR)
273 #ifdef HAVE_IOCTL_H
274 #include <ioctl.h>
275 #else
276 #ifdef HAVE_SYS_IOCTL_H
277 #include <sys/ioctl.h>
278 #endif
279 #endif
280 #if !defined(sun) || !defined(NL0)
281 #include <termios.h>
282 #endif
283 #endif /* HAVE_TERMIOS_H */
284 
285 #if defined(HAVE_POPEN) && !defined(HAVE_POPEN_PROTOTYPE)
286 extern FILE *popen(const char *, const char *);
287 extern int pclose(FILE *);
288 #endif
289 
290 #if !defined(EXIT_SUCCESS)
291 #define EXIT_SUCCESS 0
292 #define EXIT_FAILURE 1
293 #endif
294 
295 #ifndef BZCAT_PATH
296 #define BZCAT_PATH ""
297 #endif
298 
299 #ifndef BZIP2_PATH
300 #define BZIP2_PATH ""
301 #endif
302 
303 #ifndef COMPRESS_PATH
304 #define COMPRESS_PATH ""
305 #endif
306 
307 #ifndef GZIP_PATH
308 #define GZIP_PATH ""
309 #endif
310 
311 #ifndef LZCAT_PATH
312 #define LZCAT_PATH ""
313 #endif
314 
315 #ifndef PCAT_PATH
316 #define PCAT_PATH ""
317 #endif
318 
319 #ifndef UNCOMPRESS_PATH
320 #define UNCOMPRESS_PATH ""
321 #endif
322 
323 #ifndef XZ_PATH
324 #define XZ_PATH ""
325 #endif
326 
327 #ifndef ZCAT_PATH
328 #define ZCAT_PATH ""
329 #endif
330 
331 /******************************************************************************/
332 
/*
 * MKDIR() hides the difference in mkdir() arguments: the MinGW/WIN32 form
 * takes only the pathname, so 'mode' is dropped there.
 */
#if defined(__MINGW32__) || defined(WIN32)
#define MKDIR(name,mode) mkdir(name)
#else
#define MKDIR(name,mode) mkdir(name,mode)
#endif

/* PATHSEP is the character which separates directory levels in a pathname */
#if defined(WIN32) && !defined(__MINGW32__)
#define PATHSEP '\\'
#else
#define PATHSEP '/'
#endif
344 
/* Named character constants */
#define BACKSL  '\\'
#define LPAREN  '('
#define RPAREN  ')'
#define DQUOTE  '"'
#define SQUOTE  '\''
#define ESCAPE  '\033'
#define EOS     '\0'
#define TAB     '\t'
#define BLANK   ' '
#define DEL     '\177'

/* Convert to unsigned char, e.g., before passing a char to isdigit() */
#define UC(c)   ((unsigned char)(c))

/* True if 'c' is one of the digits 0-7; the argument is evaluated twice */
#define isoctal(c) (((c) >= '0') && ((c) <= '7'))
359 
/*
 * Debugging traces are compiled in unless OPT_TRACE is predefined as zero.
 *
 * TRACE() takes a parenthesized printf() argument list, e.g.,
 *	TRACE(("** base set to %d\n", *base));
 * and prints only when trace_opt is nonzero.  It expands to a bare "if"
 * statement, so an unbraced "if (x) TRACE((...)); else ..." would attach the
 * "else" to the macro's own "if".
 */
#ifndef OPT_TRACE
#define OPT_TRACE 1
#endif

#if OPT_TRACE
#define TRACE(p) if (trace_opt) printf p
#else
#define TRACE(p)		/*nothing */
#endif
369 
/* True if string 's' contains at least one of the characters in 'reject' */
#define contain_any(s,reject) (strcspn(s,reject) != strlen(s))
/* The larger of two values; each argument may be evaluated twice */
#define maximum(a,b) ((a) < (b) ? (b) : (a))

/* What kind of filename information has been seen (distinct bits 1, 2, 4) */
#define HAVE_NOTHING 0
#define HAVE_GENERIC 1		/* e.g., "Index: foo" w/o pathname */
#define HAVE_PATH    2		/* reference-file from "diff dirname/foo" */
#define HAVE_PATH2   4		/* comparison-file from "diff dirname/foo" */

/* Values for format_opt; NOTE(review): presumably set by the -f option -- confirm */
#define FMT_CONCISE  0
#define FMT_NORMAL   1
#define FMT_FILLED   2
#define FMT_VERBOSE  4
382 
/*
 * Classification stored in DATA.cmt.  Normal is the value set by init_data();
 * HadDiffs() treats any other value as a recorded difference.
 */
typedef enum comment {
    Normal, Only, OnlyLeft, OnlyRight, Binary, Differs, Either
} Comment;
386 
#define MARKS 4			/* each of +, -, ! and = */

/* Index into the count[] and chunk[] arrays of DATA, in the order of marks[] */
typedef enum {
    cInsert = 0,
    cDelete,
    cModify,
    cEquals
} Change;

#define InsOf(p) (p)->count[cInsert]	/* "+" count inserted lines */
#define DelOf(p) (p)->count[cDelete]	/* "-" count deleted lines */
#define ModOf(p) (p)->count[cModify]	/* "!" count modified lines */
#define EqlOf(p) (p)->count[cEquals]	/* "=" count unmodified lines */

/* Sum of all four per-file counters, including the unmodified lines */
#define TotalOf(p) (InsOf(p) + DelOf(p) + ModOf(p) + EqlOf(p))
/* Loop 'n' over the marks in use, i.e., 0 through num_marks - 1 */
#define for_each_mark(n) for (n = 0; n < num_marks; ++n)
403 
/*
 * One record per filename.  Records are chained through 'link' starting at
 * all_data.
 */
typedef struct _data {
    struct _data *link;		/* next record in the list, or null */
    char *name;			/* the filename */
    int copy;			/* true if filename is const-literal */
				/* (delink() frees 'name' only when this is 0) */
    int base;			/* beginning of name if -p option used */
    Comment cmt;		/* Normal unless classified otherwise */
    int pending;
    long chunks;		/* total number of chunks */
    long chunk[MARKS];		/* counts for the current chunk */
    long count[MARKS];		/* counts for the file */
} DATA;
415 
/*
 * Kinds of input compression.  NOTE(review): the names presumably correspond
 * to the decompression programs configured by the *_PATH macros -- confirm
 * against the code which uses this type.
 */
typedef enum {
    dcNone = 0,
    dcBzip,
    dcCompress,
    dcGzip,
    dcLzma,
    dcPack,
    dcXz,
    dcEmpty
} Decompress;
426 
/* The character shown for each Change value, indexed in enum order */
static const char marks[MARKS + 1] = "+-!=";
/*
 * One number per mark, in the same order.  NOTE(review): presumably the color
 * used for that mark when show_colors is set -- confirm.  Only MARKS values
 * are listed, so the final element is zero.
 */
static const int colors[MARKS + 1] =
{2, 1, 6, 4};
430 
static DATA *all_data;		/* head of the linked list of DATA records */
static char *S_option = 0;	/* NOTE(review): presumably the -S value -- confirm */
static char *D_option = 0;	/* NOTE(review): presumably the -D value -- confirm */
static const char *comment_opt = "";
static char *path_opt = 0;
static int count_files;		/* true if we count added/deleted files */
static int format_opt = FMT_NORMAL;
static int max_name_wide;	/* maximum amount reserved for filenames */
static int max_width = 80;	/* the specified width-limit */
static int merge_names = 1;	/* true if we merge similar filenames */
static int merge_opt = 0;	/* true if we merge ins/del as modified */
static int min_name_wide;	/* minimum amount reserved for filenames */
static int names_only;		/* true if we list filenames only */
static int num_marks = 3;	/* 3 or 4, according to "-P" option */
static int path_dest;		/* true if path_opt is destination (patched) */
static int plot_width;		/* the amount left over for histogram */
static int prefix_opt = -1;	/* if positive, controls stripping of PATHSEP */
static int quiet = 0;		/* -q option */
static int reverse_opt;		/* true if results are reversed */
static int round_opt = 0;	/* if nonzero, round data for histogram */
static int show_colors;		/* true if showing SGR colors */
static int show_progress;	/* if not writing to tty, show progress */
static int sort_names = 1;	/* true if we sort filenames */
static int summary_only = 0;	/* true if only summary line is shown */
static int suppress_binary = 0;	/* -b option */
static int trim_escapes = 0;	/* -E option */
static int table_opt = 0;	/* if 1/2, write table instead/also plot */
static int trace_opt = 0;	/* if nonzero, write debugging information */
static int unchanged = 0;	/* special-case for -S vs modified-files */
static int verbose = 0;		/* -v option */
static long plot_scale;		/* the effective scale (1:maximum) */

#ifdef HAVE_TSEARCH
static int use_tsearch;		/* nonzero if records are kept in the tree */
static void *sorted_data;	/* root passed to tfind/tsearch/tdelete */
#endif

static int number_len = 5;
static int prefix_len = -1;
470 
471 /******************************************************************************/
472 
#ifdef GCC_NORETURN
static void failed(const char *) GCC_NORETURN;
#endif

/*
 * Report a fatal error and exit:  perror() prints 's' followed by the message
 * for the current errno, then the program ends with EXIT_FAILURE.  This
 * function does not return.
 */
static void
failed(const char *s)
{
    perror(s);
    exit(EXIT_FAILURE);
}
483 
/*
 * Allocate 's' bytes.  If malloc() returns a null pointer, report that via
 * failed(), which exits, so callers never see a null result.
 */
static void *
xmalloc(size_t s)
{
    void *result = malloc(s);

    if (result == NULL)
	failed("malloc");
    return result;
}
493 
/*
 * Get the file status for 'name' into 'sb', using lstat() when configured
 * with HAVE_LSTAT and stat() otherwise.
 *
 * Returns the result of that call, or -1 without calling it if 'name' is
 * null.
 */
static int
do_stat(const char *name, struct stat *sb)
{
    if (name == 0)
	return -1;
#ifdef HAVE_LSTAT
    return lstat(name, sb);
#else
    return stat(name, sb);
#endif
}
509 
510 static mode_t
get_stat(const char * name)511 get_stat(const char *name)
512 {
513     struct stat sb;
514     int rc = do_stat(name, &sb);
515     return ((rc == 0) ? (sb.st_mode & S_IFMT) : 0);
516 }
517 
518 static int
is_dir(const char * name)519 is_dir(const char *name)
520 {
521     return get_stat(name) == S_IFDIR;
522 }
523 
524 static int
is_file(const char * name)525 is_file(const char *name)
526 {
527     return get_stat(name) == S_IFREG;
528 }
529 
/*
 * Tell whether two files have identical contents.
 *
 * Returns 1 only if both names are regular files of the same size, both can
 * be opened, and they compare equal byte-for-byte without a read error.
 * Returns 0 in every other case.
 */
static int
same_file(const char *source, const char *target)
{
    int rc = 0;
    struct stat ssb;
    struct stat dsb;

    if (do_stat(source, &ssb) == 0 && S_ISREG(ssb.st_mode)
	&& do_stat(target, &dsb) == 0 && S_ISREG(dsb.st_mode)
	&& ssb.st_size == dsb.st_size) {
	/*
	 * Open in binary mode:  where text mode differs (e.g., MinGW), it
	 * translates line-endings and can stop early at a control-Z, so a
	 * text-mode comparison is not a comparison of the files' bytes.
	 */
	FILE *ip = fopen(source, "rb");
	if (ip != 0) {
	    FILE *op = fopen(target, "rb");
	    if (op != 0) {
		int a;
		int b;

		/* stop at the first mismatch, or when both reach the end */
		do {
		    a = fgetc(ip);
		    b = fgetc(op);
		} while (a == b && a != EOF);

		/* EOF caused by a read error does not prove equality */
		rc = (a == b) && !ferror(ip) && !ferror(op);
		fclose(op);
	    }
	    fclose(ip);
	}
    }
    return rc;
}
568 
569 static void
blip(int c)570 blip(int c)
571 {
572     if (show_progress) {
573 	(void) fflush(stdout);
574 	(void) fputc(c, stderr);
575 	(void) fflush(stderr);
576     }
577 }
578 
/*
 * Return a newly allocated copy of the string 's'.
 *
 * The copy is always obtained from xmalloc(), so running out of memory is
 * reported and ends the program.  A bare strdup() would instead return a null
 * pointer, which callers such as trim_datapath() write through unchecked.
 */
static char *
new_string(const char *s)
{
    return strcpy((char *) xmalloc(strlen(s) + 1), s);
}
588 
589 static int
compare_data(const void * a,const void * b)590 compare_data(const void *a, const void *b)
591 {
592     const DATA *p = (const DATA *) a;
593     const DATA *q = (const DATA *) b;
594     return ((p != NULL)
595 	    ? ((q != NULL)
596 	       ? strcmp(p->name + p->base, q->name + q->base)
597 	       : 1)
598 	    : -1);
599 }
600 
601 static void
init_data(DATA * data,const char * name,int copy,int base)602 init_data(DATA * data, const char *name, int copy, int base)
603 {
604     memset(data, 0, sizeof(*data));
605     data->name = (char *) name;
606     data->copy = copy;
607     data->base = base;
608     data->cmt = Normal;
609 }
610 
611 static DATA *
new_data(const char * name,int base)612 new_data(const char *name, int base)
613 {
614     DATA *r = (DATA *) xmalloc(sizeof(DATA));
615 
616     init_data(r, new_string(name), 0, base);
617 
618     return r;
619 }
620 
#ifdef HAVE_TSEARCH
/*
 * Find the record for 'name' (compared from offset 'base') in the tsearch()
 * tree, creating one if there is none.
 *
 * A new record is added to the tree and also pushed on the front of the
 * all_data list.  Returns the existing or new record.  If the tree node
 * cannot be allocated, the program exits via failed(), since continuing would
 * leave a record on the list which the tree does not know about.
 */
static DATA *
add_tsearch_data(const char *name, int base)
{
    DATA find;
    DATA *result;
    void *pp;

    /* 'find' is only a search key:  it borrows 'name' rather than copying */
    init_data(&find, name, 1, base);
    if ((pp = tfind(&find, &sorted_data, compare_data)) != 0) {
	result = *(DATA **) pp;
	return result;
    }
    result = new_data(name, base);
    /* tsearch() gives a null pointer if it has no space for a new node */
    if (tsearch(result, &sorted_data, compare_data) == 0)
	failed("tsearch");
    result->link = all_data;
    all_data = result;

    return result;
}
#endif
642 
643 static int
count_prefix(const char * name)644 count_prefix(const char *name)
645 {
646     int count = 0;
647     const char *s;
648     while ((s = strchr(name, PATHSEP)) != 0) {
649 	name = s + 1;
650 	++count;
651     }
652     return count;
653 }
654 
655 static const char *
skip_prefix(const char * name,int prefix,int * base)656 skip_prefix(const char *name, int prefix, int *base)
657 {
658     if (prefix >= 0) {
659 	int n;
660 	*base = 0;
661 
662 	for (n = prefix; n > 0; n--) {
663 	    const char *s = strchr(name + *base, PATHSEP);
664 	    if (s == 0 || *++s == EOS) {
665 		name = s;
666 		break;
667 	    }
668 	    *base = (int) (s - name);
669 	}
670 	TRACE(("** base set to %d\n", *base));
671     }
672     return name;
673 }
674 
/*
 * Return the DATA record for 'name', creating it if necessary.
 *
 * With tsearch in use, this defers to add_tsearch_data().  Otherwise it walks
 * the all_data list:  an entry which compares equal is returned when
 * merge_names is set; a new entry is inserted before the first entry which
 * compares greater when sort_names is set, or else appended to the end.
 */
static DATA *
find_data(const char *name)
{
    DATA *r;
    int base = 0;

    TRACE(("** find_data(%s)\n", name));

    /* Compute the base offset if the prefix option is used */
    if (prefix_opt >= 0) {
	(void) skip_prefix(name, prefix_opt, &base);
    }

    /* Insert into sorted list (usually sorted).  If we are not sorting or
     * merging names, we fall off the end and link the new entry to the end of
     * the list.  If the prefix option is used, the prefix is ignored by the
     * merge and sort operations.
     *
     * If we have tsearch(), we will maintain the sorted list using it and
     * tfind().
     */
#ifdef HAVE_TSEARCH
    if (use_tsearch) {
	r = add_tsearch_data(name, base);
    } else
#endif
    {
	DATA *p;
	DATA find;
	DATA *q;

	/* 'find' is a temporary key which borrows 'name' for comparisons */
	init_data(&find, name, 1, base);
	/* 'q' trails 'p', so the new entry can be linked between them */
	for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
	    int cmp = compare_data(p, &find);
	    if (merge_names && (cmp == 0))
		return p;
	    if (sort_names && (cmp > 0))
		break;
	}
	r = new_data(name, base);
	if (q != 0)
	    q->link = r;
	else
	    all_data = r;

	r->link = p;
    }

    return r;
}
725 
726 /*
727  * Remove a unneeded data item from the linked list.  Free the name as well.
728  */
729 static int
delink(DATA * data)730 delink(DATA * data)
731 {
732     DATA *p, *q;
733 
734     TRACE(("** delink '%s'\n", data->name));
735 
736 #ifdef HAVE_TSEARCH
737     if (use_tsearch) {
738 	if (tdelete(data, &sorted_data, compare_data) == 0)
739 	    return 0;
740     }
741 #endif
742     for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
743 	if (p == data) {
744 	    if (q != 0)
745 		q->link = p->link;
746 	    else
747 		all_data = p->link;
748 	    if (!p->copy)
749 		free(p->name);
750 	    free(p);
751 	    return 1;
752 	}
753     }
754     return 0;
755 }
756 
757 /*
758  * Compare string 's' against a constant, returning either a pointer just
759  * past the matched part of 's' if it matches exactly, or null if a mismatch
760  * was found.
761  */
762 static char *
match(char * s,const char * p)763 match(char *s, const char *p)
764 {
765     int ok = 0;
766 
767     while (*s != EOS) {
768 	if (*p == EOS) {
769 	    ok = 1;
770 	    break;
771 	}
772 	if (*s++ != *p++)
773 	    break;
774 	if (*s == EOS && *p == EOS) {
775 	    ok = 1;
776 	    break;
777 	}
778     }
779     return ok ? s : 0;
780 }
781 
/*
 * True if 's' consists of exactly two decimal integers separated by a
 * period, e.g., "1.64", with nothing following the second number.
 */
static int
version_num(const char *s)
{
    int major, minor;
    char extra[2];
    int fields = sscanf(s, "%d.%d%c", &major, &minor, extra);

    /* a third field means there was trailing text */
    return (fields == 2);
}
789 
/*
 * Check for a range of line-numbers, used in editing scripts:  either two
 * numbers separated by a comma, or a single number, with nothing following
 * in either case.
 */
static int
edit_range(const char *s)
{
    int lo, hi;
    char extra[2];

    if (sscanf(s, "%d,%d%c", &lo, &hi, extra) == 2)
	return 1;
    return (sscanf(s, "%d%c", &lo, extra) == 1);
}
801 
802 /*
803  * Decode a range for default diff.
804  */
805 static int
decode_default(char * s,long * first,long * first_size,long * second,long * second_size)806 decode_default(char *s,
807 	       long *first, long *first_size,
808 	       long *second, long *second_size)
809 {
810     int rc = 0;
811     char *next;
812 
813     if (isdigit(UC(*s))) {
814 	*first_size = 1;
815 	*second_size = 1;
816 
817 	*first = strtol(s, &next, 10);
818 	if (next != 0 && next != s) {
819 	    if (*next == ',') {
820 		s = ++next;
821 		*first_size = strtol(s, &next, 10) + 1 - *first;
822 	    }
823 	}
824 	if (next != 0 && next != s) {
825 	    switch (*next++) {
826 	    case 'a':
827 	    case 'c':
828 	    case 'd':
829 		s = next;
830 		*second = strtol(s, &next, 10);
831 		if (next != 0 && next != s) {
832 		    if (*next == ',') {
833 			s = ++next;
834 			*second_size = strtol(s, &next, 10) + 1 - *second;
835 		    }
836 		}
837 		if (next != 0 && next != s && *next == EOS)
838 		    rc = 1;
839 		break;
840 	    }
841 	}
842     }
843     return rc;
844 }
845 
846 /*
847  * Decode a range for unified diff.  Oddly, the comments in diffutils code
848  * claim that both numbers are line-numbers.  However, inspection of the output
849  * shows that the numbers are a line-number followed by a count.
850  */
851 static char *
decode_range(char * s,int * first,int * second)852 decode_range(char *s, int *first, int *second)
853 {
854     if (isdigit(UC(*s))) {
855 	int count = 0;
856 	int value[2];
857 
858 	value[0] = 0;
859 	value[1] = 0;
860 	while (*s != EOS) {
861 	    int ch = UC(*s);
862 	    if (isdigit(ch)) {
863 		value[count] = (10 * value[count]) + (ch - '0');
864 	    } else if (ch == ',') {
865 		if (++count > 1) {
866 		    s = NULL;
867 		    break;
868 		}
869 		value[count] = 0;
870 	    } else {
871 		break;
872 	    }
873 	    ++s;
874 	}
875 	if (s != NULL) {
876 	    *first = value[0];
877 	    if (count == 0) {
878 		*second = *first;	/* diffutils 2.7 does this */
879 	    } else {
880 		*second = value[1];
881 	    }
882 	    TRACE(("** decode_range #%d first=%d, second=%d\n",
883 		   count + 1, *first, *second));
884 	}
885     }
886     return s;
887 }
888 
889 static int
HadDiffs(const DATA * data)890 HadDiffs(const DATA * data)
891 {
892     return InsOf(data) != 0
893 	|| DelOf(data) != 0
894 	|| ModOf(data) != 0
895 	|| data->cmt != Normal;
896 }
897 
/*
 * If the given path is not one of the "ignore" paths (an empty string, or
 * "/dev/null"), then return true.
 */
static int
can_be_merged(const char *path)
{
    if (strcmp(path, "") == 0)
	return 0;
    if (strcmp(path, "/dev/null") == 0)
	return 0;
    return 1;
}
910 
911 static int
is_leaf(const char * theLeaf,const char * path)912 is_leaf(const char *theLeaf, const char *path)
913 {
914     char *s;
915 
916     if (strchr(theLeaf, PATHSEP) == 0
917 	&& (s = strrchr(path, PATHSEP)) != 0
918 	&& !strcmp(++s, theLeaf))
919 	return 1;
920     return 0;
921 }
922 
/*
 * Shorten the name of '*datap' to 'length' characters, returning the
 * shortened name.
 *
 * Without tsearch, the name is truncated in place.  With tsearch, the record
 * is left alone:  '*datap' is changed to point to the record for the
 * shortened name (added if needed), and '*localp' is set to 1.  '*localp' is
 * not assigned otherwise.
 */
static char *
trim_datapath(DATA ** datap, size_t length, int *localp)
{
    char *target = (*datap)->name;

#ifdef HAVE_TSEARCH
    /*
     * If we are using tsearch(), make a local copy of the data
     * so we can trim it without interfering with tsearch's
     * notion of the ordering of data.  That will create some
     * spurious empty data, so we add the changed() macro in a
     * few places to skip over those.
     *
     * NOTE(review): no changed() macro is visible in this part of the file;
     * HadDiffs() looks like the corresponding check -- confirm.
     */
    if (use_tsearch) {
	char *trim = new_string(target);
	trim[length] = EOS;
	*datap = add_tsearch_data(trim, (*datap)->base);
	target = (*datap)->name;
	/* the record keeps its own copy of the name, so release ours */
	free(trim);
	*localp = 1;
    } else
#endif
	target[length] = EOS;

    return target;
}
949 
950 static size_t
compare_tails(const char * target,const char * source,int * diff)951 compare_tails(const char *target, const char *source, int *diff)
952 {
953     size_t len1 = strlen(target);
954     size_t len2 = strlen(source);
955     size_t n;
956     size_t matched = 0;
957 
958     *diff = 0;
959     for (n = 1; n <= len1 && n <= len2; n++) {
960 	if (target[len1 - n] != source[len2 - n]) {
961 	    *diff = (int) n;
962 	    break;
963 	}
964 	if (source[len2 - n] == PATHSEP) {
965 	    matched = n;
966 	}
967     }
968     return matched;
969 }
970 
971 /*
972  * The 'data' parameter points to the first of two markers, while
973  * 'path' is the pathname from the second marker.
974  *
975  * On the first call for
976  * a given file, the 'data' parameter stores no differences.
977  */
978 static char *
do_merging(DATA * data,char * path,int * freed)979 do_merging(DATA * data, char *path, int *freed)
980 {
981     char *target = reverse_opt ? path : data->name;
982     char *source = reverse_opt ? data->name : path;
983     char *result = source;
984     int diff;
985 
986     TRACE(("** do_merging(\"%s\",\"%s\") diffs:%d\n",
987 	   data->name, path, HadDiffs(data)));
988 
989     *freed = 0;
990     if (!HadDiffs(data)) {
991 
992 	if (is_leaf(target, source)) {
993 	    TRACE(("** is_leaf: \"%s\" vs \"%s\"\n", target, source));
994 	    if (reverse_opt) {
995 		TRACE((".. no action @%d\n", __LINE__));
996 	    } else {
997 		*freed = delink(data);
998 	    }
999 	} else if (can_be_merged(target)
1000 		   && can_be_merged(source)) {
1001 	    size_t len1 = strlen(target);
1002 	    size_t len2 = strlen(source);
1003 	    int local = 0;
1004 
1005 	    /*
1006 	     * If the source/target differ only by some suffix, e.g., ".orig"
1007 	     * or ".bak", strip that off.  The target may may also be a
1008 	     * temporary filename (which would not be merged since it has no
1009 	     * apparent relationship to the current).
1010 	     */
1011 	    if (len1 > len2) {
1012 		if (!strncmp(target, source, len2)) {
1013 		    TRACE(("** trimming data \"%s\" to \"%.*s\"\n",
1014 			   target, (int) len2, target));
1015 		    if (reverse_opt) {
1016 			TRACE((".. no action @%d\n", __LINE__));
1017 		    } else {
1018 			target = trim_datapath(&data, len2, &local);
1019 		    }
1020 		}
1021 	    } else if (len1 < len2) {
1022 		if (!strncmp(target, source, len1)) {
1023 		    TRACE(("** trimming source \"%s\" to \"%.*s\"\n",
1024 			   source, (int) len1, source));
1025 		    if (reverse_opt) {
1026 			TRACE((".. no action @%d\n", __LINE__));
1027 		    } else {
1028 			source[len2 = len1] = EOS;
1029 		    }
1030 		}
1031 	    }
1032 
1033 	    /*
1034 	     * If there was no "-p" option, look for the best match by
1035 	     * stripping prefixes from both source/target strings.
1036 	     */
1037 	    if (prefix_opt < 0) {
1038 		int matched = 0;
1039 		/*
1040 		 * Now (whether or not we trimmed a suffix), scan back from the
1041 		 * end of source/target strings to find if they happen to share
1042 		 * a common ending, e.g., a/b/c versus d/b/c.  If the strings
1043 		 * are not identical, then 'diff' will be set, but if they have
1044 		 * a common ending then 'matched' will be set.
1045 		 */
1046 		diff = 0;
1047 		matched = (int) compare_tails(target, source, &diff);
1048 
1049 		TRACE(("** merge @%d, prefix_opt=%d matched=%d diff=%d\n",
1050 		       __LINE__, prefix_opt, matched, diff));
1051 		if (matched != 0 && diff) {
1052 		    if (reverse_opt) {
1053 			TRACE((".. no action @%d\n", __LINE__));
1054 		    } else {
1055 			result = source + ((int) len2 - matched + 1);
1056 		    }
1057 		}
1058 	    }
1059 
1060 	    if (!local) {
1061 		if (reverse_opt) {
1062 		    TRACE((".. no action @%d\n", __LINE__));
1063 		} else {
1064 		    *freed = delink(data);
1065 		}
1066 	    }
1067 	} else if (reverse_opt) {
1068 	    TRACE((".. no action @%d\n", __LINE__));
1069 	    if (can_be_merged(source)) {
1070 		TRACE(("** merge @%d\n", __LINE__));
1071 	    } else {
1072 		TRACE(("** do not merge, retain @%d\n", __LINE__));
1073 		/* must not merge, retain existing name */
1074 		result = target;
1075 	    }
1076 	} else {
1077 	    if (can_be_merged(source)) {
1078 		TRACE(("** merge @%d\n", __LINE__));
1079 		*freed = delink(data);
1080 	    } else {
1081 		TRACE(("** do not merge, retain @%d\n", __LINE__));
1082 		/* must not merge, retain existing name */
1083 		result = target;
1084 	    }
1085 	}
1086     } else if (reverse_opt) {
1087 	TRACE((".. no action @%d\n", __LINE__));
1088 	if (can_be_merged(source)) {
1089 	    TRACE(("** merge @%d\n", __LINE__));
1090 	    result = target;
1091 	} else {
1092 	    TRACE(("** do not merge, retain @%d\n", __LINE__));
1093 	}
1094     } else {
1095 	if (can_be_merged(source)) {
1096 	    TRACE(("** %smerge @%d\n", merge_names ? "" : "do not ", __LINE__));
1097 	    if (merge_names
1098 		&& *target != EOS
1099 		&& prefix_opt < 0) {
1100 		size_t matched = compare_tails(target, source, &diff);
1101 		if (matched && !diff)
1102 		    result = target + (int) (strlen(target) - matched);
1103 	    }
1104 	} else {
1105 	    TRACE(("** do not merge, retain @%d\n", __LINE__));
1106 	    result = target;
1107 	}
1108     }
1109     TRACE(("** finish do_merging ->\"%s\"\n", result));
1110     return result;
1111 }
1112 
1113 static int
begin_data(const DATA * p)1114 begin_data(const DATA * p)
1115 {
1116     TRACE(("...begin_data(%s)\n", p->name));
1117     if (!can_be_merged(p->name)
1118 	&& strchr(p->name, PATHSEP) != 0) {
1119 	TRACE(("** begin_data:HAVE_PATH\n"));
1120 	return HAVE_PATH;
1121     }
1122     TRACE(("** begin_data:HAVE_GENERIC\n"));
1123     return HAVE_GENERIC;
1124 }
1125 
/*
 * Return the address of the first character of 's' that is not whitespace
 * (as judged by isspace); this may be the terminating null.
 */
static char *
skip_blanks(char *s)
{
    char *scan;

    for (scan = s; isspace(UC(*scan)); ++scan) {
	;
    }
    return scan;
}
1133 
1134 /*
1135  * Skip a filename, which may be in quotes, to allow embedded blanks in the
1136  * name.
1137  */
1138 static char *
skip_filename(char * s)1139 skip_filename(char *s)
1140 {
1141     int delim = (*s == SQUOTE) ? SQUOTE : DQUOTE;
1142 
1143     if ((*s == delim) && (s[1] != EOS) && (strchr) (s + 1, delim) != 0) {
1144 	++s;
1145 	while (*s != EOS && (*s != delim) && isprint(UC(*s))) {
1146 	    ++s;
1147 	}
1148 	++s;
1149     } else {
1150 	while (*s != EOS && isgraph(UC(*s))) {
1151 	    ++s;
1152 	}
1153     }
1154     return s;
1155 }
1156 
1157 static char *
skip_options(char * params)1158 skip_options(char *params)
1159 {
1160     while (*params != EOS) {
1161 	params = skip_blanks(params);
1162 	if (*params == '-') {
1163 	    while (isgraph(UC(*params)))
1164 		params++;
1165 	} else {
1166 	    break;
1167 	}
1168     }
1169     return skip_blanks(params);
1170 }
1171 
1172 /*
1173  * Strip single-quotes from a name (needed for recent makepatch versions).
1174  */
1175 static void
dequote(char * s)1176 dequote(char *s)
1177 {
1178     size_t len = strlen(s);
1179     int delim = (*s == SQUOTE) ? SQUOTE : DQUOTE;
1180 
1181     if (*s == delim && len > 2 && s[len - 1] == delim) {
1182 	int n;
1183 
1184 	for (n = 0; (s[n] = s[n + 1]) != EOS; ++n) {
1185 	    ;
1186 	}
1187 	s[len - 2] = EOS;
1188     }
1189 }
1190 
/*
 * Allocate a fixed-buffer of 'want' bytes with xmalloc() and store its
 * address in '*buffer'.
 */
static void
fixed_buffer(char **buffer, size_t want)
{
    char *fresh = (char *) xmalloc(want);

    *buffer = fresh;
}
1199 
/*
 * Reallocate a fixed-buffer to 'want' bytes, storing the new address in
 * '*buffer'.  failed() is called if the reallocation does not succeed.
 */
static void
adjust_buffer(char **buffer, size_t want)
{
    char *resized = (char *) realloc(*buffer, want);

    *buffer = resized;
    if (resized == 0)
	failed("realloc");
}
1209 
1210 /*
1211  * Read until newline or end-of-file, allocating the line-buffer so it is long
1212  * enough for the input.
1213  */
1214 static int
get_line(char ** buffer,size_t * have,FILE * fp)1215 get_line(char **buffer, size_t *have, FILE *fp)
1216 {
1217     int ch;
1218     size_t used = 0;
1219 
1220     while ((ch = MY_GETC(fp)) != EOF) {
1221 	if (used + 2 > *have) {
1222 	    adjust_buffer(buffer, *have *= 2);
1223 	}
1224 	(*buffer)[used++] = (char) ch;
1225 	if (ch == '\n')
1226 	    break;
1227     }
1228     (*buffer)[used] = EOS;
1229     return (used != 0);
1230 }
1231 
1232 static const char *
data_filename(const DATA * p)1233 data_filename(const DATA * p)
1234 {
1235     return p ? (p->name + (prefix_opt >= 0 ? p->base : prefix_len)) : "";
1236 }
1237 
/*
 * Count the newline characters in the named file.  If the file cannot be
 * opened, a message is written to stderr (after flushing stdout) and the
 * result is 0.
 */
static int
count_lines2(const char *filename)
{
    int newlines = 0;
    int ch;
    FILE *fp;

    TRACE(("count_lines \"%s\"\n", filename));

    fp = fopen(filename, "r");
    if (fp == 0) {
	(void) fflush(stdout);
	fprintf(stderr, "Cannot open \"%s\"\n", filename);
	return newlines;
    }

    while ((ch = MY_GETC(fp)) != EOF) {
	if (ch == '\n')
	    ++newlines;
    }
    (void) fclose(fp);
    TRACE(("->%d lines\n", newlines));

    return newlines;
}
1262 
1263 /*
1264  * Count the (new)lines in a file, return -1 if the file is not found.
1265  */
1266 static int
count_lines(DATA * p)1267 count_lines(DATA * p)
1268 {
1269     int result = -1;
1270     char *filename = 0;
1271     const char *filetail = data_filename(p);
1272     size_t want = strlen(path_opt) + 2 + strlen(filetail) + strlen(p->name);
1273 
1274     if ((filename = malloc(want)) != 0) {
1275 	int merge = 0;
1276 
1277 	if (path_dest && *path_opt != EOS && *filetail != PATHSEP) {
1278 	    size_t path_len = strlen(path_opt);
1279 	    size_t tail_len = strlen(filetail);
1280 	    char *tail_sep = strchr(filetail, PATHSEP);
1281 	    size_t n;
1282 
1283 	    for (n = path_len - 1; (int) n >= 0; --n) {
1284 		if ((path_len - n) > tail_len)
1285 		    break;
1286 		if ((n == 0 || path_opt[n - 1] == PATHSEP)
1287 		    && filetail[path_len - n] == PATHSEP) {
1288 		    if (!strncmp(path_opt + n, filetail, path_len - n)) {
1289 			merge = 1;
1290 			strcpy(filename, path_opt);
1291 			strcpy(filename + n, filetail);
1292 			break;
1293 		    }
1294 		}
1295 	    }
1296 
1297 	    if (merge == 0 && tail_sep != 0) {
1298 		tail_len = (size_t) (tail_sep - filetail);
1299 		if (tail_len != 0 && tail_len <= path_len) {
1300 		    if (tail_len < path_len
1301 			&& path_opt[path_len - tail_len - 1] != PATHSEP) {
1302 			merge = 0;
1303 		    } else if (!strncmp(path_opt + path_len - tail_len,
1304 					filetail,
1305 					tail_len - 1)) {
1306 			merge = 1;
1307 			if (path_len > tail_len) {
1308 			    sprintf(filename, "%.*s%c%s",
1309 				    (int) (path_len - tail_len),
1310 				    path_opt,
1311 				    PATHSEP,
1312 				    filetail);
1313 			} else {
1314 			    strcpy(filename, filetail);
1315 			}
1316 		    }
1317 		}
1318 	    }
1319 	}
1320 	if (!merge) {
1321 	    if (!path_opt) {
1322 		strcpy(filename, p->name);
1323 	    } else {
1324 		sprintf(filename, "%s%c%s", path_opt, PATHSEP, filetail);
1325 	    }
1326 	}
1327 
1328 	result = count_lines2(filename);
1329 	free(filename);
1330     } else {
1331 	failed("count_lines");
1332     }
1333     return result;
1334 }
1335 
1336 static void
update_chunk(DATA * p,Change change)1337 update_chunk(DATA * p, Change change)
1338 {
1339     if (merge_opt) {
1340 	p->pending += 1;
1341 	p->chunk[change] += 1;
1342     } else {
1343 	p->count[change] += 1;
1344     }
1345 }
1346 
1347 static void
finish_chunk(DATA * p)1348 finish_chunk(DATA * p)
1349 {
1350     if (p->pending) {
1351 	int i;
1352 
1353 	p->pending = 0;
1354 	p->chunks += 1;
1355 	if (merge_opt) {
1356 	    /*
1357 	     * This is crude, but to make it really precise we would have
1358 	     * to keep an array of line-numbers to which which in a chunk
1359 	     * are marked as insert/delete.
1360 	     */
1361 	    if (p->chunk[cInsert] && p->chunk[cDelete]) {
1362 		long change;
1363 		if (p->chunk[cInsert] > p->chunk[cDelete]) {
1364 		    change = p->chunk[cDelete];
1365 		} else {
1366 		    change = p->chunk[cInsert];
1367 		}
1368 		p->chunk[cInsert] -= change;
1369 		p->chunk[cDelete] -= change;
1370 		p->chunk[cModify] += change;
1371 	    }
1372 	}
1373 	for_each_mark(i) {
1374 	    p->count[i] += p->chunk[i];
1375 	    p->chunk[i] = 0;
1376 	}
1377     }
1378 }
1379 
1380 static char *
copy_notabs(char * target,char * source,size_t limit)1381 copy_notabs(char *target, char *source, size_t limit)
1382 {
1383     char *result = 0;
1384     if (limit-- != 0) {		/* count trailing null */
1385 	char ch;
1386 	int found = 0;
1387 	while ((ch = *source) != EOS) {
1388 	    if (ch == TAB) {
1389 		if (found)
1390 		    result = source;
1391 		break;
1392 	    } else if (limit-- == 0) {
1393 		break;
1394 	    }
1395 	    *target++ = ch;
1396 	    *target = EOS;
1397 	    ++source;
1398 	    found = 1;
1399 	}
1400     }
1401     return result;
1402 }
1403 
1404 static char *
copy_graphs(char * target,char * source,size_t limit)1405 copy_graphs(char *target, char *source, size_t limit)
1406 {
1407     int found = 0;
1408     if (limit-- != 0) {		/* count trailing null */
1409 	char ch;
1410 	while ((ch = *source) != EOS) {
1411 	    if (ch == TAB || ch == BLANK) {
1412 		break;
1413 	    } else if (limit-- == 0) {
1414 		found = 0;
1415 		break;
1416 	    }
1417 	    *target++ = ch;
1418 	    *target = EOS;
1419 	    ++source;
1420 	    found = 1;
1421 	}
1422     }
1423     return found ? source : NULL;
1424 }
1425 
1426 /*
1427  * Tested with git 2.11:
1428  * git uses dummy directory-names "a" and "b" rather than the actual working
1429  * directory.  Also, it allows non-printable characters, encoded in C-style
1430  * backslash sequences.  When those are used, it double-quotes the string.
1431  */
1432 static char *
copy_git_name(char * target,char * source,size_t limit)1433 copy_git_name(char *target, char *source, size_t limit)
1434 {
1435     int found = 0;
1436     int quoted = 0;
1437 
1438     /*
1439      * Account for double-quote.
1440      */
1441     if (*source == DQUOTE) {
1442 	quoted = 1;
1443 	++source;
1444 	limit--;
1445     }
1446 
1447     /*
1448      * Check for the dummy directory paths, and quit if not used.
1449      */
1450     if (limit <= 2 || (strncmp(source, "a/", 2) && strncmp(source, "b/", 2))) {
1451 	limit = 0;
1452     } else {
1453 	if (path_dest && !strncmp(source, "b/", 2)) {
1454 	    source += 2;	/* tweak to help with counting lines */
1455 	}
1456     }
1457 
1458     if (limit-- != 0) {		/* count trailing null */
1459 	char ch;
1460 	while ((ch = *source) != EOS) {
1461 	    if (quoted) {
1462 		if (ch == DQUOTE) {
1463 		    if (*++source != EOS)
1464 			found = 0;
1465 		    break;
1466 		} else if (ch == BACKSL) {
1467 		    int fail = 0;
1468 		    if ((ch = *++source) == EOS) {
1469 			fail = 1;
1470 		    } else if (isoctal(UC(ch))) {
1471 			int need = 3;
1472 			int value = 0;
1473 			/* decode octal escapes into UTF-8 bytes */
1474 			while (need-- > 0) {
1475 			    if (isoctal(*source)) {
1476 				value <<= 3;
1477 				value |= (UC(*source) - '0');
1478 				if (need) {
1479 				    ++source;
1480 				}
1481 			    } else {
1482 				fail = 1;
1483 				break;
1484 			    }
1485 			}
1486 			ch = (char) value;
1487 		    } else {
1488 			--limit;
1489 			switch (ch) {
1490 			case BACKSL:
1491 			    /* FALLTHRU */
1492 			case DQUOTE:
1493 			    break;
1494 			case 'b':
1495 			    ch = '\b';
1496 			    break;
1497 			case 'n':
1498 			    ch = '\n';
1499 			    break;
1500 			case 'r':
1501 			    ch = '\r';
1502 			    break;
1503 			case 't':
1504 			    ch = '\t';
1505 			    break;
1506 			default:
1507 			    fail = 1;
1508 			    break;
1509 			}
1510 		    }
1511 		    if (fail) {
1512 			found = 0;
1513 			break;
1514 		    }
1515 		}
1516 	    } else if (!isprint(UC(ch))) {
1517 		break;
1518 	    }
1519 	    if (limit-- == 0) {
1520 		found = 0;
1521 		break;
1522 	    }
1523 	    *target++ = ch;
1524 	    *target = EOS;
1525 	    ++source;
1526 	    found = 1;
1527 	}
1528     }
1529     return found ? source : NULL;
1530 }
1531 
1532 /* perforce */
1533 static char *
copy_p4_name(char * target,char * source,size_t limit)1534 copy_p4_name(char *target, char *source, size_t limit)
1535 {
1536     int found = 0;
1537     if (limit-- != 0) {		/* count trailing null */
1538 	char ch;
1539 	while ((ch = *source) != EOS) {
1540 	    if (ch == TAB || ch == BLANK || ch == '#') {
1541 		break;
1542 	    } else if (limit-- == 0) {
1543 		found = 0;
1544 		break;
1545 	    }
1546 	    *target++ = ch;
1547 	    *target = EOS;
1548 	    ++source;
1549 	    found = 1;
1550 	}
1551     }
1552     return found ? source : NULL;
1553 }
1554 
/*
 * Decode a decimal integer at 'source' (strtol skips leading whitespace and
 * accepts a sign), storing its value in '*target'.
 *
 * Returns the address of the first character after the number, or NULL if
 * 'source' does not begin with a number; '*target' is set to 0 in that case.
 * Like the other scanning helpers here, the NULL result lets a caller reject
 * input that lacks the expected field.
 */
static char *
copy_integer(int *target, char *source)
{
    char *next = NULL;
    long value = strtol(source, &next, 10);

    *target = (int) value;
    /* strtol leaves 'next' at 'source' when it converts nothing */
    if (next == source)
	next = NULL;
    return next;
}
1563 
1564 static char *
need_blanks(char * source)1565 need_blanks(char *source)
1566 {
1567     int found = 0;
1568     while (*source != EOS) {
1569 	char ch = *source++;
1570 	if (ch == BLANK || ch == TAB)
1571 	    found = 1;
1572     }
1573     return found ? source : NULL;
1574 }
1575 
1576 static char *
need_graphs(char * source)1577 need_graphs(char *source)
1578 {
1579     char *result = NULL;
1580     int found = 0;
1581     while (*source != EOS) {
1582 	char ch = *source;
1583 	if (ch == BLANK || ch == TAB || ch == EOS) {
1584 	    if (found)
1585 		result = source;
1586 	    break;
1587 	}
1588 	++source;
1589 	found = 1;
1590     }
1591     return result;
1592 }
1593 
1594 static char *
need_nospcs(char * source)1595 need_nospcs(char *source)
1596 {
1597     char *result = NULL;
1598     int found = 0;
1599     while (*source != EOS) {
1600 	char ch = *source;
1601 	if (ch == BLANK || ch == EOS) {
1602 	    if (found)
1603 		result = source;
1604 	    break;
1605 	}
1606 	++source;
1607 	found = 1;
1608     }
1609     return result;
1610 }
1611 
1612 /* this is used with SVN */
1613 static char *
need_parens(char * source)1614 need_parens(char *source)
1615 {
1616     char *result = NULL;
1617     if (*source++ == LPAREN) {
1618 	while (*source != EOS) {
1619 	    if (*source++ == RPAREN) {
1620 		result = source;
1621 		break;
1622 	    }
1623 	}
1624     }
1625     return result;
1626 }
1627 
/*
 * True if the two separators of a date are both '/' or both '-', e.g.,
 * "2021/01/13" or "2021-01-13".
 */
#define date_delims(a,b) \
	(((a) == '/' && (b) == '/') \
	 || ((a) == '-' && (b) == '-'))

/*
 * Trace the case of the first-character switch in do_file() which handles
 * the current line; it relies on that function's 'buffer', 'ok' and 'that'.
 */
#define CASE_TRACE() \
	TRACE(("** handle case for '%c' %d:%s\n", \
	       *buffer, ok, \
	       that ? that->name : ""))
1630 
1631 static void
do_file(FILE * fp,const char * default_name)1632 do_file(FILE *fp, const char *default_name)
1633 {
1634     static const char *only_stars = "***************";
1635 
1636     DATA dummy;
1637     DATA *that = &dummy;
1638     DATA *prev = 0;
1639     char *buffer = 0;
1640     char *b_fname = 0;
1641     size_t length = 0;
1642     size_t fixed = 0;
1643     int ok = HAVE_NOTHING;
1644     int marker;
1645     int freed = 0;
1646 
1647     int unified = 0;
1648     int old_unify = 0;
1649     int new_unify = 0;
1650     int expect_unify = 0;
1651 
1652     long old_dft = 0;
1653     long new_dft = 0;
1654 
1655     int context = 1;
1656     int either = 0;
1657 
1658     int first_ch;
1659     int git_diff = 0;
1660 
1661     char *s;
1662 #if OPT_TRACE
1663     int line_no = 0;
1664 #endif
1665 
1666     init_data(&dummy, "", 1, 0);
1667 
1668     fixed_buffer(&buffer, fixed = length = BUFSIZ);
1669     fixed_buffer(&b_fname, length);
1670 
1671     while (get_line(&buffer, &length, fp)) {
1672 	/*
1673 	 * Adjust size of fixed-buffers so that a sscanf cannot overflow.
1674 	 */
1675 	if (length > fixed) {
1676 	    fixed = length;
1677 	    adjust_buffer(&b_fname, length);
1678 	}
1679 
1680 	/*
1681 	 * Trim trailing newline.
1682 	 */
1683 	for (s = buffer + strlen(buffer); s != buffer; s--) {
1684 	    if ((UC(s[-1]) == '\n') || (UC(s[-1]) == '\r'))
1685 		s[-1] = EOS;
1686 	    else
1687 		break;
1688 	}
1689 
1690 	/*
1691 	 * Trim escapes from colordiff.
1692 	 */
1693 #define isFINAL(c) (UC(*s) >= '\140' && UC(*s) <= '\176')
1694 	if (trim_escapes && (strchr(buffer, '\033') != 0)) {
1695 	    char *d = buffer;
1696 	    s = d;
1697 	    while (*s != EOS) {
1698 		if (*s == '\033') {
1699 		    while (*s != EOS && !isFINAL(*s)) {
1700 			++s;
1701 		    }
1702 		    if (*s != EOS) {
1703 			++s;
1704 			continue;
1705 		    } else {
1706 			break;
1707 		    }
1708 		}
1709 		*d++ = *s++;
1710 	    }
1711 	    *d = EOS;
1712 	}
1713 	++line_no;
1714 	TRACE(("[%05d] %s\n", line_no, buffer));
1715 
1716 	/*
1717 	 * "patch -U" can create ".rej" files lacking a filename header,
1718 	 * in unified format.  Check for those.
1719 	 */
1720 	if (line_no == 1 && !strncmp(buffer, "@@", (size_t) 2)) {
1721 	    unified = 2;
1722 	    that = find_data(default_name);
1723 	    ok = begin_data(that);
1724 	}
1725 
1726 	/*
1727 	 * The lines identifying files in a context diff depend on how it was
1728 	 * invoked.  But after the header, each chunk begins with a line
1729 	 * containing 15 *'s.  Each chunk may contain a line-range with '***'
1730 	 * for the "before", and a line-range with '---' for the "after".  The
1731 	 * part of the chunk depicting the deletion may be absent, though the
1732 	 * edit line is present.
1733 	 *
1734 	 * The markers for unified diff are a little different from the normal
1735 	 * context-diff.  Also, the edit-lines in a unified diff won't have a
1736 	 * space in column 2.  Because of the missing space, we have to count
1737 	 * lines to ensure we do not confuse the marker lines.
1738 	 */
1739 	marker = 0;
1740 	if (that != &dummy && !strcmp(buffer, only_stars)) {
1741 	    finish_chunk(that);
1742 	    TRACE(("** begin context chunk\n"));
1743 	    context = 2;
1744 	} else if (line_no == 1 && !strcmp(buffer, only_stars)) {
1745 	    TRACE(("** begin context chunk\n"));
1746 	    context = 2;
1747 	    that = find_data(default_name);
1748 	    ok = begin_data(that);
1749 	} else if (context == 2 && match(buffer, "*** ")) {
1750 	    context = 1;
1751 	} else if (context == 1 && match(buffer, "--- ")) {
1752 	    marker = 1;
1753 	    context = 0;
1754 	} else if (match(buffer, "*** ")) {
1755 	} else if ((old_unify + new_unify) == 0 && match(buffer, "==== ")) {
1756 	    finish_chunk(that);
1757 	    unified = 2;
1758 	} else if ((old_unify + new_unify) == 0 && match(buffer, "--- ")) {
1759 	    finish_chunk(that);
1760 	    marker = unified = 1;
1761 	} else if ((old_unify + new_unify) == 0 && match(buffer, "+++ ")) {
1762 	    marker = unified = 2;
1763 	} else if (unified == 2
1764 		   || ((old_unify + new_unify) == 0 && (*buffer == '@'))) {
1765 	    finish_chunk(that);
1766 	    unified = 0;
1767 	    if (*buffer == '@') {
1768 		int old_base, new_base;
1769 		int old_size = 0;
1770 		int new_size = 0;
1771 		char *sp;
1772 
1773 		old_unify = new_unify = 0;
1774 		if ((sp = match(buffer, "@@ -")) != NULL
1775 		    && (sp = decode_range(sp, &old_base, &old_size)) != NULL
1776 		    && (sp = match(sp, " +")) != NULL
1777 		    && (sp = decode_range(sp, &new_base, &new_size)) != NULL
1778 		    && match(sp, " @") != NULL) {
1779 		    old_unify = old_size;
1780 		    new_unify = new_size;
1781 		    unified = -1;
1782 		}
1783 	    }
1784 	} else if (unified == 1 && !context) {
1785 	    /*
1786 	     * If unified==1, we guessed we would find a "+++" line, but since
1787 	     * we are here, we did not find that.  The context check ensures
1788 	     * we do not mistake the "---" for a unified diff with that for
1789 	     * a context diff's "after" line-range.
1790 	     *
1791 	     * If we guessed wrong, then we probably found a data line with
1792 	     * "--" in the first two columns of the diff'd file.
1793 	     */
1794 	    unified = 0;
1795 	    TRACE(("?? Expected \"+++\" for unified diff\n"));
1796 	    if (prev != 0
1797 		&& prev != that
1798 		&& InsOf(that) == 0
1799 		&& DelOf(that) == 0
1800 		&& strcmp(prev->name, that->name)) {
1801 		TRACE(("?? giveup on %ld/%ld %s\n", InsOf(that),
1802 		       DelOf(that), that->name));
1803 		TRACE(("?? revert to %ld/%ld %s\n", InsOf(prev),
1804 		       DelOf(prev), prev->name));
1805 		(void) delink(that);
1806 		that = prev;
1807 		update_chunk(that, cDelete);
1808 	    }
1809 	} else if (old_unify + new_unify) {
1810 	    switch (*buffer) {
1811 	    case '-':
1812 		if (old_unify)
1813 		    --old_unify;
1814 		break;
1815 	    case '+':
1816 		if (new_unify)
1817 		    --new_unify;
1818 		break;
1819 	    case EOS:
1820 	    case ' ':
1821 		if (old_unify)
1822 		    --old_unify;
1823 		if (new_unify)
1824 		    --new_unify;
1825 		break;
1826 	    case BACKSL:
1827 		if (strstr(buffer, "newline") != 0) {
1828 		    break;
1829 		}
1830 		/* FALLTHRU */
1831 	    default:
1832 		TRACE(("?? expected more in chunk\n"));
1833 		old_unify = new_unify = 0;
1834 		break;
1835 	    }
1836 	    if (!(old_unify + new_unify)) {
1837 		expect_unify = 2;
1838 	    }
1839 	} else {
1840 	    long old_base, new_base;
1841 
1842 	    unified = 0;
1843 
1844 	    if (line_no == 1
1845 		&& decode_default(buffer,
1846 				  &old_base, &old_dft,
1847 				  &new_base, &new_dft)) {
1848 		TRACE(("DFT %ld,%ld -> %ld,%ld\n",
1849 		       old_base, old_base + old_dft - 1,
1850 		       new_base, new_base + new_dft - 1));
1851 		finish_chunk(that);
1852 		that = find_data("unknown");
1853 		ok = begin_data(that);
1854 	    }
1855 	}
1856 
1857 	/*
1858 	 * If the previous line ended a chunk of a unified diff, we may begin
1859 	 * another chunk, or begin another type of diff.  If neither, do not
1860 	 * continue to accumulate counts for the unified diff which has ended.
1861 	 */
1862 	if (expect_unify != 0) {
1863 	    if (expect_unify-- == 1) {
1864 		if (unified == 0) {
1865 		    TRACE(("?? did not get chunk\n"));
1866 		    finish_chunk(that);
1867 		    that = &dummy;
1868 		}
1869 	    }
1870 	}
1871 
1872 	/*
1873 	 * Override the beginning of the line to simplify the case statement
1874 	 * below.
1875 	 */
1876 	if (marker > 0) {
1877 	    TRACE(("** have marker=%d, override %s\n", marker, buffer));
1878 	    (void) memcpy(buffer, "***", (size_t) 3);
1879 	}
1880 
1881 	first_ch = *buffer;
1882 
1883 	/*
1884 	 * GIT binary diffs can contain blocks of data that might be confused
1885 	 * with the ordinary line-oriented sections in diff output.  Skip the
1886 	 * case statement if we are processing a GIT binary diff.
1887 	 */
1888 	switch (git_diff) {
1889 	default:
1890 	    break;
1891 	case 1:
1892 	    /* expect "index" */
1893 	    if (match(buffer, "index") != 0) {
1894 		git_diff = 2;
1895 		continue;
1896 	    } else {
1897 		git_diff = 0;
1898 	    }
1899 	    break;
1900 	case 2:
1901 	    /* perhaps "GIT binary patch" */
1902 	    if (match(buffer, "GIT binary patch") != 0) {
1903 		git_diff = 3;
1904 		that->cmt = Binary;
1905 		continue;
1906 	    } else if (match(buffer, "Binary files ") != 0) {
1907 		git_diff = 0;
1908 		that->cmt = Binary;
1909 		continue;
1910 	    } else {
1911 		git_diff = 0;
1912 	    }
1913 	    break;
1914 	case 3:
1915 	    /* had "GIT binary patch", wait for next "diff" line */
1916 	    if (first_ch != 'd')
1917 		continue;
1918 	    break;
1919 	}
1920 
1921 	/*
1922 	 * Use the first character of the input line to determine its
1923 	 * type:
1924 	 */
1925 	switch (first_ch) {
1926 	case 'O':		/* Only */
1927 	    CASE_TRACE();
1928 	    if (match(buffer, "Only in ")) {
1929 		char *path = buffer + 8;
1930 		int found = 0;
1931 		for (s = path; *s != EOS; s++) {
1932 		    if (match(s, ": ")) {
1933 			found = 1;
1934 			*s++ = PATHSEP;
1935 			while ((s[0] = s[1]) != EOS)
1936 			    s++;
1937 			break;
1938 		    }
1939 		}
1940 		if (found) {
1941 		    blip('.');
1942 		    finish_chunk(that);
1943 		    that = find_data(path);
1944 		    that->cmt = Only;
1945 		    ok = HAVE_NOTHING;
1946 		}
1947 	    }
1948 	    break;
1949 
1950 	    /*
1951 	     * Several different scripts produce "Index:" lines
1952 	     * (e.g., "makepatch").  Not all bother to put the
1953 	     * pathname of the files; some put only the leaf names.
1954 	     */
1955 	case 'I':
1956 	    CASE_TRACE();
1957 	    if ((s = match(buffer, "Index: ")) != 0) {
1958 		s = skip_blanks(s);
1959 		dequote(s);
1960 		blip('.');
1961 		finish_chunk(that);
1962 		s = do_merging(that, s, &freed);
1963 		that = find_data(s);
1964 		ok = begin_data(that);
1965 	    }
1966 	    break;
1967 
1968 	case 'd':		/* diff command trace */
1969 	    CASE_TRACE();
1970 	    if ((s = match(buffer, "diff ")) != 0
1971 		&& *(s = skip_options(s)) != EOS) {
1972 		if (reverse_opt) {
1973 		    *skip_filename(s) = EOS;
1974 		} else {
1975 		    s = skip_filename(s);
1976 		    s = skip_blanks(s);
1977 		}
1978 		dequote(s);
1979 		blip('.');
1980 		finish_chunk(that);
1981 		s = do_merging(that, s, &freed);
1982 		that = find_data(s);
1983 		ok = begin_data(that);
1984 		if (match(buffer, "diff --git ") != 0) {
1985 		    git_diff = 1;
1986 		} else {
1987 		    git_diff = 0;
1988 		}
1989 	    }
1990 	    break;
1991 
1992 	case '*':
1993 	    CASE_TRACE();
1994 	    if (!(ok & HAVE_PATH)) {
1995 		int ddd, hour, minute, second;
1996 		int day, month, year;
1997 		char yrmon, monday;
1998 		char *stars = match(buffer, "*** ");
1999 		char *sp;
2000 
2001 		if (stars == NULL)
2002 		    break;	/* ignore */
2003 
2004 		/* check for tab-delimited first, so we can
2005 		 * accept filenames containing spaces.
2006 		 */
2007 		if (((sp = copy_notabs(b_fname, stars, length)) != NULL
2008 		     && (sp = match(sp, "\t")) != NULL
2009 		     && (sp = need_nospcs(sp)) != NULL
2010 		     && (sp = match(sp, " ")) != NULL
2011 		     && (sp = need_nospcs(sp)) != NULL
2012 		     && sscanf(sp,
2013 			       " %d %d:%d:%d %d",
2014 			       &ddd,
2015 			       &hour, &minute, &second, &year) == 5)
2016 		    || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2017 			&& sscanf(sp,
2018 				  "\t%d%c%d%c%d %d:%d:%d",
2019 				  &year, &yrmon, &month, &monday, &day,
2020 				  &hour, &minute, &second) == 8
2021 			&& date_delims(yrmon, monday)
2022 			&& !version_num(b_fname))
2023 		    || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2024 			&& (sp = match(sp, "\t")) != NULL
2025 			&& (sp = need_parens(sp)) != NULL
2026 			&& (sp = match(sp, "\t")) != NULL
2027 			&& need_parens(sp) != NULL
2028 			&& !version_num(b_fname))
2029 		    || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2030 			&& (sp = match(sp, "\t")) != NULL
2031 			&& (sp = need_parens(sp)) != NULL
2032 			&& (*skip_blanks(sp) == EOS))
2033 		    || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2034 			&& (sp = need_blanks(sp)) != NULL
2035 			&& (sp = need_nospcs(sp)) != NULL
2036 			&& (sp = match(sp, " ")) != NULL
2037 			&& (sp = need_nospcs(sp)) != NULL
2038 			&& sscanf(sp,
2039 				  " %d %d:%d:%d %d",
2040 				  &ddd, &hour, &minute, &second, &year) == 5)
2041 		    || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2042 			&& (sp = need_blanks(sp)) != NULL
2043 			&& sscanf(sp,
2044 				  "%d%c%d%c%d %d:%d:%d",
2045 				  &year, &yrmon, &month, &monday, &day,
2046 				  &hour, &minute, &second) == 8
2047 			&& date_delims(yrmon, monday)
2048 			&& !version_num(b_fname))
2049 		    || ((sp = copy_git_name(b_fname, stars, length)) != NULL
2050 			&& *skip_blanks(sp) == EOS)
2051 		    || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2052 			&& (*sp == EOS || *sp == BLANK || *sp == TAB)
2053 			&& !version_num(b_fname)
2054 			&& !contain_any(b_fname, "*")
2055 			&& !edit_range(b_fname))
2056 		    ) {
2057 		    prev = that;
2058 		    finish_chunk(that);
2059 		    dequote(b_fname);
2060 		    s = do_merging(that, b_fname, &freed);
2061 		    if (freed)
2062 			prev = 0;
2063 		    that = find_data(s);
2064 		    ok = begin_data(that);
2065 		    TRACE(("** after merge:%d:%s\n", ok, s));
2066 		}
2067 	    }
2068 	    break;
2069 
2070 	case '=':
2071 	    CASE_TRACE();
2072 	    if (!(ok & HAVE_PATH)) {
2073 		int rev;
2074 		char *bars, *sp;
2075 
2076 		if ((bars = match(buffer, "==== ")) != NULL
2077 		    && (bars = copy_p4_name(b_fname, bars, length)) != NULL
2078 		    && (bars = match(bars, "#")) != NULL
2079 		    && (bars = copy_integer(&rev, bars)) != NULL
2080 		    && (((sp = match(bars, " - ")) != NULL
2081 			 && need_graphs(sp) != NULL)
2082 			|| (((sp = match(bars, " ")) != NULL
2083 			     && (sp = need_parens(sp)) != NULL
2084 			     && (sp = match(sp, " - ")) != NULL
2085 			     && need_graphs(sp) != NULL)))
2086 		    && !version_num(b_fname)
2087 		    && !contain_any(b_fname, "*")
2088 		    && !edit_range(b_fname)) {
2089 		    TRACE(("** found p4-diff\n"));
2090 		    prev = that;
2091 		    finish_chunk(that);
2092 		    dequote(b_fname);
2093 		    s = do_merging(that, b_fname, &freed);
2094 		    if (freed)
2095 			prev = 0;
2096 		    that = find_data(s);
2097 		    ok = begin_data(that);
2098 		    TRACE(("** after merge:%d:%s\n", ok, s));
2099 		}
2100 	    }
2101 	    break;
2102 
2103 	case '+':
2104 	    /* FALL-THRU */
2105 	case '>':
2106 	    CASE_TRACE();
2107 	    if (ok) {
2108 		update_chunk(that, cInsert);
2109 	    }
2110 	    break;
2111 
2112 	case '-':
2113 	    if (!ok) {
2114 		CASE_TRACE();
2115 		break;
2116 	    }
2117 	    if (!unified && !strcmp(buffer, "---")) {
2118 		CASE_TRACE();
2119 		break;
2120 	    }
2121 	    /* fall-thru */
2122 	case '<':
2123 	    CASE_TRACE();
2124 	    if (ok) {
2125 		update_chunk(that, cDelete);
2126 	    }
2127 	    break;
2128 
2129 	case '!':
2130 	    CASE_TRACE();
2131 	    if (ok) {
2132 		update_chunk(that, cModify);
2133 	    }
2134 	    break;
2135 
2136 	    /* Expecting "Files XXX and YYY differ" */
2137 	case 'F':		/* FALL-THRU */
2138 	case 'f':
2139 	    CASE_TRACE();
2140 	    if ((s = match(buffer + 1, "iles ")) != 0) {
2141 		char *first = skip_blanks(s);
2142 		/* blindly assume the first filename does not contain " and " */
2143 		char *at_and = strstr(s, " and ");
2144 		s = strrchr(buffer, BLANK);
2145 		if ((at_and != NULL) && !strcmp(s, " differ")) {
2146 		    char *second = skip_blanks(at_and + 5);
2147 
2148 		    if (reverse_opt) {
2149 			*at_and = EOS;
2150 			s = first;
2151 		    } else {
2152 			*s = EOS;
2153 			s = second;
2154 		    }
2155 		    blip('.');
2156 		    finish_chunk(that);
2157 		    that = find_data(s);
2158 		    that->cmt = Either;
2159 		    ok = HAVE_NOTHING;
2160 		    either = 1;
2161 		}
2162 	    }
2163 	    break;
2164 	    /* Expecting "Binary files XXX and YYY differ" */
2165 	case 'B':		/* FALL-THRU */
2166 	case 'b':
2167 	    CASE_TRACE();
2168 	    if ((s = match(buffer + 1, "inary files ")) != 0) {
2169 		char *first = skip_blanks(s);
2170 		/* blindly assume the first filename does not contain " and " */
2171 		char *at_and = strstr(s, " and ");
2172 		s = strrchr(buffer, BLANK);
2173 		if ((at_and != NULL) && !strcmp(s, " differ")) {
2174 		    char *second = skip_blanks(at_and + 5);
2175 
2176 		    if (reverse_opt) {
2177 			*at_and = EOS;
2178 			s = first;
2179 		    } else {
2180 			*s = EOS;
2181 			s = second;
2182 		    }
2183 		    blip('.');
2184 		    finish_chunk(that);
2185 		    that = find_data(s);
2186 		    that->cmt = Binary;
2187 		    ok = HAVE_NOTHING;
2188 		}
2189 	    }
2190 	    break;
2191 	}
2192     }
2193     blip('\n');
2194 
2195     finish_chunk(that);
2196     finish_chunk(&dummy);
2197 
2198     if (either) {
2199 	int pass;
2200 	int fixup_diffs = 0;
2201 
2202 	for (pass = 0; pass < 2; ++pass) {
2203 	    DATA *p;
2204 	    for (p = all_data; p; p = p->link) {
2205 		switch (p->cmt) {
2206 		default:
2207 		    break;
2208 		case Normal:
2209 		    fixup_diffs = 1;
2210 		    break;
2211 		case Either:
2212 		    if (pass) {
2213 			if (fixup_diffs) {
2214 			    p->cmt = Binary;
2215 			} else {
2216 			    p->cmt = Differs;
2217 			}
2218 		    }
2219 		    break;
2220 		}
2221 	    }
2222 	}
2223     }
2224 
2225     free(buffer);
2226     free(b_fname);
2227 }
2228 
/*
 * Write an SGR escape sequence to stdout.  A non-negative 'color' is emitted
 * as parameter (color + 30); a negative value emits the "0;39" sequence.
 */
static void
show_color(int color)
{
    if (color < 0) {
	printf("\033[0;39m");
	return;
    }
    printf("\033[%dm", 30 + color);
}
2237 
2238 static long
plot_bar(long count,int c,int color)2239 plot_bar(long count, int c, int color)
2240 {
2241     long result = count;
2242 
2243     if (show_colors && result != 0)
2244 	show_color(color);
2245 
2246     while (--count >= 0)
2247 	(void) putchar(c);
2248 
2249     if (show_colors && result != 0)
2250 	show_color(-1);
2251 
2252     return result;
2253 }
2254 
2255 /*
2256  * Each call to 'plot_num()' prints a scaled bar of 'c' characters.  The
2257  * 'extra' parameter is used to keep the accumulated error in the bar's total
2258  * length from getting large.
2259  */
2260 static long
plot_num(long num_value,int c,int color,long * extra)2261 plot_num(long num_value, int c, int color, long *extra)
2262 {
2263     long result = 0;
2264 
2265     /* the value to plot */
2266     /* character to display in the bar */
2267     /* accumulated error in the bar */
2268     if (num_value) {
2269 	long product = (plot_width * num_value);
2270 	result = ((product + *extra) / plot_scale);
2271 	*extra = product - (result * plot_scale) - *extra;
2272 	plot_bar(result, c, color);
2273     }
2274     return result;
2275 }
2276 
2277 static long
plot_round1(const long num[MARKS])2278 plot_round1(const long num[MARKS])
2279 {
2280     long result = 0;
2281     long scaled[MARKS];
2282     long remain[MARKS];
2283     long want = 0;
2284     long have = 0;
2285     long half = (plot_scale / 2);
2286     int i;
2287 
2288     memset(scaled, 0, sizeof(scaled));
2289     memset(remain, 0, sizeof(remain));
2290 
2291     for_each_mark(i) {
2292 	long product = (plot_width * num[i]);
2293 	scaled[i] = (product / plot_scale);
2294 	remain[i] = (product % plot_scale);
2295 	want += product;
2296 	have += product - remain[i];
2297     }
2298     while (want > have) {
2299 	int j = -1;
2300 	for_each_mark(i) {
2301 	    if (remain[i] != 0
2302 		&& (remain[i] > (j >= 0 ? remain[j] : half))) {
2303 		j = i;
2304 	    }
2305 	}
2306 	if (j >= 0) {
2307 	    have += remain[j];
2308 	    remain[j] = 0;
2309 	    scaled[j] += 1;
2310 	} else {
2311 	    break;
2312 	}
2313     }
2314     for_each_mark(i) {
2315 	plot_bar(scaled[i], marks[i], colors[i]);
2316 	result += scaled[i];
2317     }
2318     return result;
2319 }
2320 
2321 /*
2322  * Print a scaled bar of characters, where c[0] is for insertions, c[1]
2323  * for deletions and c[2] for modifications. The num array contains the
2324  * count for each type of change, in the same order.
2325  */
2326 static long
plot_round2(const long num[MARKS])2327 plot_round2(const long num[MARKS])
2328 {
2329     long result = 0;
2330     long scaled[MARKS];
2331     long remain[MARKS];
2332     long total = 0;
2333     int i;
2334 
2335     for (i = 0; i < MARKS; i++)
2336 	total += num[i];
2337 
2338     if (total == 0)
2339 	return result;
2340 
2341     total = (total * plot_width + (plot_scale / 2)) / plot_scale;
2342     /* display at least one character */
2343     if (total == 0)
2344 	total++;
2345 
2346     for_each_mark(i) {
2347 	scaled[i] = num[i] * plot_width / plot_scale;
2348 	remain[i] = num[i] * plot_width - scaled[i] * plot_scale;
2349 	total -= scaled[i];
2350     }
2351 
2352     /* assign the missing chars using the largest remainder algo */
2353     while (total) {
2354 	int largest, largest_count;	/* largest is a bit field */
2355 	long max_remain;
2356 
2357 	/* search for the largest remainder */
2358 	largest = largest_count = 0;
2359 	max_remain = 0;
2360 	for_each_mark(i) {
2361 	    if (remain[i] > max_remain) {
2362 		largest = 1 << i;
2363 		largest_count = 1;
2364 		max_remain = remain[i];
2365 	    } else if (remain[i] == max_remain) {	/* ex aequo */
2366 		largest |= 1 << i;
2367 		largest_count++;
2368 	    }
2369 	}
2370 
2371 	/* if there are more greatest remainders than characters
2372 	   missing, don't assign them at all */
2373 	if (total < largest_count)
2374 	    break;
2375 
2376 	/* allocate the extra characters */
2377 	for_each_mark(i) {
2378 	    if (largest & (1 << i)) {
2379 		scaled[i]++;
2380 		total--;
2381 		remain[i] -= plot_width;
2382 	    }
2383 	}
2384     }
2385 
2386     for_each_mark(i) {
2387 	result += plot_bar(scaled[i], marks[i], colors[i]);
2388     }
2389 
2390     return result;
2391 }
2392 
2393 static void
plot_numbers(const DATA * p)2394 plot_numbers(const DATA * p)
2395 {
2396     long temp = 0;
2397     int i;
2398 
2399     printf("%5ld ", TotalOf(p));
2400 
2401     if (format_opt & FMT_VERBOSE) {
2402 	printf("%5ld ", InsOf(p));
2403 	printf("%5ld ", DelOf(p));
2404 	printf("%5ld ", ModOf(p));
2405 	if (path_opt)
2406 	    printf("%5ld ", EqlOf(p));
2407     }
2408 
2409     if (format_opt == FMT_CONCISE) {
2410 	for_each_mark(i) {
2411 	    printf("\t%ld %c", p->count[i], marks[i]);
2412 	}
2413     } else {
2414 	long used = 0;
2415 
2416 	switch (round_opt) {
2417 	default:
2418 	    for_each_mark(i) {
2419 		used += plot_num(p->count[i], marks[i], colors[i], &temp);
2420 	    }
2421 	    break;
2422 	case 1:
2423 	    used = plot_round1(p->count);
2424 	    break;
2425 
2426 	case 2:
2427 	    used = plot_round2(p->count);
2428 	    break;
2429 	}
2430 
2431 	if ((format_opt & FMT_FILLED) != 0) {
2432 	    if (used > plot_width)
2433 		printf("%ld", used - plot_width);	/* oops */
2434 	    else
2435 		plot_bar(plot_width - used, '.', 0);
2436 	}
2437     }
2438 }
2439 
2440 static int
columns_of(const char * value)2441 columns_of(const char *value)
2442 {
2443     int result;
2444     int n;
2445     int ch;
2446 #ifdef HAVE_MBSTOWCWIDTH
2447     int fixup = 0;
2448     for (n = 0; (ch = UC(value[n])) != EOS; ++n) {
2449 	if (ch >= DEL || ch < BLANK) {
2450 	    fixup = 1;
2451 	    break;
2452 	}
2453     }
2454     result = (int) strlen(value);
2455     if (fixup) {
2456 	size_t needed;
2457 	mbstate_t state;
2458 	const char *source;
2459 	size_t length = strlen(value);
2460 
2461 	memset(&state, 0, sizeof(state));
2462 	source = value;
2463 	needed = mbsrtowcs(NULL, &source, length, &state);
2464 	if (needed != (size_t) (-1)) {
2465 	    wchar_t *target = calloc(1 + needed, sizeof(wchar_t));
2466 	    memset(&state, 0, sizeof(state));
2467 	    source = value;
2468 	    if (mbsrtowcs(target, &source, length, &state) == needed) {
2469 		size_t n2;
2470 		result = 0;
2471 		for (n2 = 0; n2 < needed; ++n2) {
2472 		    int nw = wcwidth(target[n2]);
2473 		    if (nw > 0)
2474 			result += nw;
2475 		    else if (target[n2] < BLANK || target[n2] == DEL)
2476 			result += 2;
2477 		    else
2478 			result += 4;	/* show as octal */
2479 		}
2480 	    }
2481 	    free(target);
2482 	}
2483     }
2484 #else
2485     result = (int) strlen(value);
2486     for (n = 0; (ch = UC(value[n])) != EOS; ++n) {
2487 	if (ch == DEL || ch < BLANK) {
2488 	    result += 1;
2489 	} else if (ch > DEL) {
2490 	    result += 3;	/* show as octal */
2491 	}
2492     }
2493 #endif
2494     return result;
2495 }
2496 
2497 #define adjustwide(width,name) width += (int) strlen(name) - columns_of(name)
2498 
2499 static void
show_quoted(const char * value)2500 show_quoted(const char *value)
2501 {
2502     int ch;
2503 
2504     putchar(DQUOTE);
2505     while ((ch = UC(*value++)) != EOS) {
2506 	if (ch == DQUOTE)
2507 	    putchar(DQUOTE);
2508 	putchar(ch);
2509     }
2510     putchar(DQUOTE);
2511 }
2512 
2513 static void
show_unquoted(const char * value,int limit)2514 show_unquoted(const char *value, int limit)
2515 {
2516     int ch;
2517     while ((ch = UC(*value++)) != EOS) {
2518 	if (ch < BLANK) {
2519 	    if (strchr("\b\n\r\t\\\"", ch) != NULL) {
2520 		putchar(BACKSL);
2521 		switch (ch) {
2522 		case '\b':
2523 		    ch = 'b';
2524 		    break;
2525 		case '\n':
2526 		    ch = 'n';
2527 		    break;
2528 		case '\r':
2529 		    ch = 'r';
2530 		    break;
2531 		case '\t':
2532 		    ch = 't';
2533 		    break;
2534 		}
2535 	    } else {
2536 		putchar('^');
2537 		ch |= '@';
2538 	    }
2539 	} else if (ch == DEL) {
2540 	    putchar('^');
2541 	    ch = '?';
2542 	}
2543 #ifndef HAVE_MBSTOWCWIDTH
2544 	else if (ch > DEL) {
2545 	    char temp[5];
2546 	    sprintf(temp, "\\%03o", ch & 0xff);
2547 	    ch = temp[3];
2548 	    temp[3] = EOS;
2549 	    fputs(temp, stdout);
2550 	}
2551 #endif
2552 	putchar(ch);
2553 	--limit;
2554     }
2555     while (limit-- > 0) {
2556 	putchar(BLANK);
2557     }
2558 }
2559 
2560 #define changed(p) (!merge_names \
2561 		    || (p)->cmt != Normal \
2562 		    || (TotalOf(p)) != 0)
2563 
2564 static void
show_data(const DATA * p)2565 show_data(const DATA * p)
2566 {
2567     const char *name = data_filename(p);
2568     int width;
2569 
2570     if (summary_only) {
2571 	;
2572     } else if (!changed(p)) {
2573 	;
2574     } else if (p->cmt == Binary && suppress_binary == 1) {
2575 	;
2576     } else if (table_opt == 1) {
2577 	if (names_only) {
2578 	    show_quoted(name);
2579 	} else {
2580 	    printf("%ld,%ld,%ld,",
2581 		   InsOf(p),
2582 		   DelOf(p),
2583 		   ModOf(p));
2584 	    if (path_opt)
2585 		printf("%ld,", EqlOf(p));
2586 	    if (count_files && !reverse_opt)
2587 		printf("%d,%d,%d,",
2588 		       (p->cmt == OnlyRight),
2589 		       (p->cmt == OnlyLeft),
2590 		       (p->cmt == Binary));
2591 	    show_quoted(name);
2592 	}
2593 	printf("\n");
2594     } else if (names_only) {
2595 	printf("%s\n", name);
2596     } else {
2597 	printf("%s ", comment_opt);
2598 	if (max_name_wide > 0
2599 	    && max_name_wide < min_name_wide
2600 	    && max_name_wide < ((width = (int) columns_of(name)))) {
2601 	    printf("%.*s", max_name_wide, name + (width - max_name_wide));
2602 	} else {
2603 	    width = ((max_name_wide > 0 && max_name_wide < min_name_wide)
2604 		     ? max_name_wide
2605 		     : min_name_wide);
2606 	    adjustwide(width, name);
2607 	    show_unquoted(name, width);
2608 	}
2609 	if (table_opt == 2) {
2610 	    putchar('|');
2611 	    if (path_opt)
2612 		printf("%*ld ", number_len, EqlOf(p));
2613 	    printf("%*ld ", number_len, InsOf(p));
2614 	    printf("%*ld ", number_len, DelOf(p));
2615 	    printf("%*ld", number_len, ModOf(p));
2616 	}
2617 	putchar('|');
2618 	switch (p->cmt) {
2619 	default:
2620 	case Normal:
2621 	    plot_numbers(p);
2622 	    break;
2623 	case Binary:
2624 	    printf("binary");
2625 	    break;
2626 	case Differs:
2627 	    printf("differ");
2628 	    break;
2629 	case Only:
2630 	    printf("only");
2631 	    break;
2632 	case OnlyLeft:
2633 	    printf(count_files ? "deleted" : "only");
2634 	    break;
2635 	case OnlyRight:
2636 	    printf(count_files ? "added" : "only");
2637 	    break;
2638 	}
2639 	printf("\n");
2640     }
2641 }
2642 
2643 #ifdef HAVE_TSEARCH
2644 static void
show_tsearch(const void * nodep,const VISIT which,const int depth)2645 show_tsearch(const void *nodep, const VISIT which, const int depth)
2646 {
2647     const DATA *p = *(DATA * const *) nodep;
2648     (void) depth;
2649     if (which == postorder || which == leaf)
2650 	show_data(p);
2651 }
2652 #endif
2653 
2654 static int
ignore_data(DATA * p)2655 ignore_data(DATA * p)
2656 {
2657     return ((!changed(p))
2658 	    || (p->cmt == Binary && suppress_binary));
2659 }
2660 
2661 /*
2662  * Return the length of any directory-prefix from the given path.
2663  */
2664 static size_t
path_length(const char * path)2665 path_length(const char *path)
2666 {
2667     size_t result = 0;
2668     char *mark = strrchr(path, PATHSEP);
2669     if (mark != 0 && mark != path)
2670 	result = (size_t) (mark + 1 - path);
2671     return result;
2672 }
2673 
2674 /*
2675  * If we have an "only" filename, we can guess whether it was added or removed
2676  * by looking at its directory and comparing that to other files' directories.
2677  *
2678  * TODO: -K -R combination is not yet supported because that relies on storing
2679  * both left-/right-paths for each file; only the right-path is currently used.
2680  */
2681 static Comment
resolve_only(DATA * p)2682 resolve_only(DATA * p)
2683 {
2684     Comment result = p->cmt;
2685     if (result == Only && !reverse_opt) {
2686 	DATA *q;
2687 	size_t len1 = path_length(p->name);
2688 	if (len1 != 0) {
2689 	    for (q = all_data; q; q = q->link) {
2690 		result = OnlyLeft;
2691 		if (q->cmt == Normal || q->cmt == Binary) {
2692 		    size_t len2 = path_length(q->name);
2693 		    if (len2 >= len1) {
2694 			if (!strncmp(p->name, q->name, len1)) {
2695 			    result = OnlyRight;
2696 			    break;
2697 			}
2698 		    }
2699 		}
2700 	    }
2701 	}
2702     }
2703     return result;
2704 }
2705 
2706 #ifdef HAVE_OPENDIR
2707 static void
count_unmodified_files(const char * pathname,long * files,long * lines)2708 count_unmodified_files(const char *pathname, long *files, long *lines)
2709 {
2710     DATA *p;
2711     char *name;
2712 
2713     TRACE(("count_unmodified_files \"%s\"\n", pathname));
2714     if (is_dir(pathname)) {
2715 	DIR *dp = opendir(pathname);
2716 
2717 	if (dp != 0) {
2718 	    struct dirent *de;
2719 
2720 	    while ((de = readdir(dp)) != 0) {
2721 		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
2722 		    continue;
2723 		name = malloc(strlen(pathname) + 2 + strlen(de->d_name));
2724 		if (name != 0) {
2725 		    sprintf(name, "%s%c%s", pathname, PATHSEP, de->d_name);
2726 		    count_unmodified_files(name, files, lines);
2727 		    free(name);
2728 		}
2729 	    }
2730 	    closedir(dp);
2731 	}
2732     } else if (is_file(pathname)) {
2733 	/*
2734 	 * Given the pathname from the (-D) source directory, derive a
2735 	 * corresponding path for the source directory.  Then check if
2736 	 * that path appears in the list of modified files.
2737 	 */
2738 	const char *ref_name = ((all_data && !unchanged) ? all_data->name : pathname);
2739 	char *source = 0;
2740 
2741 	if (prefix_opt >= 0) {
2742 	    int level_s = count_prefix(path_opt);
2743 	    int base_s = 0;
2744 	    int base_d = 0;
2745 
2746 	    (void) skip_prefix(pathname, level_s + 1, &base_s);
2747 	    (void) skip_prefix(ref_name, level_s + 1, &base_d);
2748 	    name = malloc(2 + strlen(pathname) + strlen(ref_name));
2749 	    sprintf(name, "%.*s%s", base_d, ref_name, base_s + pathname);
2750 	    source = malloc(strlen(ref_name) + 2 + strlen(pathname) + strlen(S_option));
2751 	    sprintf(source, "%s%c%s",
2752 		    S_option,
2753 		    PATHSEP,
2754 		    base_s + pathname);
2755 	} else {
2756 	    const char *mark = unchanged ? ref_name : data_filename(all_data);
2757 	    int skip = 1 + (int) strlen(path_opt);
2758 
2759 	    name = malloc(strlen(ref_name) + 2 + strlen(pathname));
2760 	    sprintf(name, "%.*s%s",
2761 		    (int) (mark - ref_name),
2762 		    ref_name,
2763 		    pathname + skip);
2764 	    source = malloc(strlen(ref_name) + 2 + strlen(pathname) + strlen(S_option));
2765 	    sprintf(source, "%s%c%.*s%s",
2766 		    S_option,
2767 		    PATHSEP,
2768 		    (int) (mark - ref_name),
2769 		    ref_name,
2770 		    pathname + skip);
2771 	}
2772 
2773 	if (same_file(source, pathname)) {
2774 	    int found = 0;
2775 
2776 	    for (p = all_data; p != 0 && !found; p = p->link) {
2777 		if (!strcmp(name, p->name)) {
2778 		    found = 1;
2779 		}
2780 	    }
2781 	    if (!found) {
2782 		p = find_data(name);
2783 		*files += 1;
2784 		EqlOf(p) = count_lines(p);
2785 		*lines += EqlOf(p);
2786 
2787 		if (unchanged) {
2788 		    int len = columns_of(p->name);
2789 		    if (min_name_wide < (len - p->base))
2790 			min_name_wide = (len - p->base);
2791 		}
2792 	    }
2793 	}
2794 	free(name);
2795 	free(source);
2796     }
2797 }
2798 #endif
2799 
2800 static void
update_min_name_wide(long longest_name)2801 update_min_name_wide(long longest_name)
2802 {
2803     if (prefix_opt < 0) {
2804 	if (prefix_len < 0)
2805 	    prefix_len = 0;
2806 	if ((longest_name - prefix_len) > min_name_wide)
2807 	    min_name_wide = (int) (longest_name - prefix_len);
2808     }
2809 
2810     if (min_name_wide < 1)
2811 	min_name_wide = 0;
2812     min_name_wide++;		/* make sure it's nonzero */
2813 }
2814 
2815 static void
summarize(void)2816 summarize(void)
2817 {
2818     DATA *p;
2819     long total_ins = 0;
2820     long total_del = 0;
2821     long total_mod = 0;
2822     long total_eql = 0;
2823     long files_added = 0;
2824     long files_equal = 0;
2825     long files_binary = 0;
2826     long files_removed = 0;
2827     long temp;
2828     int num_files = 0, shortest_name = -1, longest_name = -1;
2829 
2830     plot_scale = 0;
2831     for (p = all_data; p; p = p->link) {
2832 	int len = columns_of(p->name);
2833 
2834 	if (ignore_data(p))
2835 	    continue;
2836 
2837 	/*
2838 	 * If "-pX" option is given, prefix_opt is positive.
2839 	 *
2840 	 * "-p0" gives the whole pathname unmodified.  "-p1" strips
2841 	 * through the first path-separator, etc.
2842 	 */
2843 	if (prefix_opt >= 0) {
2844 	    /* p->base has been computed at node creation */
2845 	    if (min_name_wide < (len - p->base))
2846 		min_name_wide = (len - p->base);
2847 	} else {
2848 	    /*
2849 	     * If "-pX" option is not given, strip off any prefix which is
2850 	     * shared by all of the names.
2851 	     */
2852 	    if (len < prefix_len || prefix_len < 0)
2853 		prefix_len = len;
2854 	    while (prefix_len > 0) {
2855 		if (p->name[prefix_len - 1] != PATHSEP)
2856 		    prefix_len--;
2857 		else if (strncmp(all_data->name, p->name, (size_t) prefix_len))
2858 		    prefix_len--;
2859 		else
2860 		    break;
2861 	    }
2862 
2863 	    if (len > longest_name)
2864 		longest_name = len;
2865 	    if (len < shortest_name || shortest_name < 0)
2866 		shortest_name = len;
2867 	}
2868     }
2869 
2870     /*
2871      * Get additional counts for files where we cannot count lines changed.
2872      */
2873     if (count_files) {
2874 	for (p = all_data; p; p = p->link) {
2875 	    switch (p->cmt) {
2876 	    case Binary:
2877 		files_binary++;
2878 		break;
2879 	    case Only:
2880 		switch (resolve_only(p)) {
2881 		case OnlyRight:
2882 		    p->cmt = OnlyRight;
2883 		    files_added++;
2884 		    break;
2885 		case OnlyLeft:
2886 		    p->cmt = OnlyLeft;
2887 		    files_removed++;
2888 		    break;
2889 		default:
2890 		    /* ignore - we could not guess */
2891 		    break;
2892 		}
2893 	    default:
2894 		break;
2895 	    }
2896 	}
2897     }
2898 
2899     /*
2900      * Use a separate loop after computing prefix_len so we can apply the "-S"
2901      * or "-D" options to find files that we can use as reference for the
2902      * unchanged-count.
2903      */
2904     for (p = all_data; p; p = p->link) {
2905 	if (!ignore_data(p)) {
2906 	    EqlOf(p) = 0;
2907 	    if (reverse_opt) {
2908 		long save_ins = InsOf(p);
2909 		long save_del = DelOf(p);
2910 		InsOf(p) = save_del;
2911 		DelOf(p) = save_ins;
2912 	    }
2913 	    if (path_opt != 0) {
2914 		int count = count_lines(p);
2915 
2916 		if (count >= 0) {
2917 		    EqlOf(p) = count - ModOf(p);
2918 		    if (path_dest != 0) {
2919 			EqlOf(p) -= InsOf(p);
2920 		    } else {
2921 			EqlOf(p) -= DelOf(p);
2922 		    }
2923 		    if (EqlOf(p) < 0)
2924 			EqlOf(p) = 0;
2925 		}
2926 	    }
2927 	    num_files++;
2928 	    total_ins += InsOf(p);
2929 	    total_del += DelOf(p);
2930 	    total_mod += ModOf(p);
2931 	    total_eql += EqlOf(p);
2932 	    temp = TotalOf(p);
2933 	    if (temp > plot_scale)
2934 		plot_scale = temp;
2935 	}
2936     }
2937 
2938     update_min_name_wide(longest_name);
2939 
2940 #ifdef HAVE_OPENDIR
2941     if (S_option != 0 && D_option != 0) {
2942 	unchanged = (all_data == 0);
2943 	count_unmodified_files(D_option, &files_equal, &total_eql);
2944 	if (unchanged) {
2945 	    for (p = all_data; p; p = p->link) {
2946 		int len = columns_of(p->name);
2947 		if (longest_name < len)
2948 		    longest_name = len;
2949 		temp = TotalOf(p);
2950 		if (temp > plot_scale)
2951 		    plot_scale = temp;
2952 	    }
2953 	    update_min_name_wide(longest_name);
2954 	}
2955     }
2956 #endif
2957 
2958     plot_width = (max_width - min_name_wide - 8);
2959     if (plot_width < 10)
2960 	plot_width = 10;
2961 
2962     if (plot_scale < plot_width)
2963 	plot_scale = plot_width;	/* 1:1 */
2964 
2965     if (table_opt == 1) {
2966 	if (!names_only) {
2967 	    printf("INSERTED,DELETED,MODIFIED,");
2968 	    if (path_opt)
2969 		printf("UNCHANGED,");
2970 	    if (count_files && !reverse_opt)
2971 		printf("FILE-ADDED,FILE-DELETED,FILE-BINARY,");
2972 	}
2973 	printf("FILENAME\n");
2974     } else if (table_opt == 2) {
2975 	long largest = 0;
2976 	for (p = all_data; p; p = p->link) {
2977 	    if (path_opt)
2978 		largest = maximum(largest, EqlOf(p));
2979 	    largest = maximum(largest, InsOf(p));
2980 	    largest = maximum(largest, DelOf(p));
2981 	    largest = maximum(largest, ModOf(p));
2982 	}
2983 	number_len = 0;
2984 	while (largest > 0) {
2985 	    number_len++;
2986 	    largest /= 10;
2987 	}
2988 	number_len = maximum(number_len, 3);
2989     }
2990 #ifdef HAVE_TSEARCH
2991     if (use_tsearch) {
2992 	twalk(sorted_data, show_tsearch);
2993     } else
2994 #endif
2995 	for (p = all_data; p; p = p->link) {
2996 	    show_data(p);
2997 	}
2998 
2999     if ((table_opt != 1) && !names_only) {
3000 #define PLURAL(n) n, n != 1 ? "s" : ""
3001 	if (num_files > 0 || !quiet) {
3002 	    printf("%s %d file%s changed", comment_opt, PLURAL(num_files));
3003 	    if (total_ins)
3004 		printf(", %ld insertion%s(+)", PLURAL(total_ins));
3005 	    if (total_del)
3006 		printf(", %ld deletion%s(-)", PLURAL(total_del));
3007 	    if (total_mod)
3008 		printf(", %ld modification%s(!)", PLURAL(total_mod));
3009 	    if (total_eql && path_opt != 0)
3010 		printf(", %ld unchanged line%s(=)", PLURAL(total_eql));
3011 	    if (count_files) {
3012 		if (files_added)
3013 		    printf(", %ld file%s added", PLURAL(files_added));
3014 		if (files_removed)
3015 		    printf(", %ld file%s removed", PLURAL(files_removed));
3016 		if (files_binary)
3017 		    printf(", %ld binary file%s", PLURAL(files_binary));
3018 	    }
3019 	    (void) putchar('\n');
3020 	}
3021     }
3022 }
3023 
3024 #ifdef HAVE_POPEN
3025 static const char *
get_program(const char * name,const char * dft)3026 get_program(const char *name, const char *dft)
3027 {
3028     const char *result = getenv(name);
3029     if (result == 0 || *result == EOS)
3030 	result = dft;
3031     TRACE(("get_program(%s) = %s\n", name, result));
3032     return result;
3033 }
3034 #define GET_PROGRAM(name) get_program("DIFFSTAT_" #name, name)
3035 
3036 static char *
decompressor(Decompress which,const char * name)3037 decompressor(Decompress which, const char *name)
3038 {
3039     const char *verb = 0;
3040     const char *opts = "";
3041     char *result = 0;
3042     size_t len = strlen(name);
3043 
3044     switch (which) {
3045     case dcBzip:
3046 	verb = GET_PROGRAM(BZCAT_PATH);
3047 	if (*verb == EOS) {
3048 	    verb = GET_PROGRAM(BZIP2_PATH);
3049 	    opts = "-dc";
3050 	}
3051 	break;
3052     case dcCompress:
3053 	verb = GET_PROGRAM(ZCAT_PATH);
3054 	if (*verb == EOS) {
3055 	    verb = GET_PROGRAM(UNCOMPRESS_PATH);
3056 	    opts = "-c";
3057 	    if (*verb == EOS) {
3058 		/* not all compress's recognize the options, test this last */
3059 		verb = GET_PROGRAM(COMPRESS_PATH);
3060 		opts = "-dc";
3061 	    }
3062 	}
3063 	break;
3064     case dcGzip:
3065 	verb = GET_PROGRAM(GZIP_PATH);
3066 	opts = "-dc";
3067 	break;
3068     case dcLzma:
3069 	verb = GET_PROGRAM(LZCAT_PATH);
3070 	opts = "-dc";
3071 	break;
3072     case dcPack:
3073 	verb = GET_PROGRAM(PCAT_PATH);
3074 	break;
3075     case dcXz:
3076 	verb = GET_PROGRAM(XZ_PATH);
3077 	opts = "-dc";
3078 	break;
3079     case dcEmpty:
3080 	/* FALLTHRU */
3081     case dcNone:
3082 	break;
3083     }
3084     if (verb != 0 && *verb != EOS) {
3085 	result = (char *) xmalloc(strlen(verb) + 10 + len);
3086 	sprintf(result, "%s %s", verb, opts);
3087 	if (*name != EOS) {
3088 	    sprintf(result + strlen(result), " \"%s\"", name);
3089 	}
3090     }
3091     return result;
3092 }
3093 
3094 static char *
is_compressed(const char * name)3095 is_compressed(const char *name)
3096 {
3097     size_t len = strlen(name);
3098     Decompress which;
3099 
3100     if (len > 2 && !strcmp(name + len - 2, ".Z")) {
3101 	which = dcCompress;
3102     } else if (len > 2 && !strcmp(name + len - 2, ".z")) {
3103 	which = dcPack;
3104     } else if (len > 3 && !strcmp(name + len - 3, ".gz")) {
3105 	which = dcGzip;
3106     } else if (len > 4 && !strcmp(name + len - 4, ".bz2")) {
3107 	which = dcBzip;
3108     } else if (len > 5 && !strcmp(name + len - 5, ".lzma")) {
3109 	which = dcLzma;
3110     } else if (len > 3 && !strcmp(name + len - 3, ".xz")) {
3111 	which = dcXz;
3112     } else {
3113 	which = dcNone;
3114     }
3115     return decompressor(which, name);
3116 }
3117 
3118 #ifdef HAVE_MKDTEMP
3119 #define MY_MKDTEMP(path) mkdtemp(path)
3120 #else
/*
 * mktemp is supposedly marked obsolete at the same point that mkdtemp is
 * introduced.
 *
 * Fallback for mkdtemp(): fill in the template with mktemp() and create the
 * directory with mode 0700.  Returns the filled-in path, or null if either
 * mktemp() or MKDIR() fails, so that callers testing the result see the
 * failure.
 */
static char *
my_mkdtemp(char *path)
{
    char *result = mktemp(path);
    if (result != 0) {
	if (MKDIR(result, 0700) < 0) {
	    result = 0;
	}
    }
    return result;
}
3136 #define MY_MKDTEMP(path) my_mkdtemp(path)
3137 #endif
3138 
/*
 * Copy standard input to a file named "stdin" in a newly created temporary
 * directory under $TMPDIR (or "/tmp/" if that is not set).
 *
 * On success, returns the pathname of the copy and stores the directory's
 * pathname in '*dirpath'; both come from xmalloc().  If the directory or the
 * file cannot be created, returns null and sets '*dirpath' to null.
 */
static char *
copy_stdin(char **dirpath)
{
    const char *tmp = getenv("TMPDIR");
    char *result;
    FILE *fp;
    int ch;

    if (tmp == 0)
	tmp = "/tmp/";

    /* room for the directory, "/diffXXXXXX" and the trailing null */
    *dirpath = xmalloc(strlen(tmp) + 12);
    sprintf(*dirpath, "%s%s", tmp, "/diffXXXXXX");

    if (MY_MKDTEMP(*dirpath) == 0) {
	free(*dirpath);
	*dirpath = 0;
	return 0;
    }

    result = xmalloc(strlen(*dirpath) + 10);
    sprintf(result, "%s/stdin", *dirpath);

    fp = fopen(result, "w");
    if (fp == 0) {
	free(result);
	rmdir(*dirpath);	/* Assume that the /stdin file was not created */
	free(*dirpath);
	*dirpath = 0;
	return 0;
    }

    while ((ch = MY_GETC(stdin)) != EOF) {
	fputc(ch, fp);
    }
    (void) fclose(fp);

    return result;
}
3177 #endif
3178 
3179 static void
set_path_opt(char * value,int destination)3180 set_path_opt(char *value, int destination)
3181 {
3182     path_opt = value;
3183     path_dest = destination;
3184     if (*path_opt != 0) {
3185 	if (is_dir(path_opt)) {
3186 	    num_marks = 4;
3187 	} else {
3188 	    (void) fflush(stdout);
3189 	    fprintf(stderr, "Not a directory:%s\n", path_opt);
3190 	    exit(EXIT_FAILURE);
3191 	}
3192     }
3193 }
3194 
/*
 * Write the usage message to 'fp', one line per table entry.  The "-d" line
 * is included only when OPT_TRACE is enabled.
 */
static void
usage(FILE *fp)
{
    static const char *const text[] =
    {
	"Usage: diffstat [options] [files]",
	"",
	"Reads from one or more input files which contain output from 'diff',",
	"producing a histogram of total lines changed for each file referenced.",
	"If no filename is given on the command line, reads from standard input.",
	"",
	"Options:",
	"  -b      ignore lines matching \"Binary files XXX and YYY differ\"",
	"  -c      prefix each line with comment (#)",
	"  -C      add SGR color escape sequences to highlight the histogram",
#if OPT_TRACE
	"  -d      debug - prints a lot of information",
#endif
	"  -D PATH specify location of patched files, use for unchanged-count",
	"  -e FILE redirect standard error to FILE",
	"  -E      trim escape-sequences, e.g., from colordiff",
	"  -f NUM  format (0=concise, 1=normal, 2=filled, 4=values)",
	"  -h      print this message",
	"  -k      do not merge filenames",
	"  -K      resolve ambiguity of \"only\" filenames",
	"  -l      list filenames only",
	"  -m      merge insert/delete data in chunks as modified-lines",
	"  -n NUM  specify minimum width for the filenames (default: auto)",
	"  -N NUM  specify maximum width for the filenames (default: auto)",
	"  -o FILE redirect standard output to FILE",
	"  -p NUM  specify number of pathname-separators to strip (default: common)",
	"  -q      suppress the \"0 files changed\" message for empty diffs",
	"  -r NUM  specify rounding for histogram (0=none, 1=simple, 2=adjusted)",
	"  -R      assume patch was created with old and new files swapped",
	"  -s      show only the summary line",
	"  -S PATH specify location of original files, use for unchanged-count",
	"  -t      print a table (comma-separated-values) rather than histogram",
	"  -T      print amounts (like -t option) in addition to histogram",
	"  -u      do not sort the input list",
	"  -v      show progress if output is redirected to a file",
	"  -V      prints the version number",
	"  -w NUM  specify maximum width of the output (default: 80)",
    };
    const size_t count = sizeof(text) / sizeof(text[0]);
    size_t n;

    for (n = 0; n < count; ++n) {
	fputs(text[n], fp);
	fputc('\n', fp);
    }
}
3242 
/*
 * Front end for getopt() which additionally recognizes the two long options
 * "--help" and "--version".
 *
 * argc, argv and opts are handed to getopt() unchanged, and the return value
 * as well as the globals optarg, optind, opterr and optopt behave as they do
 * for getopt().  If the argument at argv[optind] is exactly "--help" or
 * "--version", that argument is consumed (optind is advanced by one) and the
 * caller-supplied "help" or "version" code is returned without calling
 * getopt().
 */
static int
getopt_helper(int argc, char *const argv[], const char *opts,
	      int help, int version)
{
    if (optind < argc) {
	const char *word = argv[optind];

	if (word != NULL) {
	    int want_help = (strcmp(word, "--help") == 0);

	    /* "--version" is compared only when "--help" did not match */
	    if (want_help || strcmp(word, "--version") == 0) {
		++optind;
		return want_help ? help : version;
	    }
	}
    }
    return getopt(argc, argv, opts);
}
3262 
/*
 * Convert the current option argument (the global "optarg") to an int.
 *
 * The text is parsed by strtol() with base 0, so decimal, octal ("0...") and
 * hexadecimal ("0x...") spellings are accepted.  The whole argument must be
 * a number which fits in an int; otherwise a message is written to stderr
 * (after flushing stdout) and the program exits with EXIT_FAILURE.  This
 * function therefore returns only for a valid value.
 */
static int
getopt_value(void)
{
    char *next = 0;
    long value = strtol(optarg, &next, 0);

    /*
     * When strtol() finds no digits it leaves "next" pointing at the start
     * of its input.  For an empty argument that is also the terminator, so
     * checking only for the terminator would accept "" as zero.
     */
    if (next == 0 || next == optarg || *next != EOS) {
	(void) fflush(stdout);
	fprintf(stderr, "expected a number, have '%s'\n", optarg);
	exit(EXIT_FAILURE);
    }

    /* reject values which would be altered by the conversion to int */
    if ((long) (int) value != value) {
	(void) fflush(stdout);
	fprintf(stderr, "number is out of range: '%s'\n", optarg);
	exit(EXIT_FAILURE);
    }
    return (int) value;
}
3275 
3276 int
main(int argc,char * argv[])3277 main(int argc, char *argv[])
3278 {
3279     int j;
3280     char version[80];
3281 
3282 #if defined(HAVE_TCGETATTR) && defined(TIOCGWINSZ)
3283     if (isatty(fileno(stdout))) {
3284 	struct winsize data;
3285 	if (ioctl(fileno(stdout), TIOCGWINSZ, &data) == 0) {
3286 	    max_width = data.ws_col;
3287 	}
3288     }
3289 #endif
3290 
3291 #ifdef HAVE_MBSTOWCWIDTH
3292     setlocale(LC_CTYPE, "");
3293 #endif
3294 
3295     while ((j = getopt_helper(argc, argv,
3296 			      "bcCdD:e:Ef:hkKlmn:N:o:p:qr:RsS:tTuvVw:", 'h', 'V'))
3297 	   != -1) {
3298 	switch (j) {
3299 	case 'b':
3300 	    suppress_binary = 1;
3301 	    break;
3302 	case 'c':
3303 	    comment_opt = "#";
3304 	    break;
3305 	case 'C':
3306 	    show_colors = 1;
3307 	    break;
3308 #if OPT_TRACE
3309 	case 'd':
3310 	    trace_opt = 1;
3311 	    break;
3312 #endif
3313 	case 'D':
3314 	    D_option = optarg;
3315 	    break;
3316 	case 'e':
3317 	    if (freopen(optarg, "w", stderr) == 0)
3318 		failed(optarg);
3319 	    break;
3320 	case 'E':
3321 	    trim_escapes = 1;
3322 	    break;
3323 	case 'f':
3324 	    format_opt = getopt_value();
3325 	    break;
3326 	case 'h':
3327 	    usage(stdout);
3328 	    return (EXIT_SUCCESS);
3329 	case 'k':
3330 	    merge_names = 0;
3331 	    break;
3332 	case 'K':
3333 	    count_files = 1;
3334 	    break;
3335 	case 'l':
3336 	    names_only = 1;
3337 	    break;
3338 	case 'm':
3339 	    merge_opt = 1;
3340 	    break;
3341 	case 'n':
3342 	    min_name_wide = getopt_value();
3343 	    break;
3344 	case 'N':
3345 	    max_name_wide = getopt_value();
3346 	    break;
3347 	case 'o':
3348 	    if (freopen(optarg, "w", stdout) == 0)
3349 		failed(optarg);
3350 	    break;
3351 	case 'p':
3352 	    prefix_opt = getopt_value();
3353 	    break;
3354 	case 'r':
3355 	    round_opt = getopt_value();
3356 	    break;
3357 	case 'R':
3358 	    reverse_opt = 1;
3359 	    break;
3360 	case 's':
3361 	    summary_only = 1;
3362 	    break;
3363 	case 'S':
3364 	    S_option = optarg;
3365 	    break;
3366 	case 't':
3367 	    table_opt = 1;
3368 	    break;
3369 	case 'T':
3370 	    table_opt = 2;
3371 	    break;
3372 	case 'u':
3373 	    sort_names = 0;
3374 	    break;
3375 	case 'v':
3376 	    verbose = 1;
3377 	    break;
3378 	case 'V':
3379 #ifndef	NO_IDENT
3380 	    if (!sscanf(Id, "%*s %*s %30s", version))
3381 #endif
3382 		(void) strcpy(version, "?");
3383 	    printf("diffstat version %s\n", version);
3384 	    return (EXIT_SUCCESS);
3385 	case 'w':
3386 	    max_width = getopt_value();
3387 	    break;
3388 	case 'q':
3389 	    quiet = 1;
3390 	    break;
3391 	default:
3392 	    usage(stderr);
3393 	    return (EXIT_FAILURE);
3394 	}
3395     }
3396 
3397     /*
3398      * The numbers from -S/-D options will only be useful if the merge option
3399      * is added.
3400      */
3401     if (S_option)
3402 	set_path_opt(S_option, 0);
3403     if (D_option)
3404 	set_path_opt(D_option, 1);
3405     if (path_opt)
3406 	merge_opt = 1;
3407 
3408     show_progress = verbose && (!isatty(fileno(stdout))
3409 				&& isatty(fileno(stderr)));
3410 
3411 #ifdef HAVE_TSEARCH
3412     use_tsearch = (sort_names && merge_names);
3413 #endif
3414 
3415     if (optind < argc) {
3416 	while (optind < argc) {
3417 	    FILE *fp;
3418 	    char *name = argv[optind++];
3419 #ifdef HAVE_POPEN
3420 	    char *command = is_compressed(name);
3421 	    if (command != 0) {
3422 		if ((fp = popen(command, "r")) != 0) {
3423 		    if (show_progress) {
3424 			(void) fflush(stdout);
3425 			(void) fprintf(stderr, "%s\n", name);
3426 			(void) fflush(stderr);
3427 		    }
3428 		    do_file(fp, name);
3429 		    (void) pclose(fp);
3430 		}
3431 		free(command);
3432 	    } else
3433 #endif
3434 	    if ((fp = fopen(name, "rb")) != 0) {
3435 		if (show_progress) {
3436 		    (void) fflush(stdout);
3437 		    (void) fprintf(stderr, "%s\n", name);
3438 		    (void) fflush(stderr);
3439 		}
3440 		do_file(fp, name);
3441 		(void) fclose(fp);
3442 	    } else {
3443 		failed(name);
3444 	    }
3445 	}
3446     } else {
3447 #ifdef HAVE_POPEN
3448 	Decompress which = dcEmpty;
3449 	char *stdin_dir = 0;
3450 	char *myfile;
3451 	char sniff[8];
3452 	int ch;
3453 	unsigned got = 0;
3454 
3455 	if ((ch = MY_GETC(stdin)) != EOF) {
3456 	    which = dcNone;
3457 	    if (ch == 'B') {	/* perhaps bzip2 (poor magic design...) */
3458 		sniff[got++] = (char) ch;
3459 		while (got < 5) {
3460 		    if ((ch = MY_GETC(stdin)) == EOF)
3461 			break;
3462 		    sniff[got++] = (char) ch;
3463 		}
3464 		if (got == 5
3465 		    && !strncmp(sniff, "BZh", (size_t) 3)
3466 		    && isdigit(UC(sniff[3]))
3467 		    && isdigit(UC(sniff[4]))) {
3468 		    which = dcBzip;
3469 		}
3470 	    } else if (ch == ']') {	/* perhaps lzma */
3471 		sniff[got++] = (char) ch;
3472 		while (got < 4) {
3473 		    if ((ch = MY_GETC(stdin)) == EOF)
3474 			break;
3475 		    sniff[got++] = (char) ch;
3476 		}
3477 		if (got == 4
3478 		    && !memcmp(sniff, "]\0\0\200", (size_t) 4)) {
3479 		    which = dcLzma;
3480 		}
3481 	    } else if (ch == 0xfd) {	/* perhaps xz */
3482 		sniff[got++] = (char) ch;
3483 		while (got < 6) {
3484 		    if ((ch = MY_GETC(stdin)) == EOF)
3485 			break;
3486 		    sniff[got++] = (char) ch;
3487 		}
3488 		if (got == 6
3489 		    && !memcmp(sniff, "\3757zXZ\0", (size_t) 6)) {
3490 		    which = dcXz;
3491 		}
3492 	    } else if (ch == '\037') {	/* perhaps compress, etc. */
3493 		sniff[got++] = (char) ch;
3494 		if ((ch = MY_GETC(stdin)) != EOF) {
3495 		    sniff[got++] = (char) ch;
3496 		    switch (ch) {
3497 		    case 0213:
3498 			which = dcGzip;
3499 			break;
3500 		    case 0235:
3501 			which = dcCompress;
3502 			break;
3503 		    case 0036:
3504 			which = dcPack;
3505 			break;
3506 		    }
3507 		}
3508 	    } else {
3509 		sniff[got++] = (char) ch;
3510 	    }
3511 	}
3512 	/*
3513 	 * The C standard only guarantees one ungetc;
3514 	 * virtually everyone allows more.
3515 	 */
3516 	while (got != 0) {
3517 	    ungetc(sniff[--got], stdin);
3518 	}
3519 	if (which != dcNone
3520 	    && which != dcEmpty
3521 	    && (myfile = copy_stdin(&stdin_dir)) != 0) {
3522 	    FILE *fp;
3523 	    char *command;
3524 
3525 	    /* open pipe to decompress temporary file */
3526 	    command = decompressor(which, myfile);
3527 	    if ((fp = popen(command, "r")) != 0) {
3528 		do_file(fp, "stdin");
3529 		(void) pclose(fp);
3530 	    }
3531 	    free(command);
3532 
3533 	    unlink(myfile);
3534 	    free(myfile);
3535 	    myfile = 0;
3536 	    rmdir(stdin_dir);
3537 	    free(stdin_dir);
3538 	    stdin_dir = 0;
3539 	} else if (which != dcEmpty)
3540 #endif
3541 	    do_file(stdin, "stdin");
3542     }
3543     summarize();
3544 #if defined(NO_LEAKS)
3545     while (all_data != 0) {
3546 	delink(all_data);
3547     }
3548 #endif
3549     return (EXIT_SUCCESS);
3550 }
3551