1 /******************************************************************************
2 * Copyright 1994-2019,2021 by Thomas E. Dickey *
3 * All Rights Reserved. *
4 * *
5 * Permission to use, copy, modify, and distribute this software and its *
6 * documentation for any purpose and without fee is hereby granted, provided *
7 * that the above copyright notice appear in all copies and that both that *
8 * copyright notice and this permission notice appear in supporting *
9 * documentation, and that the name of the above listed copyright holder(s) *
10 * not be used in advertising or publicity pertaining to distribution of the *
11 * software without specific, written prior permission. *
12 * *
13 * THE ABOVE LISTED COPYRIGHT HOLDER(S) DISCLAIM ALL WARRANTIES WITH REGARD *
14 * TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND *
15 * FITNESS, IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE *
16 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES *
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN *
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR *
19 * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *
20 ******************************************************************************/
21
22 #ifndef NO_IDENT
23 static const char *Id = "$Id: diffstat.c,v 1.64 2021/01/13 00:28:32 tom Exp $";
24 #endif
25
26 /*
27 * Title: diffstat.c
28 * Author: T.E.Dickey
29 * Created: 02 Feb 1992
30 * Modified:
31 * 12 Jan 2021, check for git --binary diffs.
32 * 29 Nov 2019, eliminate fixed buffer when decoding range.
33 * 28 Nov 2019, use locale in computing filename column-width.
34 * improve parsing for git diffs.
35 * use terminal-width as default for -w to tty.
36 * minor fix in do_merging (Miloslaw Smyk).
37 * 27 Nov 2019, improve relative-pathname matching in count_lines()
38 * add a parsing-case for svn diff.
39 * quote filenames in -t/-T output.
40 * 24 Nov 2019, fix cppcheck-warnings about sscanf.
41 * 14 Aug 2018, revise -S/-D option to improve count of unmodified
42 * files.
43 * 14 Jan 2016, extend -S option to count unmodified files.
44 * add -T option to show values with histogram
45 * 06 Jul 2015, handle double-quotes, e.g., from diffutils 3.3
46 * when filenames have embedded spaces.
47 * 05 Jun 2014, add -E option to filter colordiff output.
48 * 28 Oct 2013, portability improvements for MinGW.
49 * 15 Apr 2013, modify to accommodate output of "diff -q", which
50 * tells only if the files are different. Work
51 * around the equivalent ambiguous message introduced
52 * in diffutils 2.8.4 and finally removed for 3.0
53 * 11 Feb 2013, add -K option. Use strtol() to provide error
54 * checking of optarg values.
55 * 10 Feb 2013, document -b, -C, -s option in usage (patch by
56 * Tim Waugh, Red Hat #852770). Improve pathname
57 * merging.
58 * 02 Jun 2012, fix for svn diff with spaces in path (patch by
59 * Stuart Prescott, Debian #675465).
60 * 03 Jan 2012, Correct case for "xz" suffix in is_compressed()
61 * (patch from Frederic Culot in FreeBSD ports). Add
62 * "-R" option. Improve dequoting of filenames in
63 * headers.
64 * 10 Oct 2010, correct display of new files when -S/-D options
65 * are used. Remove the temporary directory on
66 * error, introduced in 1.48+ (patch by Solar
67 * Designer).
68 * 19 Jul 2010, add missing "break" statement which left "-c"
69 * option falling-through into "-C".
70 * 16 Jul 2010, configure "xz" path explicitly, in case lzcat
71 * does not support xz format. Add "-s" (summary)
72 * and "-C" (color) options.
73 * 15 Jul 2010, fix strict gcc warnings, e.g., using const.
74 * 10 Jan 2010, improve a case where filenames have embedded blanks
75 * (patch by Reinier Post).
76 * 07 Nov 2009, correct suffix-check for ".xz" files as
77 * command-line parameters rather than as piped
78 * input (report by Moritz Barsnick).
79 * 06 Oct 2009, fixes to build/run with MSYS or MinGW. use
80 * $TMPDIR for path of temporary file used in
81 * decompression. correct else-condition for
82 * detecting compression type (patch by Zach Hirsch).
83 * 31 Aug 2009, improve lzma support, add support for xz (patch by
84 * Eric Blake). Add special case for no-newline
85 * message from some diff's (Ubuntu #269895).
86 * Improve configure check for getopt().
87 * 11 Aug 2009, Add logic to check standard input, decompress if
88 * possible. Add -N option, to truncate long names.
89 * Add pack/pcat as a compression type.
90 * Add lzma/lzcat as a compression type.
91 * Allow overriding program paths with environment.
92 * 10 Aug 2009, modify to work with Perforce-style diffs (patch
93 * by Ed Schouten).
94 * 29 Mar 2009, modify to work with patch ".rej" files, which have
95 * no filename header (use the name of the ".rej"
96 * file if it is available).
97 * 29 Sep 2008, fix typo in usage message.
98 * 06 Aug 2008, add "-m", "-S" and "-D" options.
99 * 05 Aug 2008, add "-q" option to suppress 0-files-changed
100 * message (patch by Greg Norris).
101 * 04 Sep 2007, add "-b" option to suppress binary-files (patch
102 * by Greg Norris).
103 * 26 Aug 2007, add "-d" option to show debugging traces, rather
104 * than by defining DEBUG. Add check after
105 * unified-diff chunk to avoid adding non-diff text
106 * (report by Adrian Bunk). Quote pathname passed
107 * in command to gzip/uncompress. Add a check for
108 * default-diff output without the "diff" command
109 * supplied to provide filename, mark as "unknown".
110 * 16 Jul 2006, fix to avoid modifying which is being used by
111 * tsearch() for ordering the binary tree (report by
112 * Adrian Bunk).
113 * 02 Jul 2006, do not ignore pathnames in /tmp/, since some tools
114 * create usable pathnames for both old/new files
115 * there (Debian #376086). Correct ifdef for
116 * fgetc_unlocked(). Add configure check for
117 * compress, gzip and bzip2 programs that may be used
118 * to decompress files.
119 * 24 Aug 2005, update usage message for -l, -r changes.
120 * 15 Aug 2005, apply PLURAL() to num_files (Jean Delvare).
121 * add -l option (request by Michael Burian).
122 * Use fgetc_locked() if available.
123 * 14 Aug 2005, add -r2 option (rounding with adjustment to ensure
124 * that nonzero values always display a histogram
125 * bar), adapted from patch by Jean Delvare. Extend
126 * the -f option (2=filled, 4=verbose).
127 * 12 Aug 2005, modify to use tsearch() for sorted lists.
128 * 11 Aug 2005, minor fixes to scaling of modified lines. Add
129 * -r (round) option.
130 * 05 Aug 2005, add -t (table) option.
131 * 10 Apr 2005, change order of merging and prefix-stripping so
132 * stripping all prefixes, e.g., with -p9, will be
133 * sorted as expected (Patch by Jean Delvare
134 * <khali@linux-fr.org>).
135 * 10 Jan 2005, add support for '--help' and '--version' (Patch
136 * by Eric Blake <ebb9@byu.net>.)
137 * 16 Dec 2004, fix a different case for data beginning with "--"
138 * which was treated as a header line.
139 * 14 Dec 2004, Fix allocation problems. Open files in binary
140 * mode for reading. Getopt returns -1, not
141 * necessarily EOF. Add const where useful. Use
142 * NO_IDENT where necessary. malloc() comes from
143 * <stdlib.h> in standard systems (Patch by Eric
144 * Blake <ebb9@byu.net>.)
145 * 08 Nov 2004, minor fix for resync of unified diffs checks for
146 * range (line beginning with '@' without header
147 * lines (successive lines beginning with "---" and
148 * "+++"). Fix a few problems reported by valgrind.
149 * 09 Nov 2003, modify check for lines beginning with '-' or '+'
150 * to treat only "---" in old-style diffs as a
151 * special case.
152 * 14 Feb 2003, modify check for filenames to allow for some cases
153 * of incomplete dates (the reported example omitted
154 * the day of the month). Correct a typo in usage().
155 * Add -e, -h, -o options.
156 * 04 Jan 2003, improve tracking of chunks in unified diff, in
157 * case the original files contained a '+' or '-' in
158 * the first column (Debian #155000). Add -v option
159 * (Debian #170947). Modify to allocate buffers big
160 * enough for long input lines. Do additional
161 * merging to handle unusual Index/diff constructs in
162 * recent makepatch script.
163 * 20 Aug 2002, add -u option to tell diffstat to preserve the
164 * order of filenames as given rather than sort them
165 * (request by H Peter Anvin <hpa@zytor.com>). Add
166 * -k option for completeness.
167 * 09 Aug 2002, allow either '/' or '-' as delimiters in dates,
168 * to accommodate diffutils 2.8 (report by Rik van
169 * Riel <riel@conectiva.com.br>).
170 * 10 Oct 2001, add bzip2 (.bz2) suffix as suggested by
171 * Gregory T Norris <haphazard@socket.net> in Debian
172 * bug report #82969).
173 * add check for diff from RCS archive where the
174 * "diff" lines do not reference a filename.
175 * 29 Mar 2000, add -c option. Check for compressed input, read
176 * via pipe. Change to ANSI C. Adapted change from
177 * Troy Engel to add option that displays a number
178 * only, rather than a histogram.
179 * 17 May 1998, handle Debian diff files, which do not contain
180 * dates on the header lines.
181 * 16 Jan 1998, accommodate patches w/o tabs in header lines (e.g.,
182 * from cut/paste). Strip suffixes such as ".orig".
183 * 24 Mar 1996, corrected -p0 logic, more fixes in do_merging.
184 * 16 Mar 1996, corrected state-change for "Binary". Added -p
185 * option.
186 * 17 Dec 1995, corrected matching algorithm in 'do_merging()'
187 * 11 Dec 1995, mods to accommodate diffs against /dev/null or
188 * /tmp/XXX (tempfiles).
189 * 06 May 1995, limit scaling -- only shrink-to-fit.
190 * 29 Apr 1995, recognize 'rcsdiff -u' format.
191 * 26 Dec 1994, strip common pathname-prefix.
192 * 13 Nov 1994, added '-n' option. Corrected logic of 'match'.
193 * 17 Jun 1994, ifdef-<string.h>
194 * 12 Jun 1994, recognize unified diff, and output of makepatch.
195 * 04 Oct 1993, merge multiple diff-files, busy message when the
196 * output is piped to a file.
197 *
198 * Function: this program reads the output of 'diff' and displays a histogram
199 * of the insertions/deletions/modifications per-file.
200 */
201
202 #if defined(HAVE_CONFIG_H)
203 #include <config.h>
204 #endif
205
206 #if defined(WIN32) && !defined(HAVE_CONFIG_H)
207 #define HAVE_STDLIB_H
208 #define HAVE_STRING_H
209 #define HAVE_MALLOC_H
210 #define HAVE_GETOPT_H
211 #endif
212
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
215
216 #ifdef HAVE_STRING_H
217 #include <string.h>
218 #else
219 #include <strings.h>
220 #define strchr index
221 #define strrchr rindex
222 #endif
223
224 #ifdef HAVE_STDLIB_H
225 #include <stdlib.h>
226 #else
227 extern int atoi(const char *);
228 #endif
229
230 #ifdef HAVE_UNISTD_H
231 #include <unistd.h>
232 #else
233 extern int isatty(int);
234 #endif
235
236 #ifdef HAVE_OPENDIR
237 #include <dirent.h>
238 #endif
239
240 #ifdef HAVE_MALLOC_H
241 #include <malloc.h>
242 #endif
243
244 #if defined(HAVE_SEARCH_H) && defined(HAVE_TSEARCH)
245 #include <search.h>
246 #else
247 #undef HAVE_TSEARCH
248 #endif
249
250 #ifdef HAVE_MBSTOWCWIDTH
251 #include <locale.h>
252 #include <wchar.h>
253 #endif
254
255 #ifdef HAVE_GETC_UNLOCKED
256 #define MY_GETC getc_unlocked
257 #else
258 #define MY_GETC getc
259 #endif
260
261 #ifdef HAVE_GETOPT_H
262 #include <getopt.h>
263 #elif !defined(HAVE_GETOPT_HEADER)
264 extern int getopt(int, char *const *, const char *);
265 extern char *optarg;
266 extern int optind;
267 #endif
268
269 #include <sys/types.h>
270 #include <sys/stat.h>
271
272 #if defined(HAVE_TERMIOS_H) && defined(HAVE_TCGETATTR)
273 #ifdef HAVE_IOCTL_H
274 #include <ioctl.h>
275 #else
276 #ifdef HAVE_SYS_IOCTL_H
277 #include <sys/ioctl.h>
278 #endif
279 #endif
280 #if !defined(sun) || !defined(NL0)
281 #include <termios.h>
282 #endif
283 #endif /* HAVE_TERMIOS_H */
284
285 #if defined(HAVE_POPEN) && !defined(HAVE_POPEN_PROTOTYPE)
286 extern FILE *popen(const char *, const char *);
287 extern int pclose(FILE *);
288 #endif
289
290 #if !defined(EXIT_SUCCESS)
291 #define EXIT_SUCCESS 0
292 #define EXIT_FAILURE 1
293 #endif
294
295 #ifndef BZCAT_PATH
296 #define BZCAT_PATH ""
297 #endif
298
299 #ifndef BZIP2_PATH
300 #define BZIP2_PATH ""
301 #endif
302
303 #ifndef COMPRESS_PATH
304 #define COMPRESS_PATH ""
305 #endif
306
307 #ifndef GZIP_PATH
308 #define GZIP_PATH ""
309 #endif
310
311 #ifndef LZCAT_PATH
312 #define LZCAT_PATH ""
313 #endif
314
315 #ifndef PCAT_PATH
316 #define PCAT_PATH ""
317 #endif
318
319 #ifndef UNCOMPRESS_PATH
320 #define UNCOMPRESS_PATH ""
321 #endif
322
323 #ifndef XZ_PATH
324 #define XZ_PATH ""
325 #endif
326
327 #ifndef ZCAT_PATH
328 #define ZCAT_PATH ""
329 #endif
330
331 /******************************************************************************/
332
/*
 * MKDIR() takes a name and a mode everywhere; the MinGW/WIN32 form drops the
 * mode argument when calling mkdir().
 */
#if !defined(__MINGW32__) && !defined(WIN32)
#define MKDIR(name,mode) mkdir(name,mode)
#else
#define MKDIR(name,mode) mkdir(name)
#endif

/*
 * PATHSEP is the character separating the components of a pathname: a
 * backslash for WIN32 builds other than MinGW, a slash otherwise.
 */
#if !defined(WIN32) || defined(__MINGW32__)
#define PATHSEP '/'
#else
#define PATHSEP '\\'
#endif
344
/* Named character constants used when parsing the input. */
#define EOS    '\0'
#define TAB    '\t'
#define BLANK  ' '
#define DQUOTE '"'
#define SQUOTE '\''
#define LPAREN '('
#define RPAREN ')'
#define BACKSL '\\'
#define ESCAPE '\033'
#define DEL    '\177'

/* Convert a (possibly signed) char to unsigned char, e.g., for <ctype.h>. */
#define UC(c) ((unsigned char)(c))

/* True if 'c' is one of the digits '0' through '7'. */
#define isoctal(c) (((c) >= '0') && ((c) <= '7'))
359
#ifndef OPT_TRACE
#define OPT_TRACE 1
#endif

/*
 * TRACE((format, args...)) prints a debugging message with printf() when the
 * trace_opt flag is nonzero.  The argument must be a complete, parenthesized
 * printf() argument list.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: a bare "if (trace_opt) ..." would capture the "else"
 * of an enclosing unbraced if/else.
 */
#if OPT_TRACE
#define TRACE(p) do { if (trace_opt) printf p; } while (0)
#else
#define TRACE(p) do { } while (0)
#endif
369
/* True if string 's' contains at least one character from 'reject'. */
#define contain_any(s,reject) (strcspn(s,reject) != strlen(s))

/* The larger of 'a' and 'b'; note that each argument may be evaluated twice. */
#define maximum(a,b) ((a) < (b) ? (b) : (a))
372
/* What is known so far about the filename for the current diff. */
#define HAVE_NOTHING 0
#define HAVE_GENERIC 1		/* e.g., "Index: foo" w/o pathname */
#define HAVE_PATH    2		/* reference-file from "diff dirname/foo" */
#define HAVE_PATH2   4		/* comparison-file from "diff dirname/foo" */

/* Values for format_opt; the nonzero values are distinct bits. */
#define FMT_CONCISE  0
#define FMT_NORMAL   1
#define FMT_FILLED   2
#define FMT_VERBOSE  4
382
/*
 * Classification stored in DATA.cmt.  "Normal" is the value assigned by
 * init_data(); HadDiffs() treats any other value as a difference.
 */
typedef enum comment {
    Normal,
    Only,
    OnlyLeft,
    OnlyRight,
    Binary,
    Differs,
    Either
} Comment;
386
#define MARKS 4			/* one for each of "+", "-", "!" and "=" */

/* Index into the count[] and chunk[] arrays of DATA. */
typedef enum {
    cInsert = 0,
    cDelete = 1,
    cModify = 2,
    cEquals = 3
} Change;

/* Per-file counters, selected by kind of change. */
#define InsOf(p) (p)->count[cInsert]	/* "+" count inserted lines */
#define DelOf(p) (p)->count[cDelete]	/* "-" count deleted lines */
#define ModOf(p) (p)->count[cModify]	/* "!" count modified lines */
#define EqlOf(p) (p)->count[cEquals]	/* "=" count unmodified lines */

/* Sum of all four counters for one file. */
#define TotalOf(p) (InsOf(p) + DelOf(p) + ModOf(p) + EqlOf(p))

/* Loop over the marks which are in use (see num_marks). */
#define for_each_mark(n) for (n = 0; n < num_marks; ++n)
403
404 typedef struct _data {
405 struct _data *link;
406 char *name; /* the filename */
407 int copy; /* true if filename is const-literal */
408 int base; /* beginning of name if -p option used */
409 Comment cmt;
410 int pending;
411 long chunks; /* total number of chunks */
412 long chunk[MARKS]; /* counts for the current chunk */
413 long count[MARKS]; /* counts for the file */
414 } DATA;
415
/* Kinds of compression recognized for input files; dcNone is zero. */
typedef enum {
    dcNone = 0,
    dcBzip = 1,
    dcCompress = 2,
    dcGzip = 3,
    dcLzma = 4,
    dcPack = 5,
    dcXz = 6,
    dcEmpty = 7
} Decompress;
426
427 static const char marks[MARKS + 1] = "+-!=";
428 static const int colors[MARKS + 1] =
429 {2, 1, 6, 4};
430
431 static DATA *all_data;
432 static char *S_option = 0;
433 static char *D_option = 0;
434 static const char *comment_opt = "";
435 static char *path_opt = 0;
436 static int count_files; /* true if we count added/deleted files */
437 static int format_opt = FMT_NORMAL;
438 static int max_name_wide; /* maximum amount reserved for filenames */
439 static int max_width = 80; /* the specified width-limit */
440 static int merge_names = 1; /* true if we merge similar filenames */
441 static int merge_opt = 0; /* true if we merge ins/del as modified */
442 static int min_name_wide; /* minimum amount reserved for filenames */
443 static int names_only; /* true if we list filenames only */
444 static int num_marks = 3; /* 3 or 4, according to "-P" option */
445 static int path_dest; /* true if path_opt is destination (patched) */
446 static int plot_width; /* the amount left over for histogram */
447 static int prefix_opt = -1; /* if positive, controls stripping of PATHSEP */
448 static int quiet = 0; /* -q option */
449 static int reverse_opt; /* true if results are reversed */
450 static int round_opt = 0; /* if nonzero, round data for histogram */
451 static int show_colors; /* true if showing SGR colors */
452 static int show_progress; /* if not writing to tty, show progress */
453 static int sort_names = 1; /* true if we sort filenames */
454 static int summary_only = 0; /* true if only summary line is shown */
455 static int suppress_binary = 0; /* -b option */
456 static int trim_escapes = 0; /* -E option */
457 static int table_opt = 0; /* if 1/2, write table instead/also plot */
458 static int trace_opt = 0; /* if nonzero, write debugging information */
459 static int unchanged = 0; /* special-case for -S vs modified-files */
460 static int verbose = 0; /* -v option */
461 static long plot_scale; /* the effective scale (1:maximum) */
462
463 #ifdef HAVE_TSEARCH
464 static int use_tsearch;
465 static void *sorted_data;
466 #endif
467
468 static int number_len = 5;
469 static int prefix_len = -1;
470
471 /******************************************************************************/
472
#ifdef GCC_NORETURN
static void failed(const char *) GCC_NORETURN;
#endif

/*
 * Print the system error message for the current errno, prefixed by 's', and
 * exit with a failure status.  This function does not return.
 */
static void
failed(const char *s)
{
    perror(s);
    exit(EXIT_FAILURE);
}
483
/*
 * Allocate 's' bytes with malloc().  A null result is never returned to the
 * caller: the program exits via failed() instead.
 */
static void *
xmalloc(size_t s)
{
    void *result = malloc(s);

    if (result == NULL)
	failed("malloc");
    return result;
}
493
/*
 * Get the file status for 'name' into 'sb', using lstat() when the
 * configuration provides it and stat() otherwise.
 *
 * Returns 0 on success, or -1 on failure (including a null 'name').
 */
static int
do_stat(const char *name, struct stat *sb)
{
    if (name == 0)
	return -1;
#ifdef HAVE_LSTAT
    return lstat(name, sb);
#else
    return stat(name, sb);
#endif
}
509
510 static mode_t
get_stat(const char * name)511 get_stat(const char *name)
512 {
513 struct stat sb;
514 int rc = do_stat(name, &sb);
515 return ((rc == 0) ? (sb.st_mode & S_IFMT) : 0);
516 }
517
518 static int
is_dir(const char * name)519 is_dir(const char *name)
520 {
521 return get_stat(name) == S_IFDIR;
522 }
523
524 static int
is_file(const char * name)525 is_file(const char *name)
526 {
527 return get_stat(name) == S_IFREG;
528 }
529
/*
 * Tell whether 'source' and 'target' have identical contents.
 *
 * Returns 1 only if do_stat() reports both as regular files of the same
 * size, both can be opened, and every byte read from one matches the other
 * up to end-of-file without a read error.  Returns 0 in all other cases.
 *
 * The files are opened in binary mode so that the comparison is not affected
 * by text-mode translation on systems which distinguish the two.
 */
static int
same_file(const char *source, const char *target)
{
    int rc = 0;
    struct stat ssb;
    struct stat dsb;

    if (do_stat(source, &ssb) == 0 && S_ISREG(ssb.st_mode)
	&& do_stat(target, &dsb) == 0 && S_ISREG(dsb.st_mode)
	&& ssb.st_size == dsb.st_size) {
	FILE *ip = fopen(source, "rb");
	if (ip != 0) {
	    FILE *op = fopen(target, "rb");
	    if (op != 0) {
		int a;
		int b;

		/* read in step until the bytes differ or both reach EOF */
		do {
		    a = fgetc(ip);
		    b = fgetc(op);
		} while (a == b && a != EOF);

		/* EOF from a read error must not be taken as a match */
		rc = (a == b) && !ferror(ip) && !ferror(op);
		fclose(op);
	    }
	    fclose(ip);
	}
    }
    return rc;
}
568
569 static void
blip(int c)570 blip(int c)
571 {
572 if (show_progress) {
573 (void) fflush(stdout);
574 (void) fputc(c, stderr);
575 (void) fflush(stderr);
576 }
577 }
578
/*
 * Return a newly allocated copy of the string 's'.
 *
 * Like xmalloc(), this never returns a null pointer: if the copy cannot be
 * allocated, the program exits via failed().  Callers such as new_data()
 * store the result and use it without checking.
 */
static char *
new_string(const char *s)
{
#ifdef HAVE_STRDUP
    char *result = strdup(s);

    if (result == NULL)
	failed("strdup");
    return result;
#else
    return strcpy((char *) xmalloc((size_t) (strlen(s) + 1)), s);
#endif
}
588
589 static int
compare_data(const void * a,const void * b)590 compare_data(const void *a, const void *b)
591 {
592 const DATA *p = (const DATA *) a;
593 const DATA *q = (const DATA *) b;
594 return ((p != NULL)
595 ? ((q != NULL)
596 ? strcmp(p->name + p->base, q->name + q->base)
597 : 1)
598 : -1);
599 }
600
601 static void
init_data(DATA * data,const char * name,int copy,int base)602 init_data(DATA * data, const char *name, int copy, int base)
603 {
604 memset(data, 0, sizeof(*data));
605 data->name = (char *) name;
606 data->copy = copy;
607 data->base = base;
608 data->cmt = Normal;
609 }
610
611 static DATA *
new_data(const char * name,int base)612 new_data(const char *name, int base)
613 {
614 DATA *r = (DATA *) xmalloc(sizeof(DATA));
615
616 init_data(r, new_string(name), 0, base);
617
618 return r;
619 }
620
#ifdef HAVE_TSEARCH
/*
 * Look up 'name' (compared from offset 'base') in the tsearch() tree.  If an
 * entry exists, return it.  Otherwise create a new entry, add it to the tree
 * and push it onto the front of the all_data list, and return that.
 *
 * tsearch() returns a null pointer when it cannot allocate a node; in that
 * case the new entry would be on the list but not in the tree, so treat it
 * like any other allocation failure and exit via failed().
 */
static DATA *
add_tsearch_data(const char *name, int base)
{
    DATA find;
    DATA *result;
    void *pp;

    /* a temporary key which borrows 'name' is enough for the lookup */
    init_data(&find, name, 1, base);
    if ((pp = tfind(&find, &sorted_data, compare_data)) != 0)
	return *(DATA **) pp;

    result = new_data(name, base);
    if (tsearch(result, &sorted_data, compare_data) == 0)
	failed("tsearch");
    result->link = all_data;
    all_data = result;

    return result;
}
#endif
642
643 static int
count_prefix(const char * name)644 count_prefix(const char *name)
645 {
646 int count = 0;
647 const char *s;
648 while ((s = strchr(name, PATHSEP)) != 0) {
649 name = s + 1;
650 ++count;
651 }
652 return count;
653 }
654
655 static const char *
skip_prefix(const char * name,int prefix,int * base)656 skip_prefix(const char *name, int prefix, int *base)
657 {
658 if (prefix >= 0) {
659 int n;
660 *base = 0;
661
662 for (n = prefix; n > 0; n--) {
663 const char *s = strchr(name + *base, PATHSEP);
664 if (s == 0 || *++s == EOS) {
665 name = s;
666 break;
667 }
668 *base = (int) (s - name);
669 }
670 TRACE(("** base set to %d\n", *base));
671 }
672 return name;
673 }
674
675 static DATA *
find_data(const char * name)676 find_data(const char *name)
677 {
678 DATA *r;
679 int base = 0;
680
681 TRACE(("** find_data(%s)\n", name));
682
683 /* Compute the base offset if the prefix option is used */
684 if (prefix_opt >= 0) {
685 (void) skip_prefix(name, prefix_opt, &base);
686 }
687
688 /* Insert into sorted list (usually sorted). If we are not sorting or
689 * merging names, we fall off the end and link the new entry to the end of
690 * the list. If the prefix option is used, the prefix is ignored by the
691 * merge and sort operations.
692 *
693 * If we have tsearch(), we will maintain the sorted list using it and
694 * tfind().
695 */
696 #ifdef HAVE_TSEARCH
697 if (use_tsearch) {
698 r = add_tsearch_data(name, base);
699 } else
700 #endif
701 {
702 DATA *p;
703 DATA find;
704 DATA *q;
705
706 init_data(&find, name, 1, base);
707 for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
708 int cmp = compare_data(p, &find);
709 if (merge_names && (cmp == 0))
710 return p;
711 if (sort_names && (cmp > 0))
712 break;
713 }
714 r = new_data(name, base);
715 if (q != 0)
716 q->link = r;
717 else
718 all_data = r;
719
720 r->link = p;
721 }
722
723 return r;
724 }
725
726 /*
727 * Remove a unneeded data item from the linked list. Free the name as well.
728 */
729 static int
delink(DATA * data)730 delink(DATA * data)
731 {
732 DATA *p, *q;
733
734 TRACE(("** delink '%s'\n", data->name));
735
736 #ifdef HAVE_TSEARCH
737 if (use_tsearch) {
738 if (tdelete(data, &sorted_data, compare_data) == 0)
739 return 0;
740 }
741 #endif
742 for (p = all_data, q = 0; p != 0; q = p, p = p->link) {
743 if (p == data) {
744 if (q != 0)
745 q->link = p->link;
746 else
747 all_data = p->link;
748 if (!p->copy)
749 free(p->name);
750 free(p);
751 return 1;
752 }
753 }
754 return 0;
755 }
756
757 /*
758 * Compare string 's' against a constant, returning either a pointer just
759 * past the matched part of 's' if it matches exactly, or null if a mismatch
760 * was found.
761 */
762 static char *
match(char * s,const char * p)763 match(char *s, const char *p)
764 {
765 int ok = 0;
766
767 while (*s != EOS) {
768 if (*p == EOS) {
769 ok = 1;
770 break;
771 }
772 if (*s++ != *p++)
773 break;
774 if (*s == EOS && *p == EOS) {
775 ok = 1;
776 break;
777 }
778 }
779 return ok ? s : 0;
780 }
781
/*
 * Return true if 's' consists of two decimal numbers separated by a period,
 * with nothing following the second number.
 */
static int
version_num(const char *s)
{
    int major;
    int minor;
    char extra[2];
    int fields = sscanf(s, "%d.%d%c", &major, &minor, extra);

    /* exactly two fields: both numbers, and no trailing character */
    return (fields == 2);
}
789
/*
 * Check for a range of line-numbers, used in editing scripts: either
 * "first,last" or a single number, with nothing following.
 */
static int
edit_range(const char *s)
{
    int lo;
    int hi;
    char extra[2];

    if (sscanf(s, "%d,%d%c", &lo, &hi, extra) == 2)
	return 1;
    return (sscanf(s, "%d%c", &lo, extra) == 1);
}
801
802 /*
803 * Decode a range for default diff.
804 */
805 static int
decode_default(char * s,long * first,long * first_size,long * second,long * second_size)806 decode_default(char *s,
807 long *first, long *first_size,
808 long *second, long *second_size)
809 {
810 int rc = 0;
811 char *next;
812
813 if (isdigit(UC(*s))) {
814 *first_size = 1;
815 *second_size = 1;
816
817 *first = strtol(s, &next, 10);
818 if (next != 0 && next != s) {
819 if (*next == ',') {
820 s = ++next;
821 *first_size = strtol(s, &next, 10) + 1 - *first;
822 }
823 }
824 if (next != 0 && next != s) {
825 switch (*next++) {
826 case 'a':
827 case 'c':
828 case 'd':
829 s = next;
830 *second = strtol(s, &next, 10);
831 if (next != 0 && next != s) {
832 if (*next == ',') {
833 s = ++next;
834 *second_size = strtol(s, &next, 10) + 1 - *second;
835 }
836 }
837 if (next != 0 && next != s && *next == EOS)
838 rc = 1;
839 break;
840 }
841 }
842 }
843 return rc;
844 }
845
846 /*
847 * Decode a range for unified diff. Oddly, the comments in diffutils code
848 * claim that both numbers are line-numbers. However, inspection of the output
849 * shows that the numbers are a line-number followed by a count.
850 */
851 static char *
decode_range(char * s,int * first,int * second)852 decode_range(char *s, int *first, int *second)
853 {
854 if (isdigit(UC(*s))) {
855 int count = 0;
856 int value[2];
857
858 value[0] = 0;
859 value[1] = 0;
860 while (*s != EOS) {
861 int ch = UC(*s);
862 if (isdigit(ch)) {
863 value[count] = (10 * value[count]) + (ch - '0');
864 } else if (ch == ',') {
865 if (++count > 1) {
866 s = NULL;
867 break;
868 }
869 value[count] = 0;
870 } else {
871 break;
872 }
873 ++s;
874 }
875 if (s != NULL) {
876 *first = value[0];
877 if (count == 0) {
878 *second = *first; /* diffutils 2.7 does this */
879 } else {
880 *second = value[1];
881 }
882 TRACE(("** decode_range #%d first=%d, second=%d\n",
883 count + 1, *first, *second));
884 }
885 }
886 return s;
887 }
888
889 static int
HadDiffs(const DATA * data)890 HadDiffs(const DATA * data)
891 {
892 return InsOf(data) != 0
893 || DelOf(data) != 0
894 || ModOf(data) != 0
895 || data->cmt != Normal;
896 }
897
/*
 * If the given path is not one of the "ignore" paths (the empty string and
 * "/dev/null"), then return true.
 */
static int
can_be_merged(const char *path)
{
    if (strcmp(path, "") == 0)
	return 0;
    if (strcmp(path, "/dev/null") == 0)
	return 0;
    return 1;
}
910
911 static int
is_leaf(const char * theLeaf,const char * path)912 is_leaf(const char *theLeaf, const char *path)
913 {
914 char *s;
915
916 if (strchr(theLeaf, PATHSEP) == 0
917 && (s = strrchr(path, PATHSEP)) != 0
918 && !strcmp(++s, theLeaf))
919 return 1;
920 return 0;
921 }
922
923 static char *
trim_datapath(DATA ** datap,size_t length,int * localp)924 trim_datapath(DATA ** datap, size_t length, int *localp)
925 {
926 char *target = (*datap)->name;
927
928 #ifdef HAVE_TSEARCH
929 /*
930 * If we are using tsearch(), make a local copy of the data
931 * so we can trim it without interfering with tsearch's
932 * notion of the ordering of data. That will create some
933 * spurious empty data, so we add the changed() macro in a
934 * few places to skip over those.
935 */
936 if (use_tsearch) {
937 char *trim = new_string(target);
938 trim[length] = EOS;
939 *datap = add_tsearch_data(trim, (*datap)->base);
940 target = (*datap)->name;
941 free(trim);
942 *localp = 1;
943 } else
944 #endif
945 target[length] = EOS;
946
947 return target;
948 }
949
950 static size_t
compare_tails(const char * target,const char * source,int * diff)951 compare_tails(const char *target, const char *source, int *diff)
952 {
953 size_t len1 = strlen(target);
954 size_t len2 = strlen(source);
955 size_t n;
956 size_t matched = 0;
957
958 *diff = 0;
959 for (n = 1; n <= len1 && n <= len2; n++) {
960 if (target[len1 - n] != source[len2 - n]) {
961 *diff = (int) n;
962 break;
963 }
964 if (source[len2 - n] == PATHSEP) {
965 matched = n;
966 }
967 }
968 return matched;
969 }
970
971 /*
972 * The 'data' parameter points to the first of two markers, while
973 * 'path' is the pathname from the second marker.
974 *
975 * On the first call for
976 * a given file, the 'data' parameter stores no differences.
977 */
978 static char *
do_merging(DATA * data,char * path,int * freed)979 do_merging(DATA * data, char *path, int *freed)
980 {
981 char *target = reverse_opt ? path : data->name;
982 char *source = reverse_opt ? data->name : path;
983 char *result = source;
984 int diff;
985
986 TRACE(("** do_merging(\"%s\",\"%s\") diffs:%d\n",
987 data->name, path, HadDiffs(data)));
988
989 *freed = 0;
990 if (!HadDiffs(data)) {
991
992 if (is_leaf(target, source)) {
993 TRACE(("** is_leaf: \"%s\" vs \"%s\"\n", target, source));
994 if (reverse_opt) {
995 TRACE((".. no action @%d\n", __LINE__));
996 } else {
997 *freed = delink(data);
998 }
999 } else if (can_be_merged(target)
1000 && can_be_merged(source)) {
1001 size_t len1 = strlen(target);
1002 size_t len2 = strlen(source);
1003 int local = 0;
1004
1005 /*
1006 * If the source/target differ only by some suffix, e.g., ".orig"
1007 * or ".bak", strip that off. The target may may also be a
1008 * temporary filename (which would not be merged since it has no
1009 * apparent relationship to the current).
1010 */
1011 if (len1 > len2) {
1012 if (!strncmp(target, source, len2)) {
1013 TRACE(("** trimming data \"%s\" to \"%.*s\"\n",
1014 target, (int) len2, target));
1015 if (reverse_opt) {
1016 TRACE((".. no action @%d\n", __LINE__));
1017 } else {
1018 target = trim_datapath(&data, len2, &local);
1019 }
1020 }
1021 } else if (len1 < len2) {
1022 if (!strncmp(target, source, len1)) {
1023 TRACE(("** trimming source \"%s\" to \"%.*s\"\n",
1024 source, (int) len1, source));
1025 if (reverse_opt) {
1026 TRACE((".. no action @%d\n", __LINE__));
1027 } else {
1028 source[len2 = len1] = EOS;
1029 }
1030 }
1031 }
1032
1033 /*
1034 * If there was no "-p" option, look for the best match by
1035 * stripping prefixes from both source/target strings.
1036 */
1037 if (prefix_opt < 0) {
1038 int matched = 0;
1039 /*
1040 * Now (whether or not we trimmed a suffix), scan back from the
1041 * end of source/target strings to find if they happen to share
1042 * a common ending, e.g., a/b/c versus d/b/c. If the strings
1043 * are not identical, then 'diff' will be set, but if they have
1044 * a common ending then 'matched' will be set.
1045 */
1046 diff = 0;
1047 matched = (int) compare_tails(target, source, &diff);
1048
1049 TRACE(("** merge @%d, prefix_opt=%d matched=%d diff=%d\n",
1050 __LINE__, prefix_opt, matched, diff));
1051 if (matched != 0 && diff) {
1052 if (reverse_opt) {
1053 TRACE((".. no action @%d\n", __LINE__));
1054 } else {
1055 result = source + ((int) len2 - matched + 1);
1056 }
1057 }
1058 }
1059
1060 if (!local) {
1061 if (reverse_opt) {
1062 TRACE((".. no action @%d\n", __LINE__));
1063 } else {
1064 *freed = delink(data);
1065 }
1066 }
1067 } else if (reverse_opt) {
1068 TRACE((".. no action @%d\n", __LINE__));
1069 if (can_be_merged(source)) {
1070 TRACE(("** merge @%d\n", __LINE__));
1071 } else {
1072 TRACE(("** do not merge, retain @%d\n", __LINE__));
1073 /* must not merge, retain existing name */
1074 result = target;
1075 }
1076 } else {
1077 if (can_be_merged(source)) {
1078 TRACE(("** merge @%d\n", __LINE__));
1079 *freed = delink(data);
1080 } else {
1081 TRACE(("** do not merge, retain @%d\n", __LINE__));
1082 /* must not merge, retain existing name */
1083 result = target;
1084 }
1085 }
1086 } else if (reverse_opt) {
1087 TRACE((".. no action @%d\n", __LINE__));
1088 if (can_be_merged(source)) {
1089 TRACE(("** merge @%d\n", __LINE__));
1090 result = target;
1091 } else {
1092 TRACE(("** do not merge, retain @%d\n", __LINE__));
1093 }
1094 } else {
1095 if (can_be_merged(source)) {
1096 TRACE(("** %smerge @%d\n", merge_names ? "" : "do not ", __LINE__));
1097 if (merge_names
1098 && *target != EOS
1099 && prefix_opt < 0) {
1100 size_t matched = compare_tails(target, source, &diff);
1101 if (matched && !diff)
1102 result = target + (int) (strlen(target) - matched);
1103 }
1104 } else {
1105 TRACE(("** do not merge, retain @%d\n", __LINE__));
1106 result = target;
1107 }
1108 }
1109 TRACE(("** finish do_merging ->\"%s\"\n", result));
1110 return result;
1111 }
1112
1113 static int
begin_data(const DATA * p)1114 begin_data(const DATA * p)
1115 {
1116 TRACE(("...begin_data(%s)\n", p->name));
1117 if (!can_be_merged(p->name)
1118 && strchr(p->name, PATHSEP) != 0) {
1119 TRACE(("** begin_data:HAVE_PATH\n"));
1120 return HAVE_PATH;
1121 }
1122 TRACE(("** begin_data:HAVE_GENERIC\n"));
1123 return HAVE_GENERIC;
1124 }
1125
/*
 * Return the address of the first character at or after 's' for which
 * isspace() is false (the terminating null qualifies).
 */
static char *
skip_blanks(char *s)
{
    for (; isspace(UC(*s)); ++s) {
        ;
    }
    return s;
}
1133
1134 /*
1135 * Skip a filename, which may be in quotes, to allow embedded blanks in the
1136 * name.
1137 */
1138 static char *
skip_filename(char * s)1139 skip_filename(char *s)
1140 {
1141 int delim = (*s == SQUOTE) ? SQUOTE : DQUOTE;
1142
1143 if ((*s == delim) && (s[1] != EOS) && (strchr) (s + 1, delim) != 0) {
1144 ++s;
1145 while (*s != EOS && (*s != delim) && isprint(UC(*s))) {
1146 ++s;
1147 }
1148 ++s;
1149 } else {
1150 while (*s != EOS && isgraph(UC(*s))) {
1151 ++s;
1152 }
1153 }
1154 return s;
1155 }
1156
1157 static char *
skip_options(char * params)1158 skip_options(char *params)
1159 {
1160 while (*params != EOS) {
1161 params = skip_blanks(params);
1162 if (*params == '-') {
1163 while (isgraph(UC(*params)))
1164 params++;
1165 } else {
1166 break;
1167 }
1168 }
1169 return skip_blanks(params);
1170 }
1171
1172 /*
1173 * Strip single-quotes from a name (needed for recent makepatch versions).
1174 */
1175 static void
dequote(char * s)1176 dequote(char *s)
1177 {
1178 size_t len = strlen(s);
1179 int delim = (*s == SQUOTE) ? SQUOTE : DQUOTE;
1180
1181 if (*s == delim && len > 2 && s[len - 1] == delim) {
1182 int n;
1183
1184 for (n = 0; (s[n] = s[n + 1]) != EOS; ++n) {
1185 ;
1186 }
1187 s[len - 2] = EOS;
1188 }
1189 }
1190
/*
 * Allocate 'want' bytes with xmalloc() and store the address in '*buffer'.
 */
static void
fixed_buffer(char **buffer, size_t want)
{
    char *fresh = (char *) xmalloc(want);

    *buffer = fresh;
}
1199
/*
 * Resize the buffer at '*buffer' to 'want' bytes with realloc(), storing the
 * new address back in '*buffer'.  A null result is reported via failed().
 */
static void
adjust_buffer(char **buffer, size_t want)
{
    char *grown = (char *) realloc(*buffer, want);

    *buffer = grown;
    if (grown == 0)
        failed("realloc");
}
1209
1210 /*
1211 * Read until newline or end-of-file, allocating the line-buffer so it is long
1212 * enough for the input.
1213 */
1214 static int
get_line(char ** buffer,size_t * have,FILE * fp)1215 get_line(char **buffer, size_t *have, FILE *fp)
1216 {
1217 int ch;
1218 size_t used = 0;
1219
1220 while ((ch = MY_GETC(fp)) != EOF) {
1221 if (used + 2 > *have) {
1222 adjust_buffer(buffer, *have *= 2);
1223 }
1224 (*buffer)[used++] = (char) ch;
1225 if (ch == '\n')
1226 break;
1227 }
1228 (*buffer)[used] = EOS;
1229 return (used != 0);
1230 }
1231
1232 static const char *
data_filename(const DATA * p)1233 data_filename(const DATA * p)
1234 {
1235 return p ? (p->name + (prefix_opt >= 0 ? p->base : prefix_len)) : "";
1236 }
1237
/*
 * Count the newline characters in the named file.
 *
 * Returns the number of '\n' characters read, or -1 if the file cannot be
 * opened (a "Cannot open" message is written to stderr in that case).  The
 * -1 lets a caller tell a missing file apart from an empty one.
 */
static int
count_lines2(const char *filename)
{
    int result = -1;            /* stays -1 unless the file is opened */
    FILE *fp;

    TRACE(("count_lines \"%s\"\n", filename));

    if ((fp = fopen(filename, "r")) != 0) {
        int ch;

        result = 0;
        while ((ch = MY_GETC(fp)) != EOF) {
            if (ch == '\n')
                ++result;
        }
        (void) fclose(fp);
        TRACE(("->%d lines\n", result));
    } else {
        /* flush stdout first so the message is not interleaved with it */
        (void) fflush(stdout);
        fprintf(stderr, "Cannot open \"%s\"\n", filename);
    }
    return result;
}
1262
1263 /*
1264 * Count the (new)lines in a file, return -1 if the file is not found.
1265 */
1266 static int
count_lines(DATA * p)1267 count_lines(DATA * p)
1268 {
1269 int result = -1;
1270 char *filename = 0;
1271 const char *filetail = data_filename(p);
1272 size_t want = strlen(path_opt) + 2 + strlen(filetail) + strlen(p->name);
1273
1274 if ((filename = malloc(want)) != 0) {
1275 int merge = 0;
1276
1277 if (path_dest && *path_opt != EOS && *filetail != PATHSEP) {
1278 size_t path_len = strlen(path_opt);
1279 size_t tail_len = strlen(filetail);
1280 char *tail_sep = strchr(filetail, PATHSEP);
1281 size_t n;
1282
1283 for (n = path_len - 1; (int) n >= 0; --n) {
1284 if ((path_len - n) > tail_len)
1285 break;
1286 if ((n == 0 || path_opt[n - 1] == PATHSEP)
1287 && filetail[path_len - n] == PATHSEP) {
1288 if (!strncmp(path_opt + n, filetail, path_len - n)) {
1289 merge = 1;
1290 strcpy(filename, path_opt);
1291 strcpy(filename + n, filetail);
1292 break;
1293 }
1294 }
1295 }
1296
1297 if (merge == 0 && tail_sep != 0) {
1298 tail_len = (size_t) (tail_sep - filetail);
1299 if (tail_len != 0 && tail_len <= path_len) {
1300 if (tail_len < path_len
1301 && path_opt[path_len - tail_len - 1] != PATHSEP) {
1302 merge = 0;
1303 } else if (!strncmp(path_opt + path_len - tail_len,
1304 filetail,
1305 tail_len - 1)) {
1306 merge = 1;
1307 if (path_len > tail_len) {
1308 sprintf(filename, "%.*s%c%s",
1309 (int) (path_len - tail_len),
1310 path_opt,
1311 PATHSEP,
1312 filetail);
1313 } else {
1314 strcpy(filename, filetail);
1315 }
1316 }
1317 }
1318 }
1319 }
1320 if (!merge) {
1321 if (!path_opt) {
1322 strcpy(filename, p->name);
1323 } else {
1324 sprintf(filename, "%s%c%s", path_opt, PATHSEP, filetail);
1325 }
1326 }
1327
1328 result = count_lines2(filename);
1329 free(filename);
1330 } else {
1331 failed("count_lines");
1332 }
1333 return result;
1334 }
1335
1336 static void
update_chunk(DATA * p,Change change)1337 update_chunk(DATA * p, Change change)
1338 {
1339 if (merge_opt) {
1340 p->pending += 1;
1341 p->chunk[change] += 1;
1342 } else {
1343 p->count[change] += 1;
1344 }
1345 }
1346
1347 static void
finish_chunk(DATA * p)1348 finish_chunk(DATA * p)
1349 {
1350 if (p->pending) {
1351 int i;
1352
1353 p->pending = 0;
1354 p->chunks += 1;
1355 if (merge_opt) {
1356 /*
1357 * This is crude, but to make it really precise we would have
1358 * to keep an array of line-numbers to which which in a chunk
1359 * are marked as insert/delete.
1360 */
1361 if (p->chunk[cInsert] && p->chunk[cDelete]) {
1362 long change;
1363 if (p->chunk[cInsert] > p->chunk[cDelete]) {
1364 change = p->chunk[cDelete];
1365 } else {
1366 change = p->chunk[cInsert];
1367 }
1368 p->chunk[cInsert] -= change;
1369 p->chunk[cDelete] -= change;
1370 p->chunk[cModify] += change;
1371 }
1372 }
1373 for_each_mark(i) {
1374 p->count[i] += p->chunk[i];
1375 p->chunk[i] = 0;
1376 }
1377 }
1378 }
1379
1380 static char *
copy_notabs(char * target,char * source,size_t limit)1381 copy_notabs(char *target, char *source, size_t limit)
1382 {
1383 char *result = 0;
1384 if (limit-- != 0) { /* count trailing null */
1385 char ch;
1386 int found = 0;
1387 while ((ch = *source) != EOS) {
1388 if (ch == TAB) {
1389 if (found)
1390 result = source;
1391 break;
1392 } else if (limit-- == 0) {
1393 break;
1394 }
1395 *target++ = ch;
1396 *target = EOS;
1397 ++source;
1398 found = 1;
1399 }
1400 }
1401 return result;
1402 }
1403
1404 static char *
copy_graphs(char * target,char * source,size_t limit)1405 copy_graphs(char *target, char *source, size_t limit)
1406 {
1407 int found = 0;
1408 if (limit-- != 0) { /* count trailing null */
1409 char ch;
1410 while ((ch = *source) != EOS) {
1411 if (ch == TAB || ch == BLANK) {
1412 break;
1413 } else if (limit-- == 0) {
1414 found = 0;
1415 break;
1416 }
1417 *target++ = ch;
1418 *target = EOS;
1419 ++source;
1420 found = 1;
1421 }
1422 }
1423 return found ? source : NULL;
1424 }
1425
/*
 * Tested with git 2.11:
 * git uses dummy directory-names "a" and "b" rather than the actual working
 * directory.  Also, it allows non-printable characters, encoded in C-style
 * backslash sequences.  When those are used, it double-quotes the string.
 *
 * Copy such a name from 'source' to 'target', decoding backslash sequences
 * when the name is double-quoted.  'limit' is the size of 'target'.
 *
 * Returns the address in 'source' where scanning stopped, or NULL when the
 * name does not begin with "a/" or "b/", an escape cannot be decoded, text
 * follows the closing quote, or 'target' runs out of room.
 */
static char *
copy_git_name(char *target, char *source, size_t limit)
{
    int found = 0;              /* set once a character has been copied */
    int quoted = 0;             /* set when the name began with a double-quote */

    /*
     * Account for double-quote.
     */
    if (*source == DQUOTE) {
        quoted = 1;
        ++source;
        limit--;
    }

    /*
     * Check for the dummy directory paths, and quit if not used.
     * (Zeroing the limit makes the copy below a no-op, so NULL is returned.)
     */
    if (limit <= 2 || (strncmp(source, "a/", 2) && strncmp(source, "b/", 2))) {
        limit = 0;
    } else {
        if (path_dest && !strncmp(source, "b/", 2)) {
            source += 2;        /* tweak to help with counting lines */
        }
    }

    if (limit-- != 0) {         /* count trailing null */
        char ch;
        while ((ch = *source) != EOS) {
            if (quoted) {
                if (ch == DQUOTE) {
                    /* closing quote: anything after it invalidates the name */
                    if (*++source != EOS)
                        found = 0;
                    break;
                } else if (ch == BACKSL) {
                    int fail = 0;
                    if ((ch = *++source) == EOS) {
                        /* a backslash at the very end is malformed */
                        fail = 1;
                    } else if (isoctal(UC(ch))) {
                        int need = 3;
                        int value = 0;
                        /* decode octal escapes into UTF-8 bytes */
                        while (need-- > 0) {
                            if (isoctal(*source)) {
                                value <<= 3;
                                value |= (UC(*source) - '0');
                                /*
                                 * stay on the last digit; the common
                                 * "++source" at the bottom steps past it
                                 */
                                if (need) {
                                    ++source;
                                }
                            } else {
                                /* fewer than three octal digits */
                                fail = 1;
                                break;
                            }
                        }
                        ch = (char) value;
                    } else {
                        --limit;
                        switch (ch) {
                        case BACKSL:
                            /* FALLTHRU */
                        case DQUOTE:
                            /* escaped backslash or quote stands for itself */
                            break;
                        case 'b':
                            ch = '\b';
                            break;
                        case 'n':
                            ch = '\n';
                            break;
                        case 'r':
                            ch = '\r';
                            break;
                        case 't':
                            ch = '\t';
                            break;
                        default:
                            /* any other escape is not recognized */
                            fail = 1;
                            break;
                        }
                    }
                    if (fail) {
                        found = 0;
                        break;
                    }
                }
            } else if (!isprint(UC(ch))) {
                /* an unquoted name ends at the first non-printable character */
                break;
            }
            if (limit-- == 0) {
                /* no room left in target */
                found = 0;
                break;
            }
            *target++ = ch;
            *target = EOS;
            ++source;
            found = 1;
        }
    }
    return found ? source : NULL;
}
1531
1532 /* perforce */
1533 static char *
copy_p4_name(char * target,char * source,size_t limit)1534 copy_p4_name(char *target, char *source, size_t limit)
1535 {
1536 int found = 0;
1537 if (limit-- != 0) { /* count trailing null */
1538 char ch;
1539 while ((ch = *source) != EOS) {
1540 if (ch == TAB || ch == BLANK || ch == '#') {
1541 break;
1542 } else if (limit-- == 0) {
1543 found = 0;
1544 break;
1545 }
1546 *target++ = ch;
1547 *target = EOS;
1548 ++source;
1549 found = 1;
1550 }
1551 }
1552 return found ? source : NULL;
1553 }
1554
/*
 * Parse a decimal integer at 'source' with strtol(), store it (narrowed to
 * int) in '*target', and return the address just past the digits consumed.
 * If no digits are present, zero is stored and 'source' itself is returned.
 */
static char *
copy_integer(int *target, char *source)
{
    char *end = NULL;

    *target = (int) strtol(source, &end, 10);
    return end;
}
1563
1564 static char *
need_blanks(char * source)1565 need_blanks(char *source)
1566 {
1567 int found = 0;
1568 while (*source != EOS) {
1569 char ch = *source++;
1570 if (ch == BLANK || ch == TAB)
1571 found = 1;
1572 }
1573 return found ? source : NULL;
1574 }
1575
1576 static char *
need_graphs(char * source)1577 need_graphs(char *source)
1578 {
1579 char *result = NULL;
1580 int found = 0;
1581 while (*source != EOS) {
1582 char ch = *source;
1583 if (ch == BLANK || ch == TAB || ch == EOS) {
1584 if (found)
1585 result = source;
1586 break;
1587 }
1588 ++source;
1589 found = 1;
1590 }
1591 return result;
1592 }
1593
1594 static char *
need_nospcs(char * source)1595 need_nospcs(char *source)
1596 {
1597 char *result = NULL;
1598 int found = 0;
1599 while (*source != EOS) {
1600 char ch = *source;
1601 if (ch == BLANK || ch == EOS) {
1602 if (found)
1603 result = source;
1604 break;
1605 }
1606 ++source;
1607 found = 1;
1608 }
1609 return result;
1610 }
1611
1612 /* this is used with SVN */
1613 static char *
need_parens(char * source)1614 need_parens(char *source)
1615 {
1616 char *result = NULL;
1617 if (*source++ == LPAREN) {
1618 while (*source != EOS) {
1619 if (*source++ == RPAREN) {
1620 result = source;
1621 break;
1622 }
1623 }
1624 }
1625 return result;
1626 }
1627
/* true when the two date separators are both '/' or both '-' */
#define date_delims(a,b) (((a)=='/' && (b)=='/') || ((a) == '-' && (b) == '-'))
/*
 * Trace which case of do_file()'s switch is handling the current line; it
 * expands in place and so relies on the locals 'buffer', 'ok' and 'that'.
 */
#define CASE_TRACE() TRACE(("** handle case for '%c' %d:%s\n", *buffer, ok, that ? that->name : ""))
1630
1631 static void
do_file(FILE * fp,const char * default_name)1632 do_file(FILE *fp, const char *default_name)
1633 {
1634 static const char *only_stars = "***************";
1635
1636 DATA dummy;
1637 DATA *that = &dummy;
1638 DATA *prev = 0;
1639 char *buffer = 0;
1640 char *b_fname = 0;
1641 size_t length = 0;
1642 size_t fixed = 0;
1643 int ok = HAVE_NOTHING;
1644 int marker;
1645 int freed = 0;
1646
1647 int unified = 0;
1648 int old_unify = 0;
1649 int new_unify = 0;
1650 int expect_unify = 0;
1651
1652 long old_dft = 0;
1653 long new_dft = 0;
1654
1655 int context = 1;
1656 int either = 0;
1657
1658 int first_ch;
1659 int git_diff = 0;
1660
1661 char *s;
1662 #if OPT_TRACE
1663 int line_no = 0;
1664 #endif
1665
1666 init_data(&dummy, "", 1, 0);
1667
1668 fixed_buffer(&buffer, fixed = length = BUFSIZ);
1669 fixed_buffer(&b_fname, length);
1670
1671 while (get_line(&buffer, &length, fp)) {
1672 /*
1673 * Adjust size of fixed-buffers so that a sscanf cannot overflow.
1674 */
1675 if (length > fixed) {
1676 fixed = length;
1677 adjust_buffer(&b_fname, length);
1678 }
1679
1680 /*
1681 * Trim trailing newline.
1682 */
1683 for (s = buffer + strlen(buffer); s != buffer; s--) {
1684 if ((UC(s[-1]) == '\n') || (UC(s[-1]) == '\r'))
1685 s[-1] = EOS;
1686 else
1687 break;
1688 }
1689
1690 /*
1691 * Trim escapes from colordiff.
1692 */
1693 #define isFINAL(c) (UC(*s) >= '\140' && UC(*s) <= '\176')
1694 if (trim_escapes && (strchr(buffer, '\033') != 0)) {
1695 char *d = buffer;
1696 s = d;
1697 while (*s != EOS) {
1698 if (*s == '\033') {
1699 while (*s != EOS && !isFINAL(*s)) {
1700 ++s;
1701 }
1702 if (*s != EOS) {
1703 ++s;
1704 continue;
1705 } else {
1706 break;
1707 }
1708 }
1709 *d++ = *s++;
1710 }
1711 *d = EOS;
1712 }
1713 ++line_no;
1714 TRACE(("[%05d] %s\n", line_no, buffer));
1715
1716 /*
1717 * "patch -U" can create ".rej" files lacking a filename header,
1718 * in unified format. Check for those.
1719 */
1720 if (line_no == 1 && !strncmp(buffer, "@@", (size_t) 2)) {
1721 unified = 2;
1722 that = find_data(default_name);
1723 ok = begin_data(that);
1724 }
1725
1726 /*
1727 * The lines identifying files in a context diff depend on how it was
1728 * invoked. But after the header, each chunk begins with a line
1729 * containing 15 *'s. Each chunk may contain a line-range with '***'
1730 * for the "before", and a line-range with '---' for the "after". The
1731 * part of the chunk depicting the deletion may be absent, though the
1732 * edit line is present.
1733 *
1734 * The markers for unified diff are a little different from the normal
1735 * context-diff. Also, the edit-lines in a unified diff won't have a
1736 * space in column 2. Because of the missing space, we have to count
1737 * lines to ensure we do not confuse the marker lines.
1738 */
1739 marker = 0;
1740 if (that != &dummy && !strcmp(buffer, only_stars)) {
1741 finish_chunk(that);
1742 TRACE(("** begin context chunk\n"));
1743 context = 2;
1744 } else if (line_no == 1 && !strcmp(buffer, only_stars)) {
1745 TRACE(("** begin context chunk\n"));
1746 context = 2;
1747 that = find_data(default_name);
1748 ok = begin_data(that);
1749 } else if (context == 2 && match(buffer, "*** ")) {
1750 context = 1;
1751 } else if (context == 1 && match(buffer, "--- ")) {
1752 marker = 1;
1753 context = 0;
1754 } else if (match(buffer, "*** ")) {
1755 } else if ((old_unify + new_unify) == 0 && match(buffer, "==== ")) {
1756 finish_chunk(that);
1757 unified = 2;
1758 } else if ((old_unify + new_unify) == 0 && match(buffer, "--- ")) {
1759 finish_chunk(that);
1760 marker = unified = 1;
1761 } else if ((old_unify + new_unify) == 0 && match(buffer, "+++ ")) {
1762 marker = unified = 2;
1763 } else if (unified == 2
1764 || ((old_unify + new_unify) == 0 && (*buffer == '@'))) {
1765 finish_chunk(that);
1766 unified = 0;
1767 if (*buffer == '@') {
1768 int old_base, new_base;
1769 int old_size = 0;
1770 int new_size = 0;
1771 char *sp;
1772
1773 old_unify = new_unify = 0;
1774 if ((sp = match(buffer, "@@ -")) != NULL
1775 && (sp = decode_range(sp, &old_base, &old_size)) != NULL
1776 && (sp = match(sp, " +")) != NULL
1777 && (sp = decode_range(sp, &new_base, &new_size)) != NULL
1778 && match(sp, " @") != NULL) {
1779 old_unify = old_size;
1780 new_unify = new_size;
1781 unified = -1;
1782 }
1783 }
1784 } else if (unified == 1 && !context) {
1785 /*
1786 * If unified==1, we guessed we would find a "+++" line, but since
1787 * we are here, we did not find that. The context check ensures
1788 * we do not mistake the "---" for a unified diff with that for
1789 * a context diff's "after" line-range.
1790 *
1791 * If we guessed wrong, then we probably found a data line with
1792 * "--" in the first two columns of the diff'd file.
1793 */
1794 unified = 0;
1795 TRACE(("?? Expected \"+++\" for unified diff\n"));
1796 if (prev != 0
1797 && prev != that
1798 && InsOf(that) == 0
1799 && DelOf(that) == 0
1800 && strcmp(prev->name, that->name)) {
1801 TRACE(("?? giveup on %ld/%ld %s\n", InsOf(that),
1802 DelOf(that), that->name));
1803 TRACE(("?? revert to %ld/%ld %s\n", InsOf(prev),
1804 DelOf(prev), prev->name));
1805 (void) delink(that);
1806 that = prev;
1807 update_chunk(that, cDelete);
1808 }
1809 } else if (old_unify + new_unify) {
1810 switch (*buffer) {
1811 case '-':
1812 if (old_unify)
1813 --old_unify;
1814 break;
1815 case '+':
1816 if (new_unify)
1817 --new_unify;
1818 break;
1819 case EOS:
1820 case ' ':
1821 if (old_unify)
1822 --old_unify;
1823 if (new_unify)
1824 --new_unify;
1825 break;
1826 case BACKSL:
1827 if (strstr(buffer, "newline") != 0) {
1828 break;
1829 }
1830 /* FALLTHRU */
1831 default:
1832 TRACE(("?? expected more in chunk\n"));
1833 old_unify = new_unify = 0;
1834 break;
1835 }
1836 if (!(old_unify + new_unify)) {
1837 expect_unify = 2;
1838 }
1839 } else {
1840 long old_base, new_base;
1841
1842 unified = 0;
1843
1844 if (line_no == 1
1845 && decode_default(buffer,
1846 &old_base, &old_dft,
1847 &new_base, &new_dft)) {
1848 TRACE(("DFT %ld,%ld -> %ld,%ld\n",
1849 old_base, old_base + old_dft - 1,
1850 new_base, new_base + new_dft - 1));
1851 finish_chunk(that);
1852 that = find_data("unknown");
1853 ok = begin_data(that);
1854 }
1855 }
1856
1857 /*
1858 * If the previous line ended a chunk of a unified diff, we may begin
1859 * another chunk, or begin another type of diff. If neither, do not
1860 * continue to accumulate counts for the unified diff which has ended.
1861 */
1862 if (expect_unify != 0) {
1863 if (expect_unify-- == 1) {
1864 if (unified == 0) {
1865 TRACE(("?? did not get chunk\n"));
1866 finish_chunk(that);
1867 that = &dummy;
1868 }
1869 }
1870 }
1871
1872 /*
1873 * Override the beginning of the line to simplify the case statement
1874 * below.
1875 */
1876 if (marker > 0) {
1877 TRACE(("** have marker=%d, override %s\n", marker, buffer));
1878 (void) memcpy(buffer, "***", (size_t) 3);
1879 }
1880
1881 first_ch = *buffer;
1882
1883 /*
1884 * GIT binary diffs can contain blocks of data that might be confused
1885 * with the ordinary line-oriented sections in diff output. Skip the
1886 * case statement if we are processing a GIT binary diff.
1887 */
1888 switch (git_diff) {
1889 default:
1890 break;
1891 case 1:
1892 /* expect "index" */
1893 if (match(buffer, "index") != 0) {
1894 git_diff = 2;
1895 continue;
1896 } else {
1897 git_diff = 0;
1898 }
1899 break;
1900 case 2:
1901 /* perhaps "GIT binary patch" */
1902 if (match(buffer, "GIT binary patch") != 0) {
1903 git_diff = 3;
1904 that->cmt = Binary;
1905 continue;
1906 } else if (match(buffer, "Binary files ") != 0) {
1907 git_diff = 0;
1908 that->cmt = Binary;
1909 continue;
1910 } else {
1911 git_diff = 0;
1912 }
1913 break;
1914 case 3:
1915 /* had "GIT binary patch", wait for next "diff" line */
1916 if (first_ch != 'd')
1917 continue;
1918 break;
1919 }
1920
1921 /*
1922 * Use the first character of the input line to determine its
1923 * type:
1924 */
1925 switch (first_ch) {
1926 case 'O': /* Only */
1927 CASE_TRACE();
1928 if (match(buffer, "Only in ")) {
1929 char *path = buffer + 8;
1930 int found = 0;
1931 for (s = path; *s != EOS; s++) {
1932 if (match(s, ": ")) {
1933 found = 1;
1934 *s++ = PATHSEP;
1935 while ((s[0] = s[1]) != EOS)
1936 s++;
1937 break;
1938 }
1939 }
1940 if (found) {
1941 blip('.');
1942 finish_chunk(that);
1943 that = find_data(path);
1944 that->cmt = Only;
1945 ok = HAVE_NOTHING;
1946 }
1947 }
1948 break;
1949
1950 /*
1951 * Several different scripts produce "Index:" lines
1952 * (e.g., "makepatch"). Not all bother to put the
1953 * pathname of the files; some put only the leaf names.
1954 */
1955 case 'I':
1956 CASE_TRACE();
1957 if ((s = match(buffer, "Index: ")) != 0) {
1958 s = skip_blanks(s);
1959 dequote(s);
1960 blip('.');
1961 finish_chunk(that);
1962 s = do_merging(that, s, &freed);
1963 that = find_data(s);
1964 ok = begin_data(that);
1965 }
1966 break;
1967
1968 case 'd': /* diff command trace */
1969 CASE_TRACE();
1970 if ((s = match(buffer, "diff ")) != 0
1971 && *(s = skip_options(s)) != EOS) {
1972 if (reverse_opt) {
1973 *skip_filename(s) = EOS;
1974 } else {
1975 s = skip_filename(s);
1976 s = skip_blanks(s);
1977 }
1978 dequote(s);
1979 blip('.');
1980 finish_chunk(that);
1981 s = do_merging(that, s, &freed);
1982 that = find_data(s);
1983 ok = begin_data(that);
1984 if (match(buffer, "diff --git ") != 0) {
1985 git_diff = 1;
1986 } else {
1987 git_diff = 0;
1988 }
1989 }
1990 break;
1991
1992 case '*':
1993 CASE_TRACE();
1994 if (!(ok & HAVE_PATH)) {
1995 int ddd, hour, minute, second;
1996 int day, month, year;
1997 char yrmon, monday;
1998 char *stars = match(buffer, "*** ");
1999 char *sp;
2000
2001 if (stars == NULL)
2002 break; /* ignore */
2003
2004 /* check for tab-delimited first, so we can
2005 * accept filenames containing spaces.
2006 */
2007 if (((sp = copy_notabs(b_fname, stars, length)) != NULL
2008 && (sp = match(sp, "\t")) != NULL
2009 && (sp = need_nospcs(sp)) != NULL
2010 && (sp = match(sp, " ")) != NULL
2011 && (sp = need_nospcs(sp)) != NULL
2012 && sscanf(sp,
2013 " %d %d:%d:%d %d",
2014 &ddd,
2015 &hour, &minute, &second, &year) == 5)
2016 || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2017 && sscanf(sp,
2018 "\t%d%c%d%c%d %d:%d:%d",
2019 &year, &yrmon, &month, &monday, &day,
2020 &hour, &minute, &second) == 8
2021 && date_delims(yrmon, monday)
2022 && !version_num(b_fname))
2023 || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2024 && (sp = match(sp, "\t")) != NULL
2025 && (sp = need_parens(sp)) != NULL
2026 && (sp = match(sp, "\t")) != NULL
2027 && need_parens(sp) != NULL
2028 && !version_num(b_fname))
2029 || ((sp = copy_notabs(b_fname, stars, length)) != NULL
2030 && (sp = match(sp, "\t")) != NULL
2031 && (sp = need_parens(sp)) != NULL
2032 && (*skip_blanks(sp) == EOS))
2033 || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2034 && (sp = need_blanks(sp)) != NULL
2035 && (sp = need_nospcs(sp)) != NULL
2036 && (sp = match(sp, " ")) != NULL
2037 && (sp = need_nospcs(sp)) != NULL
2038 && sscanf(sp,
2039 " %d %d:%d:%d %d",
2040 &ddd, &hour, &minute, &second, &year) == 5)
2041 || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2042 && (sp = need_blanks(sp)) != NULL
2043 && sscanf(sp,
2044 "%d%c%d%c%d %d:%d:%d",
2045 &year, &yrmon, &month, &monday, &day,
2046 &hour, &minute, &second) == 8
2047 && date_delims(yrmon, monday)
2048 && !version_num(b_fname))
2049 || ((sp = copy_git_name(b_fname, stars, length)) != NULL
2050 && *skip_blanks(sp) == EOS)
2051 || ((sp = copy_graphs(b_fname, stars, length)) != NULL
2052 && (*sp == EOS || *sp == BLANK || *sp == TAB)
2053 && !version_num(b_fname)
2054 && !contain_any(b_fname, "*")
2055 && !edit_range(b_fname))
2056 ) {
2057 prev = that;
2058 finish_chunk(that);
2059 dequote(b_fname);
2060 s = do_merging(that, b_fname, &freed);
2061 if (freed)
2062 prev = 0;
2063 that = find_data(s);
2064 ok = begin_data(that);
2065 TRACE(("** after merge:%d:%s\n", ok, s));
2066 }
2067 }
2068 break;
2069
2070 case '=':
2071 CASE_TRACE();
2072 if (!(ok & HAVE_PATH)) {
2073 int rev;
2074 char *bars, *sp;
2075
2076 if ((bars = match(buffer, "==== ")) != NULL
2077 && (bars = copy_p4_name(b_fname, bars, length)) != NULL
2078 && (bars = match(bars, "#")) != NULL
2079 && (bars = copy_integer(&rev, bars)) != NULL
2080 && (((sp = match(bars, " - ")) != NULL
2081 && need_graphs(sp) != NULL)
2082 || (((sp = match(bars, " ")) != NULL
2083 && (sp = need_parens(sp)) != NULL
2084 && (sp = match(sp, " - ")) != NULL
2085 && need_graphs(sp) != NULL)))
2086 && !version_num(b_fname)
2087 && !contain_any(b_fname, "*")
2088 && !edit_range(b_fname)) {
2089 TRACE(("** found p4-diff\n"));
2090 prev = that;
2091 finish_chunk(that);
2092 dequote(b_fname);
2093 s = do_merging(that, b_fname, &freed);
2094 if (freed)
2095 prev = 0;
2096 that = find_data(s);
2097 ok = begin_data(that);
2098 TRACE(("** after merge:%d:%s\n", ok, s));
2099 }
2100 }
2101 break;
2102
2103 case '+':
2104 /* FALL-THRU */
2105 case '>':
2106 CASE_TRACE();
2107 if (ok) {
2108 update_chunk(that, cInsert);
2109 }
2110 break;
2111
2112 case '-':
2113 if (!ok) {
2114 CASE_TRACE();
2115 break;
2116 }
2117 if (!unified && !strcmp(buffer, "---")) {
2118 CASE_TRACE();
2119 break;
2120 }
2121 /* fall-thru */
2122 case '<':
2123 CASE_TRACE();
2124 if (ok) {
2125 update_chunk(that, cDelete);
2126 }
2127 break;
2128
2129 case '!':
2130 CASE_TRACE();
2131 if (ok) {
2132 update_chunk(that, cModify);
2133 }
2134 break;
2135
2136 /* Expecting "Files XXX and YYY differ" */
2137 case 'F': /* FALL-THRU */
2138 case 'f':
2139 CASE_TRACE();
2140 if ((s = match(buffer + 1, "iles ")) != 0) {
2141 char *first = skip_blanks(s);
2142 /* blindly assume the first filename does not contain " and " */
2143 char *at_and = strstr(s, " and ");
2144 s = strrchr(buffer, BLANK);
2145 if ((at_and != NULL) && !strcmp(s, " differ")) {
2146 char *second = skip_blanks(at_and + 5);
2147
2148 if (reverse_opt) {
2149 *at_and = EOS;
2150 s = first;
2151 } else {
2152 *s = EOS;
2153 s = second;
2154 }
2155 blip('.');
2156 finish_chunk(that);
2157 that = find_data(s);
2158 that->cmt = Either;
2159 ok = HAVE_NOTHING;
2160 either = 1;
2161 }
2162 }
2163 break;
2164 /* Expecting "Binary files XXX and YYY differ" */
2165 case 'B': /* FALL-THRU */
2166 case 'b':
2167 CASE_TRACE();
2168 if ((s = match(buffer + 1, "inary files ")) != 0) {
2169 char *first = skip_blanks(s);
2170 /* blindly assume the first filename does not contain " and " */
2171 char *at_and = strstr(s, " and ");
2172 s = strrchr(buffer, BLANK);
2173 if ((at_and != NULL) && !strcmp(s, " differ")) {
2174 char *second = skip_blanks(at_and + 5);
2175
2176 if (reverse_opt) {
2177 *at_and = EOS;
2178 s = first;
2179 } else {
2180 *s = EOS;
2181 s = second;
2182 }
2183 blip('.');
2184 finish_chunk(that);
2185 that = find_data(s);
2186 that->cmt = Binary;
2187 ok = HAVE_NOTHING;
2188 }
2189 }
2190 break;
2191 }
2192 }
2193 blip('\n');
2194
2195 finish_chunk(that);
2196 finish_chunk(&dummy);
2197
2198 if (either) {
2199 int pass;
2200 int fixup_diffs = 0;
2201
2202 for (pass = 0; pass < 2; ++pass) {
2203 DATA *p;
2204 for (p = all_data; p; p = p->link) {
2205 switch (p->cmt) {
2206 default:
2207 break;
2208 case Normal:
2209 fixup_diffs = 1;
2210 break;
2211 case Either:
2212 if (pass) {
2213 if (fixup_diffs) {
2214 p->cmt = Binary;
2215 } else {
2216 p->cmt = Differs;
2217 }
2218 }
2219 break;
2220 }
2221 }
2222 }
2223 }
2224
2225 free(buffer);
2226 free(b_fname);
2227 }
2228
/*
 * Write an escape sequence to stdout: for a non-negative 'color' the
 * sequence carries the number color+30; for a negative value the fixed
 * sequence "ESC[0;39m" is written.
 */
static void
show_color(int color)
{
    if (color < 0) {
        printf("\033[0;39m");
        return;
    }
    printf("\033[%dm", color + 30);
}
2237
2238 static long
plot_bar(long count,int c,int color)2239 plot_bar(long count, int c, int color)
2240 {
2241 long result = count;
2242
2243 if (show_colors && result != 0)
2244 show_color(color);
2245
2246 while (--count >= 0)
2247 (void) putchar(c);
2248
2249 if (show_colors && result != 0)
2250 show_color(-1);
2251
2252 return result;
2253 }
2254
2255 /*
2256 * Each call to 'plot_num()' prints a scaled bar of 'c' characters. The
2257 * 'extra' parameter is used to keep the accumulated error in the bar's total
2258 * length from getting large.
2259 */
2260 static long
plot_num(long num_value,int c,int color,long * extra)2261 plot_num(long num_value, int c, int color, long *extra)
2262 {
2263 long result = 0;
2264
2265 /* the value to plot */
2266 /* character to display in the bar */
2267 /* accumulated error in the bar */
2268 if (num_value) {
2269 long product = (plot_width * num_value);
2270 result = ((product + *extra) / plot_scale);
2271 *extra = product - (result * plot_scale) - *extra;
2272 plot_bar(result, c, color);
2273 }
2274 return result;
2275 }
2276
2277 static long
plot_round1(const long num[MARKS])2278 plot_round1(const long num[MARKS])
2279 {
2280 long result = 0;
2281 long scaled[MARKS];
2282 long remain[MARKS];
2283 long want = 0;
2284 long have = 0;
2285 long half = (plot_scale / 2);
2286 int i;
2287
2288 memset(scaled, 0, sizeof(scaled));
2289 memset(remain, 0, sizeof(remain));
2290
2291 for_each_mark(i) {
2292 long product = (plot_width * num[i]);
2293 scaled[i] = (product / plot_scale);
2294 remain[i] = (product % plot_scale);
2295 want += product;
2296 have += product - remain[i];
2297 }
2298 while (want > have) {
2299 int j = -1;
2300 for_each_mark(i) {
2301 if (remain[i] != 0
2302 && (remain[i] > (j >= 0 ? remain[j] : half))) {
2303 j = i;
2304 }
2305 }
2306 if (j >= 0) {
2307 have += remain[j];
2308 remain[j] = 0;
2309 scaled[j] += 1;
2310 } else {
2311 break;
2312 }
2313 }
2314 for_each_mark(i) {
2315 plot_bar(scaled[i], marks[i], colors[i]);
2316 result += scaled[i];
2317 }
2318 return result;
2319 }
2320
2321 /*
2322 * Print a scaled bar of characters, where c[0] is for insertions, c[1]
2323 * for deletions and c[2] for modifications. The num array contains the
2324 * count for each type of change, in the same order.
2325 */
2326 static long
plot_round2(const long num[MARKS])2327 plot_round2(const long num[MARKS])
2328 {
2329 long result = 0;
2330 long scaled[MARKS];
2331 long remain[MARKS];
2332 long total = 0;
2333 int i;
2334
2335 for (i = 0; i < MARKS; i++)
2336 total += num[i];
2337
2338 if (total == 0)
2339 return result;
2340
2341 total = (total * plot_width + (plot_scale / 2)) / plot_scale;
2342 /* display at least one character */
2343 if (total == 0)
2344 total++;
2345
2346 for_each_mark(i) {
2347 scaled[i] = num[i] * plot_width / plot_scale;
2348 remain[i] = num[i] * plot_width - scaled[i] * plot_scale;
2349 total -= scaled[i];
2350 }
2351
2352 /* assign the missing chars using the largest remainder algo */
2353 while (total) {
2354 int largest, largest_count; /* largest is a bit field */
2355 long max_remain;
2356
2357 /* search for the largest remainder */
2358 largest = largest_count = 0;
2359 max_remain = 0;
2360 for_each_mark(i) {
2361 if (remain[i] > max_remain) {
2362 largest = 1 << i;
2363 largest_count = 1;
2364 max_remain = remain[i];
2365 } else if (remain[i] == max_remain) { /* ex aequo */
2366 largest |= 1 << i;
2367 largest_count++;
2368 }
2369 }
2370
2371 /* if there are more greatest remainders than characters
2372 missing, don't assign them at all */
2373 if (total < largest_count)
2374 break;
2375
2376 /* allocate the extra characters */
2377 for_each_mark(i) {
2378 if (largest & (1 << i)) {
2379 scaled[i]++;
2380 total--;
2381 remain[i] -= plot_width;
2382 }
2383 }
2384 }
2385
2386 for_each_mark(i) {
2387 result += plot_bar(scaled[i], marks[i], colors[i]);
2388 }
2389
2390 return result;
2391 }
2392
2393 static void
plot_numbers(const DATA * p)2394 plot_numbers(const DATA * p)
2395 {
2396 long temp = 0;
2397 int i;
2398
2399 printf("%5ld ", TotalOf(p));
2400
2401 if (format_opt & FMT_VERBOSE) {
2402 printf("%5ld ", InsOf(p));
2403 printf("%5ld ", DelOf(p));
2404 printf("%5ld ", ModOf(p));
2405 if (path_opt)
2406 printf("%5ld ", EqlOf(p));
2407 }
2408
2409 if (format_opt == FMT_CONCISE) {
2410 for_each_mark(i) {
2411 printf("\t%ld %c", p->count[i], marks[i]);
2412 }
2413 } else {
2414 long used = 0;
2415
2416 switch (round_opt) {
2417 default:
2418 for_each_mark(i) {
2419 used += plot_num(p->count[i], marks[i], colors[i], &temp);
2420 }
2421 break;
2422 case 1:
2423 used = plot_round1(p->count);
2424 break;
2425
2426 case 2:
2427 used = plot_round2(p->count);
2428 break;
2429 }
2430
2431 if ((format_opt & FMT_FILLED) != 0) {
2432 if (used > plot_width)
2433 printf("%ld", used - plot_width); /* oops */
2434 else
2435 plot_bar(plot_width - used, '.', 0);
2436 }
2437 }
2438 }
2439
2440 static int
columns_of(const char * value)2441 columns_of(const char *value)
2442 {
2443 int result;
2444 int n;
2445 int ch;
2446 #ifdef HAVE_MBSTOWCWIDTH
2447 int fixup = 0;
2448 for (n = 0; (ch = UC(value[n])) != EOS; ++n) {
2449 if (ch >= DEL || ch < BLANK) {
2450 fixup = 1;
2451 break;
2452 }
2453 }
2454 result = (int) strlen(value);
2455 if (fixup) {
2456 size_t needed;
2457 mbstate_t state;
2458 const char *source;
2459 size_t length = strlen(value);
2460
2461 memset(&state, 0, sizeof(state));
2462 source = value;
2463 needed = mbsrtowcs(NULL, &source, length, &state);
2464 if (needed != (size_t) (-1)) {
2465 wchar_t *target = calloc(1 + needed, sizeof(wchar_t));
2466 memset(&state, 0, sizeof(state));
2467 source = value;
2468 if (mbsrtowcs(target, &source, length, &state) == needed) {
2469 size_t n2;
2470 result = 0;
2471 for (n2 = 0; n2 < needed; ++n2) {
2472 int nw = wcwidth(target[n2]);
2473 if (nw > 0)
2474 result += nw;
2475 else if (target[n2] < BLANK || target[n2] == DEL)
2476 result += 2;
2477 else
2478 result += 4; /* show as octal */
2479 }
2480 }
2481 free(target);
2482 }
2483 }
2484 #else
2485 result = (int) strlen(value);
2486 for (n = 0; (ch = UC(value[n])) != EOS; ++n) {
2487 if (ch == DEL || ch < BLANK) {
2488 result += 1;
2489 } else if (ch > DEL) {
2490 result += 3; /* show as octal */
2491 }
2492 }
2493 #endif
2494 return result;
2495 }
2496
/*
 * Add to "width" the difference between the byte-length of "name" and the
 * value which columns_of() computes for it.  Both arguments are evaluated
 * more than once, and "width" must be a modifiable int.
 */
#define adjustwide(width,name) width += (int) strlen(name) - columns_of(name)
2498
2499 static void
show_quoted(const char * value)2500 show_quoted(const char *value)
2501 {
2502 int ch;
2503
2504 putchar(DQUOTE);
2505 while ((ch = UC(*value++)) != EOS) {
2506 if (ch == DQUOTE)
2507 putchar(DQUOTE);
2508 putchar(ch);
2509 }
2510 putchar(DQUOTE);
2511 }
2512
2513 static void
show_unquoted(const char * value,int limit)2514 show_unquoted(const char *value, int limit)
2515 {
2516 int ch;
2517 while ((ch = UC(*value++)) != EOS) {
2518 if (ch < BLANK) {
2519 if (strchr("\b\n\r\t\\\"", ch) != NULL) {
2520 putchar(BACKSL);
2521 switch (ch) {
2522 case '\b':
2523 ch = 'b';
2524 break;
2525 case '\n':
2526 ch = 'n';
2527 break;
2528 case '\r':
2529 ch = 'r';
2530 break;
2531 case '\t':
2532 ch = 't';
2533 break;
2534 }
2535 } else {
2536 putchar('^');
2537 ch |= '@';
2538 }
2539 } else if (ch == DEL) {
2540 putchar('^');
2541 ch = '?';
2542 }
2543 #ifndef HAVE_MBSTOWCWIDTH
2544 else if (ch > DEL) {
2545 char temp[5];
2546 sprintf(temp, "\\%03o", ch & 0xff);
2547 ch = temp[3];
2548 temp[3] = EOS;
2549 fputs(temp, stdout);
2550 }
2551 #endif
2552 putchar(ch);
2553 --limit;
2554 }
2555 while (limit-- > 0) {
2556 putchar(BLANK);
2557 }
2558 }
2559
/*
 * True if the entry should be treated as changed: filenames are not being
 * merged, or the entry's comment is something other than Normal, or its
 * total count is nonzero.  The argument is evaluated more than once.
 */
#define changed(p) (!merge_names \
 || (p)->cmt != Normal \
 || (TotalOf(p)) != 0)
2563
2564 static void
show_data(const DATA * p)2565 show_data(const DATA * p)
2566 {
2567 const char *name = data_filename(p);
2568 int width;
2569
2570 if (summary_only) {
2571 ;
2572 } else if (!changed(p)) {
2573 ;
2574 } else if (p->cmt == Binary && suppress_binary == 1) {
2575 ;
2576 } else if (table_opt == 1) {
2577 if (names_only) {
2578 show_quoted(name);
2579 } else {
2580 printf("%ld,%ld,%ld,",
2581 InsOf(p),
2582 DelOf(p),
2583 ModOf(p));
2584 if (path_opt)
2585 printf("%ld,", EqlOf(p));
2586 if (count_files && !reverse_opt)
2587 printf("%d,%d,%d,",
2588 (p->cmt == OnlyRight),
2589 (p->cmt == OnlyLeft),
2590 (p->cmt == Binary));
2591 show_quoted(name);
2592 }
2593 printf("\n");
2594 } else if (names_only) {
2595 printf("%s\n", name);
2596 } else {
2597 printf("%s ", comment_opt);
2598 if (max_name_wide > 0
2599 && max_name_wide < min_name_wide
2600 && max_name_wide < ((width = (int) columns_of(name)))) {
2601 printf("%.*s", max_name_wide, name + (width - max_name_wide));
2602 } else {
2603 width = ((max_name_wide > 0 && max_name_wide < min_name_wide)
2604 ? max_name_wide
2605 : min_name_wide);
2606 adjustwide(width, name);
2607 show_unquoted(name, width);
2608 }
2609 if (table_opt == 2) {
2610 putchar('|');
2611 if (path_opt)
2612 printf("%*ld ", number_len, EqlOf(p));
2613 printf("%*ld ", number_len, InsOf(p));
2614 printf("%*ld ", number_len, DelOf(p));
2615 printf("%*ld", number_len, ModOf(p));
2616 }
2617 putchar('|');
2618 switch (p->cmt) {
2619 default:
2620 case Normal:
2621 plot_numbers(p);
2622 break;
2623 case Binary:
2624 printf("binary");
2625 break;
2626 case Differs:
2627 printf("differ");
2628 break;
2629 case Only:
2630 printf("only");
2631 break;
2632 case OnlyLeft:
2633 printf(count_files ? "deleted" : "only");
2634 break;
2635 case OnlyRight:
2636 printf(count_files ? "added" : "only");
2637 break;
2638 }
2639 printf("\n");
2640 }
2641 }
2642
2643 #ifdef HAVE_TSEARCH
2644 static void
show_tsearch(const void * nodep,const VISIT which,const int depth)2645 show_tsearch(const void *nodep, const VISIT which, const int depth)
2646 {
2647 const DATA *p = *(DATA * const *) nodep;
2648 (void) depth;
2649 if (which == postorder || which == leaf)
2650 show_data(p);
2651 }
2652 #endif
2653
2654 static int
ignore_data(DATA * p)2655 ignore_data(DATA * p)
2656 {
2657 return ((!changed(p))
2658 || (p->cmt == Binary && suppress_binary));
2659 }
2660
2661 /*
2662 * Return the length of any directory-prefix from the given path.
2663 */
2664 static size_t
path_length(const char * path)2665 path_length(const char *path)
2666 {
2667 size_t result = 0;
2668 char *mark = strrchr(path, PATHSEP);
2669 if (mark != 0 && mark != path)
2670 result = (size_t) (mark + 1 - path);
2671 return result;
2672 }
2673
2674 /*
2675 * If we have an "only" filename, we can guess whether it was added or removed
2676 * by looking at its directory and comparing that to other files' directories.
2677 *
2678 * TODO: -K -R combination is not yet supported because that relies on storing
2679 * both left-/right-paths for each file; only the right-path is currently used.
2680 */
2681 static Comment
resolve_only(DATA * p)2682 resolve_only(DATA * p)
2683 {
2684 Comment result = p->cmt;
2685 if (result == Only && !reverse_opt) {
2686 DATA *q;
2687 size_t len1 = path_length(p->name);
2688 if (len1 != 0) {
2689 for (q = all_data; q; q = q->link) {
2690 result = OnlyLeft;
2691 if (q->cmt == Normal || q->cmt == Binary) {
2692 size_t len2 = path_length(q->name);
2693 if (len2 >= len1) {
2694 if (!strncmp(p->name, q->name, len1)) {
2695 result = OnlyRight;
2696 break;
2697 }
2698 }
2699 }
2700 }
2701 }
2702 }
2703 return result;
2704 }
2705
2706 #ifdef HAVE_OPENDIR
2707 static void
count_unmodified_files(const char * pathname,long * files,long * lines)2708 count_unmodified_files(const char *pathname, long *files, long *lines)
2709 {
2710 DATA *p;
2711 char *name;
2712
2713 TRACE(("count_unmodified_files \"%s\"\n", pathname));
2714 if (is_dir(pathname)) {
2715 DIR *dp = opendir(pathname);
2716
2717 if (dp != 0) {
2718 struct dirent *de;
2719
2720 while ((de = readdir(dp)) != 0) {
2721 if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
2722 continue;
2723 name = malloc(strlen(pathname) + 2 + strlen(de->d_name));
2724 if (name != 0) {
2725 sprintf(name, "%s%c%s", pathname, PATHSEP, de->d_name);
2726 count_unmodified_files(name, files, lines);
2727 free(name);
2728 }
2729 }
2730 closedir(dp);
2731 }
2732 } else if (is_file(pathname)) {
2733 /*
2734 * Given the pathname from the (-D) source directory, derive a
2735 * corresponding path for the source directory. Then check if
2736 * that path appears in the list of modified files.
2737 */
2738 const char *ref_name = ((all_data && !unchanged) ? all_data->name : pathname);
2739 char *source = 0;
2740
2741 if (prefix_opt >= 0) {
2742 int level_s = count_prefix(path_opt);
2743 int base_s = 0;
2744 int base_d = 0;
2745
2746 (void) skip_prefix(pathname, level_s + 1, &base_s);
2747 (void) skip_prefix(ref_name, level_s + 1, &base_d);
2748 name = malloc(2 + strlen(pathname) + strlen(ref_name));
2749 sprintf(name, "%.*s%s", base_d, ref_name, base_s + pathname);
2750 source = malloc(strlen(ref_name) + 2 + strlen(pathname) + strlen(S_option));
2751 sprintf(source, "%s%c%s",
2752 S_option,
2753 PATHSEP,
2754 base_s + pathname);
2755 } else {
2756 const char *mark = unchanged ? ref_name : data_filename(all_data);
2757 int skip = 1 + (int) strlen(path_opt);
2758
2759 name = malloc(strlen(ref_name) + 2 + strlen(pathname));
2760 sprintf(name, "%.*s%s",
2761 (int) (mark - ref_name),
2762 ref_name,
2763 pathname + skip);
2764 source = malloc(strlen(ref_name) + 2 + strlen(pathname) + strlen(S_option));
2765 sprintf(source, "%s%c%.*s%s",
2766 S_option,
2767 PATHSEP,
2768 (int) (mark - ref_name),
2769 ref_name,
2770 pathname + skip);
2771 }
2772
2773 if (same_file(source, pathname)) {
2774 int found = 0;
2775
2776 for (p = all_data; p != 0 && !found; p = p->link) {
2777 if (!strcmp(name, p->name)) {
2778 found = 1;
2779 }
2780 }
2781 if (!found) {
2782 p = find_data(name);
2783 *files += 1;
2784 EqlOf(p) = count_lines(p);
2785 *lines += EqlOf(p);
2786
2787 if (unchanged) {
2788 int len = columns_of(p->name);
2789 if (min_name_wide < (len - p->base))
2790 min_name_wide = (len - p->base);
2791 }
2792 }
2793 }
2794 free(name);
2795 free(source);
2796 }
2797 }
2798 #endif
2799
2800 static void
update_min_name_wide(long longest_name)2801 update_min_name_wide(long longest_name)
2802 {
2803 if (prefix_opt < 0) {
2804 if (prefix_len < 0)
2805 prefix_len = 0;
2806 if ((longest_name - prefix_len) > min_name_wide)
2807 min_name_wide = (int) (longest_name - prefix_len);
2808 }
2809
2810 if (min_name_wide < 1)
2811 min_name_wide = 0;
2812 min_name_wide++; /* make sure it's nonzero */
2813 }
2814
/*
 * Produce the report: compute the column widths and the histogram scale from
 * the list of files, print the per-file lines (via show_data), and finally
 * print the summary line with the totals.
 *
 * This updates several globals as it goes, including plot_scale, plot_width,
 * prefix_len, min_name_wide, number_len and unchanged, as well as the counts
 * and comments stored in the entries of all_data.
 */
static void
summarize(void)
{
    DATA *p;
    long total_ins = 0;
    long total_del = 0;
    long total_mod = 0;
    long total_eql = 0;
    long files_added = 0;
    long files_equal = 0;	/* passed to count_unmodified_files(), not printed here */
    long files_binary = 0;
    long files_removed = 0;
    long temp;
    int num_files = 0, shortest_name = -1, longest_name = -1;

    /* first pass: find the filename widths and the common prefix */
    plot_scale = 0;
    for (p = all_data; p; p = p->link) {
        /*
         * NOTE(review): len comes from columns_of(), but below it is
         * compared with, and copied to, prefix_len, which is used as a
         * byte index into p->name.  The two can differ for names which
         * columns_of() widens - confirm that this is intended.
         */
        int len = columns_of(p->name);

        if (ignore_data(p))
            continue;

        /*
         * If "-pX" option is given, prefix_opt is non-negative.
         *
         * "-p0" gives the whole pathname unmodified. "-p1" strips
         * through the first path-separator, etc.
         */
        if (prefix_opt >= 0) {
            /* p->base has been computed at node creation */
            if (min_name_wide < (len - p->base))
                min_name_wide = (len - p->base);
        } else {
            /*
             * If "-pX" option is not given, strip off any prefix which is
             * shared by all of the names.
             */
            if (len < prefix_len || prefix_len < 0)
                prefix_len = len;
            /*
             * Shrink the prefix until it ends with a path-separator and
             * matches the beginning of the first name in the list.
             */
            while (prefix_len > 0) {
                if (p->name[prefix_len - 1] != PATHSEP)
                    prefix_len--;
                else if (strncmp(all_data->name, p->name, (size_t) prefix_len))
                    prefix_len--;
                else
                    break;
            }

            if (len > longest_name)
                longest_name = len;
            /* shortest_name is recorded, but not used later in this function */
            if (len < shortest_name || shortest_name < 0)
                shortest_name = len;
        }
    }

    /*
     * Get additional counts for files where we cannot count lines changed.
     */
    if (count_files) {
        for (p = all_data; p; p = p->link) {
            switch (p->cmt) {
            case Binary:
                files_binary++;
                break;
            case Only:
                /* replace "only" by the guess, so show_data() can report it */
                switch (resolve_only(p)) {
                case OnlyRight:
                    p->cmt = OnlyRight;
                    files_added++;
                    break;
                case OnlyLeft:
                    p->cmt = OnlyLeft;
                    files_removed++;
                    break;
                default:
                    /* ignore - we could not guess */
                    break;
                }
                /* no break here: control continues into the empty default */
            default:
                break;
            }
        }
    }

    /*
     * Use a separate loop after computing prefix_len so we can apply the "-S"
     * or "-D" options to find files that we can use as reference for the
     * unchanged-count.
     */
    for (p = all_data; p; p = p->link) {
        if (!ignore_data(p)) {
            EqlOf(p) = 0;
            /* "-R": the patch is reversed, so swap insertions and deletions */
            if (reverse_opt) {
                long save_ins = InsOf(p);
                long save_del = DelOf(p);
                InsOf(p) = save_del;
                DelOf(p) = save_ins;
            }
            if (path_opt != 0) {
                int count = count_lines(p);

                /*
                 * The unchanged lines are whatever remains of the file's
                 * line count after removing the modified lines, and either
                 * the inserted lines (for a "-D" path) or the deleted
                 * lines (for a "-S" path).
                 */
                if (count >= 0) {
                    EqlOf(p) = count - ModOf(p);
                    if (path_dest != 0) {
                        EqlOf(p) -= InsOf(p);
                    } else {
                        EqlOf(p) -= DelOf(p);
                    }
                    if (EqlOf(p) < 0)
                        EqlOf(p) = 0;
                }
            }
            num_files++;
            total_ins += InsOf(p);
            total_del += DelOf(p);
            total_mod += ModOf(p);
            total_eql += EqlOf(p);
            /* the histogram is scaled to the largest per-file total */
            temp = TotalOf(p);
            if (temp > plot_scale)
                plot_scale = temp;
        }
    }

    update_min_name_wide(longest_name);

#ifdef HAVE_OPENDIR
    /* with both "-S" and "-D", look for files which the diff did not mention */
    if (S_option != 0 && D_option != 0) {
        unchanged = (all_data == 0);
        count_unmodified_files(D_option, &files_equal, &total_eql);
        if (unchanged) {
            /* the list was empty before, so recompute the widths and scale */
            for (p = all_data; p; p = p->link) {
                int len = columns_of(p->name);
                if (longest_name < len)
                    longest_name = len;
                temp = TotalOf(p);
                if (temp > plot_scale)
                    plot_scale = temp;
            }
            update_min_name_wide(longest_name);
        }
    }
#endif

    /* the histogram gets what is left of the line, but at least 10 columns */
    plot_width = (max_width - min_name_wide - 8);
    if (plot_width < 10)
        plot_width = 10;

    if (plot_scale < plot_width)
        plot_scale = plot_width;	/* 1:1 */

    if (table_opt == 1) {
        /* "-t": print the heading for the comma-separated values */
        if (!names_only) {
            printf("INSERTED,DELETED,MODIFIED,");
            if (path_opt)
                printf("UNCHANGED,");
            if (count_files && !reverse_opt)
                printf("FILE-ADDED,FILE-DELETED,FILE-BINARY,");
        }
        printf("FILENAME\n");
    } else if (table_opt == 2) {
        /* "-T": size the numeric columns for the largest count, minimum 3 */
        long largest = 0;
        for (p = all_data; p; p = p->link) {
            if (path_opt)
                largest = maximum(largest, EqlOf(p));
            largest = maximum(largest, InsOf(p));
            largest = maximum(largest, DelOf(p));
            largest = maximum(largest, ModOf(p));
        }
        number_len = 0;
        while (largest > 0) {
            number_len++;
            largest /= 10;
        }
        number_len = maximum(number_len, 3);
    }
    /* print the per-file lines, from the tree if one is in use, else the list */
#ifdef HAVE_TSEARCH
    if (use_tsearch) {
        twalk(sorted_data, show_tsearch);
    } else
#endif
        for (p = all_data; p; p = p->link) {
            show_data(p);
        }

    /* the summary line is omitted for "-t" and for "-l" */
    if ((table_opt != 1) && !names_only) {
        /* expands to two printf arguments: the number, and its plural suffix */
#define PLURAL(n) n, n != 1 ? "s" : ""
        if (num_files > 0 || !quiet) {
            printf("%s %d file%s changed", comment_opt, PLURAL(num_files));
            if (total_ins)
                printf(", %ld insertion%s(+)", PLURAL(total_ins));
            if (total_del)
                printf(", %ld deletion%s(-)", PLURAL(total_del));
            if (total_mod)
                printf(", %ld modification%s(!)", PLURAL(total_mod));
            if (total_eql && path_opt != 0)
                printf(", %ld unchanged line%s(=)", PLURAL(total_eql));
            if (count_files) {
                if (files_added)
                    printf(", %ld file%s added", PLURAL(files_added));
                if (files_removed)
                    printf(", %ld file%s removed", PLURAL(files_removed));
                if (files_binary)
                    printf(", %ld binary file%s", PLURAL(files_binary));
            }
            (void) putchar('\n');
        }
    }
}
3023
3024 #ifdef HAVE_POPEN
3025 static const char *
get_program(const char * name,const char * dft)3026 get_program(const char *name, const char *dft)
3027 {
3028 const char *result = getenv(name);
3029 if (result == 0 || *result == EOS)
3030 result = dft;
3031 TRACE(("get_program(%s) = %s\n", name, result));
3032 return result;
3033 }
3034 #define GET_PROGRAM(name) get_program("DIFFSTAT_" #name, name)
3035
3036 static char *
decompressor(Decompress which,const char * name)3037 decompressor(Decompress which, const char *name)
3038 {
3039 const char *verb = 0;
3040 const char *opts = "";
3041 char *result = 0;
3042 size_t len = strlen(name);
3043
3044 switch (which) {
3045 case dcBzip:
3046 verb = GET_PROGRAM(BZCAT_PATH);
3047 if (*verb == EOS) {
3048 verb = GET_PROGRAM(BZIP2_PATH);
3049 opts = "-dc";
3050 }
3051 break;
3052 case dcCompress:
3053 verb = GET_PROGRAM(ZCAT_PATH);
3054 if (*verb == EOS) {
3055 verb = GET_PROGRAM(UNCOMPRESS_PATH);
3056 opts = "-c";
3057 if (*verb == EOS) {
3058 /* not all compress's recognize the options, test this last */
3059 verb = GET_PROGRAM(COMPRESS_PATH);
3060 opts = "-dc";
3061 }
3062 }
3063 break;
3064 case dcGzip:
3065 verb = GET_PROGRAM(GZIP_PATH);
3066 opts = "-dc";
3067 break;
3068 case dcLzma:
3069 verb = GET_PROGRAM(LZCAT_PATH);
3070 opts = "-dc";
3071 break;
3072 case dcPack:
3073 verb = GET_PROGRAM(PCAT_PATH);
3074 break;
3075 case dcXz:
3076 verb = GET_PROGRAM(XZ_PATH);
3077 opts = "-dc";
3078 break;
3079 case dcEmpty:
3080 /* FALLTHRU */
3081 case dcNone:
3082 break;
3083 }
3084 if (verb != 0 && *verb != EOS) {
3085 result = (char *) xmalloc(strlen(verb) + 10 + len);
3086 sprintf(result, "%s %s", verb, opts);
3087 if (*name != EOS) {
3088 sprintf(result + strlen(result), " \"%s\"", name);
3089 }
3090 }
3091 return result;
3092 }
3093
3094 static char *
is_compressed(const char * name)3095 is_compressed(const char *name)
3096 {
3097 size_t len = strlen(name);
3098 Decompress which;
3099
3100 if (len > 2 && !strcmp(name + len - 2, ".Z")) {
3101 which = dcCompress;
3102 } else if (len > 2 && !strcmp(name + len - 2, ".z")) {
3103 which = dcPack;
3104 } else if (len > 3 && !strcmp(name + len - 3, ".gz")) {
3105 which = dcGzip;
3106 } else if (len > 4 && !strcmp(name + len - 4, ".bz2")) {
3107 which = dcBzip;
3108 } else if (len > 5 && !strcmp(name + len - 5, ".lzma")) {
3109 which = dcLzma;
3110 } else if (len > 3 && !strcmp(name + len - 3, ".xz")) {
3111 which = dcXz;
3112 } else {
3113 which = dcNone;
3114 }
3115 return decompressor(which, name);
3116 }
3117
3118 #ifdef HAVE_MKDTEMP
3119 #define MY_MKDTEMP(path) mkdtemp(path)
3120 #else
/*
 * mktemp is supposedly marked obsolete at the same point that mkdtemp is
 * introduced.
 *
 * Substitute for mkdtemp(): fill in the template with mktemp() and create a
 * directory by that name, accessible only to its owner.
 *
 * Returns the path on success.  Returns NULL if mktemp() returned NULL or if
 * the directory could not be created, so that callers testing the result
 * against zero see the failure.
 */
static char *
my_mkdtemp(char *path)
{
    char *result = mktemp(path);
    if (result != 0) {
        if (MKDIR(result, 0700) < 0) {
            result = 0;
        }
    }
    /* return the checked result, not "path", which is never NULL here */
    return result;
}
3136 #define MY_MKDTEMP(path) my_mkdtemp(path)
3137 #endif
3138
/*
 * Copy standard input to a file named "stdin" in a new temporary directory,
 * which is created under $TMPDIR, or under "/tmp/" if that is not set.
 *
 * On success, returns the allocated pathname of the file, and stores the
 * allocated pathname of the directory in "*dirpath".  On failure, returns
 * NULL and sets "*dirpath" to NULL.
 */
static char *
copy_stdin(char **dirpath)
{
    const char *tmp = getenv("TMPDIR");
    char *result;
    FILE *fp;
    int ch;

    if (tmp == 0)
        tmp = "/tmp/";

    /* room for the directory, the 11-character template and a null */
    *dirpath = xmalloc(strlen(tmp) + 12);
    sprintf(*dirpath, "%s%s", tmp, "/diffXXXXXX");

    if (MY_MKDTEMP(*dirpath) == 0) {
        free(*dirpath);
        *dirpath = 0;
        return 0;
    }

    result = xmalloc(strlen(*dirpath) + 10);
    sprintf(result, "%s/stdin", *dirpath);

    fp = fopen(result, "w");
    if (fp == 0) {
        free(result);
        rmdir(*dirpath);	/* Assume that the /stdin file was not created */
        free(*dirpath);
        *dirpath = 0;
        return 0;
    }

    while ((ch = MY_GETC(stdin)) != EOF) {
        fputc(ch, fp);
    }
    (void) fclose(fp);
    return result;
}
3177 #endif
3178
3179 static void
set_path_opt(char * value,int destination)3180 set_path_opt(char *value, int destination)
3181 {
3182 path_opt = value;
3183 path_dest = destination;
3184 if (*path_opt != 0) {
3185 if (is_dir(path_opt)) {
3186 num_marks = 4;
3187 } else {
3188 (void) fflush(stdout);
3189 fprintf(stderr, "Not a directory:%s\n", path_opt);
3190 exit(EXIT_FAILURE);
3191 }
3192 }
3193 }
3194
/*
 * Write the usage message to the given stream, one table entry per line.
 */
static void
usage(FILE *fp)
{
    static const char *text[] =
    {
        "Usage: diffstat [options] [files]",
        "",
        "Reads from one or more input files which contain output from 'diff',",
        "producing a histogram of total lines changed for each file referenced.",
        "If no filename is given on the command line, reads from standard input.",
        "",
        "Options:",
        " -b ignore lines matching \"Binary files XXX and YYY differ\"",
        " -c prefix each line with comment (#)",
        " -C add SGR color escape sequences to highlight the histogram",
#if OPT_TRACE
        " -d debug - prints a lot of information",
#endif
        " -D PATH specify location of patched files, use for unchanged-count",
        " -e FILE redirect standard error to FILE",
        " -E trim escape-sequences, e.g., from colordiff",
        " -f NUM format (0=concise, 1=normal, 2=filled, 4=values)",
        " -h print this message",
        " -k do not merge filenames",
        " -K resolve ambiguity of \"only\" filenames",
        " -l list filenames only",
        " -m merge insert/delete data in chunks as modified-lines",
        " -n NUM specify minimum width for the filenames (default: auto)",
        " -N NUM specify maximum width for the filenames (default: auto)",
        " -o FILE redirect standard output to FILE",
        " -p NUM specify number of pathname-separators to strip (default: common)",
        " -q suppress the \"0 files changed\" message for empty diffs",
        " -r NUM specify rounding for histogram (0=none, 1=simple, 2=adjusted)",
        " -R assume patch was created with old and new files swapped",
        " -s show only the summary line",
        " -S PATH specify location of original files, use for unchanged-count",
        " -t print a table (comma-separated-values) rather than histogram",
        " -T print amounts (like -t option) in addition to histogram",
        " -u do not sort the input list",
        " -v show progress if output is redirected to a file",
        " -V prints the version number",
        " -w NUM specify maximum width of the output (default: 80)",
    };
    const char *const *line = text;
    const char *const *last = text + sizeof(text) / sizeof(text[0]);

    while (line != last) {
        fputs(*line++, fp);
        fputc('\n', fp);
    }
}
3242
/*
 * Wrapper around getopt() which also accepts "--help" and "--version".
 *
 * argc, argv, opts, the return value, and the globals optarg, optind, opterr
 * and optopt are as in getopt().  If the next argument is exactly "--help"
 * or "--version", it is consumed and the value of "help" or "version",
 * respectively, is returned instead.
 */
static int
getopt_helper(int argc, char *const argv[], const char *opts,
              int help, int version)
{
    const char *arg = (optind < argc) ? argv[optind] : NULL;

    if (arg != NULL) {
        int is_help = (strcmp(arg, "--help") == 0);

        if (is_help || strcmp(arg, "--version") == 0) {
            optind++;
            return is_help ? help : version;
        }
    }
    return getopt(argc, argv, opts);
}
3262
3263 static int
getopt_value(void)3264 getopt_value(void)
3265 {
3266 char *next = 0;
3267 long value = strtol(optarg, &next, 0);
3268 if (next == 0 || *next != EOS) {
3269 (void) fflush(stdout);
3270 fprintf(stderr, "expected a number, have '%s'\n", optarg);
3271 exit(EXIT_FAILURE);
3272 }
3273 return (int) value;
3274 }
3275
/*
 * Program entry point: parse the options, read each diff named on the command
 * line (or standard input if none is named), then print the report.
 *
 * Files with a recognized compressed suffix are read through a pipe from a
 * decompressor.  Standard input is checked for a compressed format by looking
 * at its first few bytes; if one is found, the input is first copied to a
 * temporary file.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE for an unrecognized option.
 */
int
main(int argc, char *argv[])
{
    int j;
    char version[80];

    /* if writing to a terminal, start with its width as the output width */
#if defined(HAVE_TCGETATTR) && defined(TIOCGWINSZ)
    if (isatty(fileno(stdout))) {
        struct winsize data;
        if (ioctl(fileno(stdout), TIOCGWINSZ, &data) == 0) {
            max_width = data.ws_col;
        }
    }
#endif

#ifdef HAVE_MBSTOWCWIDTH
    setlocale(LC_CTYPE, "");
#endif

    /* "--help" and "--version" are handled as if they were -h and -V */
    while ((j = getopt_helper(argc, argv,
                              "bcCdD:e:Ef:hkKlmn:N:o:p:qr:RsS:tTuvVw:", 'h', 'V'))
           != -1) {
        switch (j) {
        case 'b':
            suppress_binary = 1;
            break;
        case 'c':
            comment_opt = "#";
            break;
        case 'C':
            show_colors = 1;
            break;
#if OPT_TRACE
        case 'd':
            trace_opt = 1;
            break;
#endif
        case 'D':
            D_option = optarg;
            break;
        case 'e':
            if (freopen(optarg, "w", stderr) == 0)
                failed(optarg);
            break;
        case 'E':
            trim_escapes = 1;
            break;
        case 'f':
            format_opt = getopt_value();
            break;
        case 'h':
            usage(stdout);
            return (EXIT_SUCCESS);
        case 'k':
            merge_names = 0;
            break;
        case 'K':
            count_files = 1;
            break;
        case 'l':
            names_only = 1;
            break;
        case 'm':
            merge_opt = 1;
            break;
        case 'n':
            min_name_wide = getopt_value();
            break;
        case 'N':
            max_name_wide = getopt_value();
            break;
        case 'o':
            if (freopen(optarg, "w", stdout) == 0)
                failed(optarg);
            break;
        case 'p':
            prefix_opt = getopt_value();
            break;
        case 'r':
            round_opt = getopt_value();
            break;
        case 'R':
            reverse_opt = 1;
            break;
        case 's':
            summary_only = 1;
            break;
        case 'S':
            S_option = optarg;
            break;
        case 't':
            table_opt = 1;
            break;
        case 'T':
            table_opt = 2;
            break;
        case 'u':
            sort_names = 0;
            break;
        case 'v':
            verbose = 1;
            break;
        case 'V':
            /*
             * The version is the third blank-separated word of the Id
             * string; "?" is used if NO_IDENT is defined or if sscanf()
             * returns zero.
             */
#ifndef NO_IDENT
            if (!sscanf(Id, "%*s %*s %30s", version))
#endif
                (void) strcpy(version, "?");
            printf("diffstat version %s\n", version);
            return (EXIT_SUCCESS);
        case 'w':
            max_width = getopt_value();
            break;
        case 'q':
            quiet = 1;
            break;
        default:
            usage(stderr);
            return (EXIT_FAILURE);
        }
    }

    /*
     * The numbers from -S/-D options will only be useful if the merge option
     * is added.
     *
     * If both options are given, the -D value is the one left in path_opt,
     * because it is set last.
     */
    if (S_option)
        set_path_opt(S_option, 0);
    if (D_option)
        set_path_opt(D_option, 1);
    if (path_opt)
        merge_opt = 1;

    /* progress goes to stderr, and only when stdout is not the terminal too */
    show_progress = verbose && (!isatty(fileno(stdout))
                                && isatty(fileno(stderr)));

#ifdef HAVE_TSEARCH
    use_tsearch = (sort_names && merge_names);
#endif

    if (optind < argc) {
        /* process each file named on the command line */
        while (optind < argc) {
            FILE *fp;
            char *name = argv[optind++];
#ifdef HAVE_POPEN
            /* a non-null command means the suffix named a compressed format */
            char *command = is_compressed(name);
            if (command != 0) {
                /* NOTE(review): a failure of popen() is not reported here */
                if ((fp = popen(command, "r")) != 0) {
                    if (show_progress) {
                        (void) fflush(stdout);
                        (void) fprintf(stderr, "%s\n", name);
                        (void) fflush(stderr);
                    }
                    do_file(fp, name);
                    (void) pclose(fp);
                }
                free(command);
            } else
#endif
            if ((fp = fopen(name, "rb")) != 0) {
                if (show_progress) {
                    (void) fflush(stdout);
                    (void) fprintf(stderr, "%s\n", name);
                    (void) fflush(stderr);
                }
                do_file(fp, name);
                (void) fclose(fp);
            } else {
                failed(name);
            }
        }
    } else {
#ifdef HAVE_POPEN
        /*
         * No files were named: read standard input, after checking its
         * first bytes for the signature of a compressed format.  The bytes
         * read for that purpose are saved in sniff[] and pushed back below.
         */
        Decompress which = dcEmpty;	/* stays dcEmpty if there is no input */
        char *stdin_dir = 0;
        char *myfile;
        char sniff[8];
        int ch;
        unsigned got = 0;

        if ((ch = MY_GETC(stdin)) != EOF) {
            which = dcNone;
            if (ch == 'B') {	/* perhaps bzip2 (poor magic design...) */
                sniff[got++] = (char) ch;
                while (got < 5) {
                    if ((ch = MY_GETC(stdin)) == EOF)
                        break;
                    sniff[got++] = (char) ch;
                }
                if (got == 5
                    && !strncmp(sniff, "BZh", (size_t) 3)
                    && isdigit(UC(sniff[3]))
                    && isdigit(UC(sniff[4]))) {
                    which = dcBzip;
                }
            } else if (ch == ']') {	/* perhaps lzma */
                sniff[got++] = (char) ch;
                while (got < 4) {
                    if ((ch = MY_GETC(stdin)) == EOF)
                        break;
                    sniff[got++] = (char) ch;
                }
                if (got == 4
                    && !memcmp(sniff, "]\0\0\200", (size_t) 4)) {
                    which = dcLzma;
                }
            } else if (ch == 0xfd) {	/* perhaps xz */
                sniff[got++] = (char) ch;
                while (got < 6) {
                    if ((ch = MY_GETC(stdin)) == EOF)
                        break;
                    sniff[got++] = (char) ch;
                }
                if (got == 6
                    && !memcmp(sniff, "\3757zXZ\0", (size_t) 6)) {
                    which = dcXz;
                }
            } else if (ch == '\037') {	/* perhaps compress, etc. */
                sniff[got++] = (char) ch;
                if ((ch = MY_GETC(stdin)) != EOF) {
                    sniff[got++] = (char) ch;
                    /* the second byte selects among gzip, compress and pack */
                    switch (ch) {
                    case 0213:
                        which = dcGzip;
                        break;
                    case 0235:
                        which = dcCompress;
                        break;
                    case 0036:
                        which = dcPack;
                        break;
                    }
                }
            } else {
                sniff[got++] = (char) ch;
            }
        }
        /*
         * The C standard only guarantees one ungetc;
         * virtually everyone allows more.
         */
        while (got != 0) {
            ungetc(sniff[--got], stdin);
        }
        if (which != dcNone
            && which != dcEmpty
            && (myfile = copy_stdin(&stdin_dir)) != 0) {
            FILE *fp;
            char *command;

            /* open pipe to decompress temporary file */
            /*
             * NOTE(review): decompressor() returns NULL when no program is
             * configured for the type, and that is passed to popen()
             * unchecked - confirm that this cannot happen here.
             */
            command = decompressor(which, myfile);
            if ((fp = popen(command, "r")) != 0) {
                do_file(fp, "stdin");
                (void) pclose(fp);
            }
            free(command);

            /* remove the temporary file and its directory */
            unlink(myfile);
            free(myfile);
            myfile = 0;
            rmdir(stdin_dir);
            free(stdin_dir);
            stdin_dir = 0;
        } else if (which != dcEmpty)
#endif
            do_file(stdin, "stdin");
    }
    summarize();
#if defined(NO_LEAKS)
    while (all_data != 0) {
        delink(all_data);
    }
#endif
    return (EXIT_SUCCESS);
}
3551