1#!/usr/local/bin/perl -w
2
3########################################################################
4#                                                                      #
5# ColorDiff - a wrapper/replacement for 'diff' producing               #
6#             colourful output                                         #
7#                                                                      #
8# Copyright (C)2002-2020 Dave Ewart (davee@sungate.co.uk)              #
9#                                                                      #
10########################################################################
11#                                                                      #
12# This program is free software; you can redistribute it and/or modify #
13# it under the terms of the GNU General Public License as published by #
14# the Free Software Foundation; either version 2 of the License, or    #
15# (at your option) any later version.                                  #
16#                                                                      #
17# This program is distributed in the hope that it will be useful,      #
18# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
20# GNU General Public License for more details.                         #
21#                                                                      #
22########################################################################
23
24use strict;
25use Getopt::Long qw(:config pass_through no_auto_abbrev);
26
# Program identity, printed in the start-up banner
my ($app_name, $version)    = ('colordiff', '1.0.19');
my ($author, $author_email) = ('Dave Ewart', 'davee@sungate.co.uk');
my $app_www                 = 'http://www.colordiff.org/';
my $copyright               = '(C)2002-2020';

# Runtime switches; each can be changed later by the configuration files
# or by command-line options
my $show_banner = 1;         # print the banner on STDERR
my $color_patch;             # undef until a setting or option decides
my $diff_cmd    = "diff";    # program run when acting as a diff wrapper
36
# ANSI escape sequences, keyed by the colour names accepted in the
# configuration files. The plain names use attribute 1, the "dark"
# variants use attribute 0, and 'off' resets the terminal.
my %colour = (
    white       => "\033[1;37m",
    yellow      => "\033[1;33m",
    green       => "\033[1;32m",
    blue        => "\033[1;34m",
    cyan        => "\033[1;36m",
    red         => "\033[1;31m",
    magenta     => "\033[1;35m",
    black       => "\033[1;30m",
    darkwhite   => "\033[0;37m",
    darkyellow  => "\033[0;33m",
    darkgreen   => "\033[0;32m",
    darkblue    => "\033[0;34m",
    darkcyan    => "\033[0;36m",
    darkred     => "\033[0;31m",
    darkmagenta => "\033[0;35m",
    darkblack   => "\033[0;30m",
    off         => "\033[0;0m",
);

# Built-in colour scheme, used for every setting that is not overridden by
# /usr/local/etc/colordiffrc or a per-user colordiffrc
my $diff_stuff = $colour{magenta};
my $diff_file  = $diff_stuff;
my $plain_text = $colour{white};
my $file_old   = $colour{red};
my $file_new   = $colour{blue};
my $cvs_stuff  = $colour{green};
64
# Configuration files, listed in the order they are read:
#   1. the system-wide file in $etcdir
#   2. the XDG per-user file, under $XDG_CONFIG_HOME when that is set and
#      non-empty, otherwise under $HOME/.config when HOME is set
#   3. the traditional ~/.colordiffrc when HOME is set
my $HOME   = $ENV{HOME};
my $etcdir = '/usr/local/etc';
my ($setting, $value);
my @config_files = ("$etcdir/colordiffrc");
{
    my $xdg_root = $ENV{XDG_CONFIG_HOME};
    my $use_xdg  = defined $xdg_root && $xdg_root ne '';

    if ($use_xdg) {
        push @config_files, "$xdg_root/colordiff/colordiffrc";
    }
    elsif (defined $ENV{HOME}) {
        push @config_files, "$ENV{HOME}/.config/colordiff/colordiffrc";
    }
}
if (defined $ENV{HOME}) {
    push @config_files, "$ENV{HOME}/.colordiffrc";
}
my $config_file;
my $diff_type = 'unknown';
79
# Return a copy of the given string in which every tab has been replaced
# by the number of spaces needed to reach the next multiple-of-8 offset.
# Offsets are counted from the start of the string. Tabs are expanded one
# at a time, left to right, so each offset already accounts for the
# expansions made before it.
sub expand_tabs_to_spaces ($) {
    my ($text) = @_;

    # $-[0] is the offset of the tab that has just been matched
    while ($text =~ s/\t/' ' x (8 - ($-[0] % 8))/e) {
        # nothing to do: the substitution is the whole loop body
    }

    $text;
}
91
# Count the arguments that mean colordiff should run the diff command
# rather than filter STDIN.
#
# Counted: anything after a "--", a lone "-", any argument that does not
# start with "-", and the options --help, --version and -v. A "--" itself
# is never counted. Scanning stops at the first undefined argument.
#
# Returns the count (0 when nothing qualifies).
sub check_for_file_arguments {
    my $file_like  = 0;
    my $seen_ddash = 0;

    for my $arg (@_) {
        last unless defined $arg;

        if ($arg eq "--") {
            $seen_ddash = 1;
        }
        elsif ($seen_ddash || $arg eq "-") {
            $file_like++;
        }
        else {
            $file_like++ if $arg !~ /^-/;
            $file_like++
                if $arg eq "--help" || $arg eq "--version" || $arg eq "-v";
        }
    }

    return $file_like;
}
114
# Guess the diff flavour from a single line.
#
# Parameters:
#    $line      the line to examine
#    $diffy_ok  1 if side-by-side ('diffy') is an acceptable answer
#
# Returns one of 'diffu', 'diffc', 'diff', 'diffy', 'wdiff', 'debdiff',
# or 'unknown' when nothing matches. The tests run in that order and the
# first match wins. This may not be perfect, but should identify most
# reasonably formatted diffs and patches.
sub detect_diff_type {
    my ($line, $diffy_ok) = @_;

    # Unified: '+++ ', '--- ' or '@@ ' at the start of the line
    return 'diffu' if $line =~ /^(\+\+\+ |--- |@@ )/;

    # Context: '***' at the start of the line
    return 'diffc' if $line =~ /^\*\*\*/;

    # Plain: NcN, NdN, NaN and their ranged forms
    return 'diff' if $line =~ /^[0-9,]+[acd][0-9,]+$/;

    # Side-by-side, only when the caller permits it.
    # FIXME - not very specific: these patterns can easily match non-diff
    # text. It is tried only after the stricter formats above have failed.
    return 'diffy'
        if ($diffy_ok == 1) && ($line =~ /(\s\|\s|\s<$|\s>\s)/);

    # wdiff deleted [-...-] or added {+...+} markers
    return 'wdiff'
        if $line =~ /\[-.*?-\]/s
            || $line =~ /\{\+.*?\+\}/s;

    # FIXME - a bit risky, but with every other type ruled out this line
    # usually indicates debdiff output
    return 'debdiff' if $line =~ /^Control files: lines which differ/;

    return 'unknown';
}
164
# Options handled by colordiff itself. Getopt::Long is loaded with
# pass_through, so any option not listed here is left in @ARGV.
my $enable_verifymode;
my $specified_difftype;
my $enable_fakeexitcode;
my $color_mode = "auto";
my $color_term_output_only = "no";
GetOptions(
    # --verifymode option is for testing behaviour of colordiff
    # against standard test diffs
    "verifymode" => \$enable_verifymode,
    "fakeexitcode" => \$enable_fakeexitcode,
    "difftype=s" => \$specified_difftype,
    "color=s" => \$color_mode,
    "color-term-output-only=s" => \$color_term_output_only
);

# Validate --difftype. The alternation is grouped so that both anchors
# apply to every alternative; ungrouped, a value such as 'diffzzz' would
# be accepted. An invalid value is fatal, because no colouring rule would
# ever match it. Status 2 is used because diff reserves 1 for
# "differences found".
if (defined $specified_difftype
    and $specified_difftype !~ /\A(?:diff[cuy]?|(?:deb|w)diff)\z/) {
    print STDERR "Invalid --difftype value\n";
    exit 2;
}

if (defined $enable_verifymode) {
    # When in verify mode, to ensure consistent output we don't source
    # any external config files, so we use built-in defaults
    # and we add colour to our patches
    @config_files = ();
    $color_patch  = 1;
    $show_banner  = 0;
}
193
# Read each configuration file in turn; a file that cannot be opened is
# silently skipped. Later files override settings from earlier ones.
#
# Each line has the form 'setting=value', with all whitespace ignored.
# Blank lines and lines starting with '#' are skipped. Recognised
# settings:
#   banner         'no' switches the start-up banner off
#   color_patches  'yes' or 'no'
#   diff_cmd       program to run when acting as a diff wrapper
#   plain, oldtext, newtext, diffstuff, difffile, cvsstuff
#                  a colour: a name from %colour ('normal' and 'none'
#                  mean 'off'), a number 0-255, or a 24-bit
#                  specification such as 38;2;R;G;B
foreach $config_file (@config_files) {
    # Three-argument open, so that no character in the file name can be
    # interpreted as an open mode
    if (open (my $rc_fh, '<', $config_file)) {
        while (<$rc_fh>) {
            my $colourval;

            # chomp, not chop: a final line without a newline must not
            # lose its last character
            chomp;
            # Strip whitespace before the comment/blank test so indented
            # comments and whitespace-only lines are skipped as well
            s/\s+//g;
            next if (/^#/ || /^$/);
            ($setting, $value) = split ('=');
            if (!defined $value) {
                print STDERR "Invalid configuration line ($_) in $config_file\n";
                next;
            }
            if ($setting eq 'banner') {
                if ($value eq 'no') {
                    $show_banner = 0;
                }
                next;
            }
            if ($setting eq 'color_patches') {
                if ($value eq 'yes') {
                    $color_patch = 1;
                } elsif ($value eq 'no') {
                    $color_patch = 0;
                }
                next;
            }
            if ($setting eq 'diff_cmd') {
                $diff_cmd = $value;
                next;
            }
            # The colour settings are matched case-insensitively
            $setting =~ tr/A-Z/a-z/;
            $value   =~ tr/A-Z/a-z/;
            if (($value eq 'normal') || ($value eq 'none')) {
                $value = 'off';
            }
            # Find full 24-bit colour spec string
            if (( $value =~ m/^([0-9];)*([34]8);2;([0-9]+);([0-9]+);([0-9]+)$/) &&
                    ($3 >= 0) && ($3 <= 255) &&
                    ($4 >= 0) && ($4 <= 255) &&
                    ($5 >= 0) && ($5 <= 255)
                    ) {
                # The whole validated string is the escape parameter list.
                # Rebuilding it from $1 would drop all but the last
                # attribute prefix, and $1 is undefined with no prefix.
                $colourval = "\033[${value}m";
            }
            # 256 colour single value
            elsif ($value =~ m/^[0-9]+$/ && $value >= 0 && $value <= 255) {
                # 0-7 use the 30-37 codes, 8-15 use the 90-97 codes,
                # and everything else uses the 38;5;N form
                if( $value < 8 ) {
                    $colourval = "\033[0;3${value}m";
                }
                elsif( $value < 16 ) {
                    $colourval = "\033[0;9" . (${value} - 8) . "m";
                }
                else {
                    $colourval = "\033[0;38;5;${value}m";
                }
            }
            elsif (defined($colour{$value})) {
                $colourval = $colour{$value};
            }
            else {
                print STDERR "Invalid colour specification for setting $setting ($value) in $config_file\n";
                next;
            }
            if ($setting eq 'plain') {
                $plain_text = $colourval;
            }
            elsif ($setting eq 'oldtext') {
                $file_old = $colourval;
            }
            elsif ($setting eq 'newtext') {
                $file_new = $colourval;
            }
            elsif ($setting eq 'diffstuff') {
                $diff_stuff = $colourval;
            }
            elsif ($setting eq 'difffile') {
                $diff_file = $colourval;
            }
            elsif ($setting eq 'cvsstuff') {
                $cvs_stuff = $colourval;
            }
            else {
                print STDERR "Unknown option in $config_file: $setting\n";
            }
        }
        close $rc_fh;
    }
}
283
# --color=(yes|always) and --color=(no|never) override the color_patches
# setting. --color=auto, which is also the default when the option is not
# given, leaves $color_patch as the configuration files (or verify mode)
# set it. Resetting it to undef here would throw those settings away on
# every run that does not pass --color.
if ($color_mode eq "yes" || $color_mode eq "always") {
    $color_patch = 1;
} elsif ($color_mode eq "no" || $color_mode eq "never") {
    $color_patch = 0;
}

# If output is to a file, switch off colours unless overridden by $color_patch.
# Relates to http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=378563
# Relates to http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=862878
#
# Colours are switched off when:
#   - $color_patch is explicitly 0, or
#   - $color_patch is undecided (undef) and STDOUT is a plain file, or
#   - --color-term-output-only=yes was given and STDOUT is not a terminal
if ( (!$color_patch && (defined $color_patch || -f STDOUT)) ||
     ($color_term_output_only eq 'yes' && !-t STDOUT) )
{
    $plain_text  = '';
    $file_old    = '';
    $file_new    = '';
    $diff_file   = '';
    $diff_stuff  = '';
    $cvs_stuff   = '';
    $colour{off} = '';
}
308
# Disable output buffering. This allows "producer | colordiff | less" to output
# earlier without having to wait for 'producer' to finish.
select STDOUT;
$| = 1;

# ----------------------------------------------------------------------------

if ($show_banner == 1) {
    print STDERR "$app_name $version ($app_www)\n";
    print STDERR "$copyright $author, $author_email\n\n";
}

# 1 = file arguments were given, so run the diff command and colour its
#     output; 2 = no file arguments, so act as a filter on STDIN
my $operating_methodology;

if (check_for_file_arguments (@ARGV)) {
    $operating_methodology = 1; # we have files as arg, so we act as diff
} else {
    $operating_methodology = 2; # no files as args, so operate as filter
}

# Lines already read from the input while working out the diff type;
# they are replayed before any further input is read
my @inputstream;

my $inputhandle;
my $pid;
if ($operating_methodology == 1) {
    # Read the output of the diff program through a pipe. The list form
    # runs the command directly, without a shell.
    $pid = open($inputhandle, "-|", "$diff_cmd", @ARGV);
    if (!defined $pid) {
        # Without this check the script would go on to read from an
        # invalid handle and call waitpid on an undefined pid
        print STDERR "$app_name: cannot run '$diff_cmd': $!\n";
        exit 2;
    }
} else {
    $inputhandle = \*STDIN;
}
339
# Work out which kind of diff is being read. Every line consumed here is
# kept in @inputstream so that it can still be coloured and printed later.

# $lastline is false if the input is EOF. If true, then either more data is
# available, or the last read succeeded (and the next read may return EOF).
# Initially assume that the input is not EOF (for obvious reasons).
my $lastline = 1;
my $record;

if (!defined $specified_difftype) {
    # No --difftype given: read until a line identifies the format
    # (diffy is permitted) or the input runs out
    while (defined($_ = <$inputhandle>)) {
        push @inputstream, $_;
        last if ($diff_type = detect_diff_type($_, 1)) ne 'unknown';
    }
    $lastline = $_;
}
else {
    $diff_type = $specified_difftype;
    # diffy needs at least one line to look at
    if ($diff_type eq 'diffy') {
        $_ = <$inputhandle>;
        push @inputstream, $_ if defined $_;
        $lastline = $_;
    }
}
368
# Context diffs only: 1 while inside the old-file half of a hunk, 0 while
# inside the new-file half. Decides the colour of '!' lines.
my $inside_file_old = 1;

# ------------------------------------------------------------------------------
# Special pre-processing for side-by-side diffs
# Figure out the location of the central marker column. Each column is
# examined as a two-character window: it remains a possible separator as
# long as every line has a space followed by one of ' ', '(', '|', '<', '>'
# there, and it collects one vote for each line where the second character
# is '|', '<' or '>'.
# This is not a 100% certain match, but should be good enough
my $diffy_sep_col  = 0;    # offset of the chosen separator column
my $mostlikely_sum = 0;    # number of votes the chosen column received

if ($diff_type eq 'diffy') {
    # Not very elegant, but does the job

    my $longest_record = -1;
    my %separator_col  = ();    # column => 1 while still a possible separator
    my %candidate_col  = ();    # column => number of votes
    my $possible_cols = 0;      # columns that have at least one vote
    my @checkbuffer;

    # Everything read so far is examined first; each line is pushed back
    # onto @inputstream as it is processed
    @checkbuffer = @inputstream;
    @inputstream = ();

    while (@checkbuffer) {
        $_ = shift @checkbuffer;
        push @inputstream, $_;
        $_ = expand_tabs_to_spaces($_);

        # A longer line than any seen so far: initialise its new columns
        if (length ($_) > $longest_record) {
            my $i = $longest_record + 1;

            $longest_record = length ($_);
            while ($i <= $longest_record) {
                $separator_col{$i} = 1;
                $candidate_col{$i} = 0;
                $i++;
            }
        }

        for (my $i = 0 ; $i < (length ($_) - 2) ; $i++) {
            next if ($separator_col{$i} == 0);
            # Header lines take no part in the vote
            next if ($_ =~ /^(Index: |={4,}|RCS file: |retrieving |diff )/);
            my $subsub = substr ($_, $i, 2);
            if ($subsub !~ / [ (|<>]/) {
                $separator_col{$i} = 0;
                if ($candidate_col{$i} > 0) {
                    $possible_cols--;
                }
            }
            if ($subsub =~ / [|<>]/) {
                $candidate_col{$i}++;
                if ($candidate_col{$i} == 1) {
                    $possible_cols++;
                }
            }
        }

        if ( !@checkbuffer ) {
            # When the type was auto-detected, no column has a vote and
            # this line identifies another format, switch to that format
            if (! (defined $specified_difftype) and
                $possible_cols == 0 && detect_diff_type($_, 0) ne 'unknown') {
                $diff_type = detect_diff_type($_, 0);
                last;
            }
            # Otherwise keep reading, one line at a time, until EOF
            if (defined ($_ = <$inputhandle>)) {
                push @checkbuffer, $_;
            }
            $lastline = $_;
        }
    }

    # Choose the surviving column with the most votes; on a tie the
    # leftmost column wins
    for (my $i = 0 ; $i < $longest_record - 2 ; $i++) {
        if ($separator_col{$i} == 1) {
            if ($candidate_col{$i} > $mostlikely_sum) {
                $diffy_sep_col  = $i;
                # Record the vote count, not the column index: this value
                # is what later vote counts are compared against
                $mostlikely_sum = $candidate_col{$i};
            }
        }
    }
    # If we don't find a suitable separator column then
    # we've probably misidentified the input as diffy
    # Search stream again, this time excluding diffy
    # as a possible outcome
    if ($diffy_sep_col == 0) {
        # Detect diff type, diffy is NOT permitted
        foreach (@inputstream) {
            $diff_type = detect_diff_type($_, 0);
            last if $diff_type ne 'unknown';
        }
    }
}
458# ------------------------------------------------------------------------------
459
# Main pass: colour each line and print it to STDOUT.
#
# Lines buffered in @inputstream during detection are replayed first;
# after that, lines are read from $inputhandle unless $lastline records
# that the input has already reached EOF. For most diff types a colour
# prefix chosen from the start of the line is printed, followed by the
# line itself with $colour{off} inserted at its end. wdiff and debdiff
# lines are coloured in place around their [-...-] and {+...+} markers.
while (defined( $_ = @inputstream ? shift @inputstream : ($lastline and <$inputhandle>) )) {
    if (/^Binary files (.*) and (.*) differ$/) {
        print "Binary files $file_old$1$plain_text and $file_new$2$plain_text differ\n";
        next;
    }
    if ($diff_type eq 'diff') {
        if (/^</) {
            print "$file_old";
        }
        elsif (/^>/) {
            print "$file_new";
        }
        elsif (/^[0-9]/) {
            print "$diff_stuff";
        }
        elsif (/^(Index: |={4,}|RCS file: |retrieving |diff )/) {
            print "$cvs_stuff";
        }
        elsif (/^Only in/) {
            print "$diff_file";
        }
        else {
            print "$plain_text";
        }
    }
    elsif ($diff_type eq 'diffc') {
        if (/^- /) {
            print "$file_old";
        }
        elsif (/^\+ /) {
            print "$file_new";
        }
        elsif (/^\*{4,}/) {
            print "$diff_file";
        }
        elsif (/^Only in/) {
            print "$diff_file";
        }
        # Old-file hunk header: '*** N,M ****', or '*** N ****' when the
        # range is given as a single line number. The single-number form
        # must be recognised here too; otherwise it is coloured as a file
        # header and $inside_file_old is left stale for the '!' lines
        # that follow.
        elsif (/^\*\*\* (?:[0-9]+,[0-9]+|[0-9]+ \*{4})/) {
            print "$diff_file";
            $inside_file_old = 1;
        }
        elsif (/^\*\*\* /) {
            print "$file_old";
        }
        # New-file hunk header: '--- N,M ----' or '--- N ----'
        elsif (/^--- (?:[0-9]+,[0-9]+|[0-9]+ -{4})/) {
            print "$diff_file";
            $inside_file_old = 0;
        }
        elsif (/^--- /) {
            print "$file_new";
        }
        elsif (/^!/) {
            # A changed line takes the colour of the half of the hunk
            # it appears in
            if ($inside_file_old == 1) {
                print "$file_old";
            }
            else {
                print "$file_new";
            }
        }
        elsif (/^(Index: |={4,}|RCS file: |retrieving |diff )/) {
            print "$cvs_stuff";
        }
        else {
            print "$plain_text";
        }
    }
    elsif ($diff_type eq 'diffu') {
        if (/^(---|\+\+\+) /) {
            print "$diff_file";
        }
        elsif (/^-/) {
            print "$file_old";
        }
        elsif (/^\+/) {
            print "$file_new";
        }
        elsif (/^\@/) {
            print "$diff_stuff";
        }
        elsif (/^Only in/) {
            print "$diff_file";
        }
        elsif (/^(Index: |={4,}|RCS file: |retrieving |diff )/) {
            print "$cvs_stuff";
        }
        else {
            print "$plain_text";
        }
    }
    # Works with previously-identified column containing the diff-y
    # separator characters
    elsif ($diff_type eq 'diffy') {
        # Tabs are expanded so that the separator offset lines up; the
        # expanded line is also what gets printed
        $_ = expand_tabs_to_spaces($_);
        if (length ($_) > ($diffy_sep_col + 2)) {
            my $sepchars = substr ($_, $diffy_sep_col, 2);
            if ($sepchars eq ' <') {
                print "$file_old";
            }
            elsif ($sepchars eq ' |') {
                print "$diff_stuff";
            }
            elsif ($sepchars eq ' >') {
                print "$file_new";
            }
            else {
                print "$plain_text";
            }
        }
        elsif (/^Only in/) {
            print "$diff_file";
        }
        else {
            print "$plain_text";
        }
    }
    elsif ($diff_type eq 'wdiff') {
        $_ =~ s/(\[-.+?-\])/$file_old$1$colour{off}/g;
        $_ =~ s/(\{\+.+?\+\})/$file_new$1$colour{off}/g;
    }
    elsif ($diff_type eq 'debdiff') {
        $_ =~ s/(\[-.+?-\])/$file_old$1$colour{off}/g;
        $_ =~ s/(\{\+.+?\+\})/$file_new$1$colour{off}/g;
    }
    # Reset the colour at the end of the line, before its newline
    s/$/$colour{off}/;
    print "$_";
}
587
# Exit status: 0 when filtering STDIN. When the diff command was run, wait
# for it and use the high byte of its wait status. --fakeexitcode forces a
# status of 0 in either case.
my $exitcode = 0;
if ($operating_methodology == 1) {
    waitpid $pid, 0;
    $exitcode = $? >> 8;
}
exit(defined $enable_fakeexitcode ? 0 : $exitcode);
599