1#!/usr/bin/env perl
2
3use warnings;
4use Encode;
5use Text::Tabs;
6use Getopt::Std;
7use strict;
8
# Write the usage summary for this script to the filehandle passed as the
# only argument. The option handling further down calls it with STDERR for
# bad options and with STDOUT for -h.
sub HELP_MESSAGE
{
    my ($out) = @_;

    # Only the first section interpolates anything (the script name in $0).
    my $synopsis = <<"END_SYNOPSIS";
Usage: $0 [options] [files]

Correct whitespace errors in the Crawl repository.

END_SYNOPSIS

    my $selection = <<'END_SELECTION';
If no files are specified, defaults to files found beneath the current
directory, modified by the following options (which have no effect if file
arguments were provided):
  -a          Check the whole repository, not just the current directory.
  -m          Check only files that have been modified and added to the index.
  -M          Check only files that have been modified.

END_SELECTION

    my $behaviour = <<'END_BEHAVIOUR';
Other options are:
  -n          Dry run: Do not actually modify files.
  -t <N>      When expanding tabs, assume N-column tab stops (default 8).
  -r          Do not remove carriage returns.
  -h, --help  Display this help and exit.

END_BEHAVIOUR

    my $status = <<'END_STATUS';
Exit status:
  0 if there were no whitespace errors, or if errors were corrected.
  1 if there are still whitespace errors (only with the -n option).
  2 if an unknown option was supplied.
END_STATUS

    print {$out} $synopsis, $selection, $behaviour, $status;
}
36
# Switch variables filled in by getopts() below; Getopt::Std writes each
# switch to the package variable $opt_<letter>, hence our rather than my.
our ($opt_a, $opt_m, $opt_M, $opt_n, $opt_t, $opt_r, $opt_h);

# Send --help to stdout, and exit (with success) when it is provided.
$Getopt::Std::STANDARD_HELP_VERSION = 1;
# Fail, and display help to stderr, on a bad option.
getopts('amMnt:rh')
    or do { HELP_MESSAGE(\*STDERR); exit 2; };
# Make -h work the same as --help.
$opt_h and do { HELP_MESSAGE(\*STDOUT); exit 0; };

# Refuse a tab width that is not a positive integer before touching any
# file: Text::Tabs computes columns modulo $tabstop, so a zero or
# non-numeric width would abort the run at the first file containing a tab.
if (defined $opt_t && !($opt_t =~ /\A[0-9]+\z/ && $opt_t > 0))
{
    print STDERR "Invalid tab stop width for -t: $opt_t\n";
    HELP_MESSAGE(\*STDERR);
    exit 2;
}

my $top_level = $opt_a;                     # -a: operate on the whole repo
my $modified_only = $opt_m || $opt_M;       # -m / -M: only changed files
my $modified_cached = $opt_m && !$opt_M;    # -m alone: compare the index
my $dry_run = $opt_n;                       # -n: report, never rewrite
my $any_bad = 0;                            # set once any file needs fixing
# Imported from Text::Tabs, so use our, not my. Left at the module's default
# unless -t was given.
our $tabstop;
$tabstop = $opt_t if $opt_t;
my $strip_cr = !$opt_r;                     # -r: keep carriage returns
55
# Work out which files to check. Explicit command-line arguments take
# precedence; otherwise the list comes from git, narrowed by -a / -m / -M.
my @files = @ARGV;
unless (@files)
{
    if ($top_level)
    {
        # -a: run from the top of the work tree so git lists every file.
        my $tldir = `git rev-parse --show-toplevel`;
        die "(-a) git rev-parse --show-toplevel failed"
            if $? != 0 or !defined $tldir;
        chomp $tldir;
        chdir($tldir) or die "(-a) cannot chdir to $tldir: $!";
    }

    # -m compares the index against HEAD (--cached); -M compares the work
    # tree against HEAD. Without either, every tracked file is listed.
    my @git_args = $modified_only
        ? ('diff-index', '-M', '--name-only',
           ($modified_cached ? '--cached' : ()), '--relative', 'HEAD')
        : ('ls-files');
    my $git_desc = "git $git_args[0]";

    # List-form pipe open: no shell involved, and a failure to start git is
    # reported by open itself.
    open(my $flist, '-|', 'git', @git_args)
        or die "Can't run $git_desc: $!";
    @files = <$flist>;
    # A non-zero exit from git only shows up when the pipe is closed; in
    # that case $! is 0 and $? holds the wait status.
    close($flist)
        or die($! ? "Error reading from $git_desc: $!"
                  : "$git_desc failed (wait status $?)");
}
79
# Check every candidate file and, unless this is a dry run, rewrite the ones
# that needed fixing. One line is printed per kind of problem found in a
# file; $any_bad records whether any file differed from its fixed form.
for my $file (@files)
{
    chomp $file;
    next if -d $file;
    next if -l $file;
    unless (-f $file)
    {
        print STDERR "Can't read $file\n";
        next;
    }

    # Paths and file types that are never checked.
    next if $file =~ /webserver\/static\/scripts\/contrib\//i;
    next if $file =~ /util\/server\//i;
    next if $file =~ /dat\/dist_bones\//i;
    next if $file =~ /\.(png|gif|xpm|ttf|ico|icns|fig|tex|eps|pdf|psd)$/i;
    next if $file =~ /\.(sln|vim|pbxproj|vsprops|plist|csproj|config|cs)$/i;
    next if $file =~ /\.(vcproj|vcproj\.user|vcxproj|vcxproj\.filters|terminal)$/i;
    next if $file =~ m[(^|/)\.git(modules|attributes)$];
    next if $file =~ /\.(lex|tab)\./;
    # catch.hpp is autogenerated by the catch2 team, but manually placed
    # into the dcss tree. Altering it to fit crawl's coding style would
    # just be needlessly confusing.
    next if $file =~ /catch\.hpp/;
    # Files without an extension are only checked when they live under util/.
    next if $file !~ /\./ and $file !~ /util\//;

    my $tab = $file =~ /Makefile/i;     # Allow tabs for makefiles.
    my $bom = $file =~ /\.js$/;         # And BOM for these.
    my $french = $file =~ /\.des$/;     # these may need it for functionality
    $bom = 1 if $file =~ /CREDITS/;

    # Slurp the whole file as a byte string.
    open(my $in, '<', $file) or die "Can't open $file: $!";
    my $text = do { local $/; <$in> };
    die "Can't read $file: $!" unless defined $text;
    close($in);
    my $cont = $text;                   # pristine copy for the final compare

    # decode() is handed a copy ("$text") so the check cannot modify $text.
    unless (eval { decode("UTF-8", "$text", Encode::FB_CROAK); 1 })
    {
        print "invalid UTF-8: $file\n";
        # We don't know the actual encoding; assume Windows-1252 (Western
        # European). Files in other legacy encodings will be converted
        # wrongly.
        Encode::from_to($text, "CP1252", "UTF-8");
    }

    unless ($text =~ /\n$/s or $text =~ /^$/)
    {
        $text .= "\n";
        print "missing newline at eof: $file\n";
    }
    if (!$tab && $text =~ /\t/)
    {
        print "tab: $file\n";
        $text = expand($text);
    }
    print "CR: $file\n" if $strip_cr and $text =~ s/\r//sg;
    # Note: it's a byte string, as we had to handle invalid encodings above,
    # and $cont may be invalid.
    print "zero width space: $file\n" if $text =~ s/\xe2\x80\x8b//sg; # U+200B
    print "BOM: $file\n" if !$bom && $text =~ s/\xef\xbb\xbf//sg; # U+FEFF
    # The end-of-line and end-of-file cleanups run after the removals above,
    # so blanks that only become trailing once a CR (or similar) is gone are
    # caught in the same pass instead of needing a second run.
    print "spaces at eol: $file\n" if $text =~ s/ +\n/\n/sg;
    print "extra newlines at eof: $file\n" if $text =~ s/\n+\n$/\n/s;
    print "Control statement space: $file\n" if $file =~ /\.(cc|h|js)$/i
        && $text =~ s/\b(if|while|for|switch|catch)\(/$1 (/sg;
    print "French spacing fix: $file\n"
        if !$french && $text =~ s/\. ( [[:upper:]])/.$1/sg;

    if ($text ne $cont)
    {
        $any_bad = 1;
        if (!$dry_run)
        {
            open(my $out, '>', $file) or die "Can't write $file: $!";
            print {$out} $text or die "Can't write $file: $!";
            # Buffered write errors only surface when the handle is closed.
            close($out) or die "Can't write $file: $!";
        }
    }
}
141
# A dry run leaves the files untouched, so tell the user how to apply the
# fixes and report the outstanding problems through the exit status.
if ($any_bad && $dry_run)
{
    print "Found unnecessary whitespace in the above files.\n",
          "Re-run this command (without -n) to automatically remove it.\n";
    exit 1;
}
147