#!/usr/bin/env perl

# Whitespace checker/fixer for files tracked by git.
#
# For every selected file this script normalises trailing newlines, expands
# tabs (except in makefiles), strips trailing spaces, carriage returns,
# zero-width spaces and stray BOMs, inserts a space after control keywords in
# C++/JS sources, and collapses double spaces after a full stop.  Files are
# rewritten in place unless -n (dry run) is given.

use strict;
use warnings;
use Encode;
use Text::Tabs;
use Getopt::Std;

# Print the usage text to the given filehandle.
#
# Getopt::Std calls this for --help (passing the output handle as the first
# argument); the script also calls it directly for -h and for option errors.
sub HELP_MESSAGE
{
    my $fh = shift;
    print $fh <<"EOF";
Usage: $0 [options] [files]

Correct whitespace errors in the Crawl repository.

If no files are specified, defaults to files found beneath the current
directory, modified by the following options (which have no effect if file
arguments were provided):
  -a          Check the whole repository, not just the current directory.
  -m          Check only files that have been modified and added to the index.
  -M          Check only files that have been modified.

Other options are:
  -n          Dry run: Do not actually modify files.
  -t <N>      When expanding tabs, assume N-column tab stops (default 8).
  -r          Do not remove carriage returns.
  -h, --help  Display this help and exit.

Exit status:
  0  if there were no whitespace errors, or if errors were corrected.
  1  if there are still whitespace errors (only with the -n option).
  2  if an unknown or invalid option was supplied.
EOF
}

our ($opt_a, $opt_m, $opt_M, $opt_n, $opt_t, $opt_r, $opt_h);

# Send --help to stdout, and exit (with success) when it is provided.
$Getopt::Std::STANDARD_HELP_VERSION = 1;
# Fail, and display help to stderr, on a bad option.
unless (getopts('amMnt:rh'))
{
    HELP_MESSAGE(\*STDERR);
    exit 2;
}
# Make -h work the same as --help.
if ($opt_h)
{
    HELP_MESSAGE(\*STDOUT);
    exit 0;
}

my $top_level       = $opt_a;
my $modified_only   = $opt_m || $opt_M;
my $modified_cached = $opt_m && !$opt_M;
my $dry_run         = $opt_n;
my $strip_cr        = !$opt_r;
my $any_bad         = 0;

# Imported from Text::Tabs, so use our, not my.
our $tabstop;
if (defined $opt_t)
{
    # A tab stop that is not a positive integer would make Text::Tabs
    # misbehave in the middle of the run, so reject it before touching
    # any file.
    unless ($opt_t =~ /\A[1-9][0-9]*\z/)
    {
        print STDERR "Invalid tab stop width for -t: $opt_t\n";
        HELP_MESSAGE(\*STDERR);
        exit 2;
    }
    $tabstop = $opt_t;
}

# Work out which files to check: explicit arguments win, otherwise ask git.
my @files = @ARGV;
unless (@files)
{
    if ($top_level)
    {
        my $tldir = `git rev-parse --show-toplevel`;
        chomp $tldir;
        chdir($tldir) or die "(-a) cannot chdir to $tldir: $!";
    }

    my $git_cmd;
    if ($modified_only)
    {
        my $cached = $modified_cached ? "--cached" : "";
        $git_cmd = "git diff-index -M --name-only $cached --relative HEAD";
    }
    else
    {
        $git_cmd = "git ls-files";
    }

    open my $list, '-|', $git_cmd or die "Can't run $git_cmd: $!";
    @files = <$list>;
    # For a pipe, close() reports a non-zero exit status of the child; without
    # this check a failing git would look like "no files to check".
    close $list
        or die "'$git_cmd' failed"
             . ($! ? ": $!" : " (exit status " . ($? >> 8) . ")");
}

for my $file (@files)
{
    chomp $file;
    next if -d $file;
    next if -l $file;
    unless (-f $file)
    {
        print STDERR "Can't read $file\n";
        next;
    }

    # Third-party, generated, binary and project/IDE files are left alone.
    next if $file =~ /webserver\/static\/scripts\/contrib\//i;
    next if $file =~ /util\/server\//i;
    next if $file =~ /dat\/dist_bones\//i;
    next if $file =~ /\.(png|gif|xpm|ttf|ico|icns|fig|tex|eps|pdf|psd)$/i;
    next if $file =~ /\.(sln|vim|pbxproj|vsprops|plist|csproj|config|cs)$/i;
    next if $file =~ /\.(vcproj|vcproj\.user|vcxproj|vcxproj\.filters|terminal)$/i;
    next if $file =~ m[(^|/)\.git(modules|attributes)$];
    next if $file =~ /\.(lex|tab)\./;
    # catch.hpp is autogenerated by the catch2 team, but manually placed
    # into the dcss tree. Altering it to fit crawl's coding style would
    # just be needlessly confusing.
    next if $file =~ /catch\.hpp/;
    # Extensionless files are only checked when they live under util/.
    next if $file !~ /\./ and $file !~ /util\//;

    my $allow_tabs   = $file =~ /Makefile/i;  # Allow tabs for makefiles.
    my $allow_bom    = $file =~ /\.js$/;      # And BOM for these.
    my $allow_french = $file =~ /\.des$/;     # these may need it for functionality
    $allow_bom = 1 if $file =~ /CREDITS/;

    # Slurp the whole file as a byte string.
    open my $in, '<', $file or die "Can't open $file: $!";
    my $original = do { local $/; <$in> };
    close $in;

    my $text = $original;

    # decode() with a CHECK argument may modify its input, so probe a copy.
    my $probe = $text;
    my $valid_utf8 = eval { decode("UTF-8", $probe, Encode::FB_CROAK); 1 };
    unless ($valid_utf8)
    {
        print "invalid UTF-8: $file\n";
        # We don't know the actual encoding; assume CP1252 (Western European
        # Windows). Text written in other legacy code pages will be
        # mis-converted.
        Encode::from_to($text, "CP1252", "UTF-8");
    }

    unless ($text =~ /\n$/s or $text =~ /^$/)
    {
        $text .= "\n";
        print "missing newline at eof: $file\n";
    }

    print "extra newlines at eof: $file\n" if $text =~ s/\n+\n$/\n/s;

    if (!$allow_tabs && $text =~ /\t/)
    {
        print "tab: $file\n";
        $text = expand($text);
    }

    print "spaces at eol: $file\n" if $text =~ s/ +\n/\n/sg;
    print "CR: $file\n" if $strip_cr and $text =~ s/\r//sg;

    # Note: it's a byte string, as we had to handle invalid encodings above,
    # and $original may be invalid. Hence the UTF-8 byte sequences below.
    print "zero width space: $file\n"
        if $text =~ s/\xe2\x80\x8b//sg;                 # U+200B
    print "BOM: $file\n"
        if !$allow_bom && $text =~ s/\xef\xbb\xbf//sg;  # U+FEFF

    print "Control statement space: $file\n"
        if $file =~ /\.(cc|h|js)$/i
           && $text =~ s/\b(if|while|for|switch|catch)\(/$1 (/sg;

    # Two spaces after a full stop before a capital letter become one.
    print "French spacing fix: $file\n"
        if !$allow_french && $text =~ s/\. ( [[:upper:]])/.$1/sg;

    next if $text eq $original;

    $any_bad = 1;
    next if $dry_run;

    # The file is truncated on open, so every step of the rewrite is checked:
    # buffered write errors may only surface at close().
    open my $out, '>', $file or die "Can't write $file: $!";
    print {$out} $text       or die "Can't write $file: $!";
    close $out               or die "Can't finish writing $file: $!";
}

if ($dry_run and $any_bad)
{
    print "Found unnecessary whitespace in the above files.\n";
    print "Re-run this command (without -n) to automatically remove it.\n";
    exit 1;
}