1#!/usr/local/bin/perl
2
3# Licensed to the Apache Software Foundation (ASF) under one or more
4# contributor license agreements.  See the NOTICE file distributed with
5# this work for additional information regarding copyright ownership.
6# The ASF licenses this file to You under the Apache License, Version 2.0
7# (the "License"); you may not use this file except in compliance with
8# the License.  You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18#
19#  Heuristically converts line endings to the current OS's preferred format
20#
21#  All existing line endings must be identical (e.g. lf's only, or even
22#  the accedental cr.cr.lf sequence.)  If some lines end lf, and others as
23#  cr.lf, the file is presumed binary.  If the cr character appears anywhere
24#  except prefixed to an lf, the file is presumed binary.  If there is no
25#  change in the resulting file size, or the file is binary, the conversion
26#  is discarded.
27#
28#  Todo: Handle NULL stdin characters gracefully.
29#
30
31use IO::File;
32use File::Find;
33
34# The ignore list is '-' separated, with this leading hyphen and
35# trailing hyphens in ever concatinated list below.
36$ignore = "-";
37
38# Image formats
39$ignore .= "gif-jpg-jpeg-png-ico-bmp-";
40
41# Archive formats
42$ignore .= "tar-gz-z-zip-jar-war-bz2-tgz-";
43
44# Many document formats
45$ignore .= "eps-psd-pdf-ai-";
46
47# Some encodings
48$ignore .= "ucs2-ucs4-";
49
50# Some binary objects
51$ignore .= "class-so-dll-exe-obj-a-o-lo-slo-sl-dylib-";
52
53# Some build env files
54$ignore .= "mcp-xdc-ncb-opt-pdb-ilk-sbr-";
55
56$preservedate = 1;
57
58$forceending = 0;
59
60$givenpaths = 0;
61
62$notnative = 0;
63
64while (defined @ARGV[0]) {
65    if (@ARGV[0] eq '--touch') {
66        $preservedate = 0;
67    }
68    elsif (@ARGV[0] eq '--nocr') {
69        $notnative = -1;
70    }
71    elsif (@ARGV[0] eq '--cr') {
72        $notnative = 1;
73    }
74    elsif (@ARGV[0] eq '--force') {
75        $forceending = 1;
76    }
77    elsif (@ARGV[0] eq '--FORCE') {
78        $forceending = 2;
79    }
80    elsif (@ARGV[0] =~ m/^-/) {
81        die "What is " . @ARGV[0] . " supposed to mean?\n\n"
82	  . "Syntax:\t$0 [option()s] [path(s)]\n\n" . <<'OUTCH'
83Where:	paths specifies the top level directory to convert (default of '.')
84	options are;
85
86	  --cr     keep/add one ^M
87	  --nocr   remove ^M's
88	  --touch  the datestamp (default: keeps date/attribs)
89	  --force  mismatched corrections (unbalanced ^M's)
90	  --FORCE  all files regardless of file name!
91
92OUTCH
93    }
94    else {
95        find(\&totxt, @ARGV[0]);
96	print "scanned " . @ARGV[0] . "\n";
97	$givenpaths = 1;
98    }
99    shift @ARGV;
100}
101
102if (!$givenpaths) {
103    find(\&totxt, '.');
104    print "did .\n";
105}
106
107sub totxt {
108        $oname = $_;
109	$tname = '.#' . $_;
110        if (!-f) {
111            return;
112        }
113	@exts = split /\./;
114	if ($forceending < 2) {
115            while ($#exts && ($ext = pop(@exts))) {
116                if ($ignore =~ m|-$ext-|i) {
117                    return;
118                }
119	    }
120        }
121	@ostat = stat($oname);
122        $srcfl = new IO::File $oname, "r" or die;
123	$dstfl = new IO::File $tname, "w" or die;
124        binmode $srcfl;
125	if ($notnative) {
126            binmode $dstfl;
127	}
128	undef $t;
129        while (<$srcfl>) {
130            if (s/(\r*)\n$/\n/) {
131		$n = length $1;
132		if (!defined $t) {
133		    $t = $n;
134		}
135		if (!$forceending && (($n != $t) || m/\r/)) {
136		    print "mismatch in " .$oname. ":" .$n. " expected " .$t. "\n";
137		    undef $t;
138		    last;
139		}
140	        elsif ($notnative > 0) {
141                    s/\n$/\r\n/;
142                }
143            }
144	    print $dstfl $_;
145	}
146	if (defined $t && (tell $srcfl == tell $dstfl)) {
147	    undef $t;
148	}
149	undef $srcfl;
150	undef $dstfl;
151	if (defined $t) {
152            unlink $oname or die;
153            rename $tname, $oname or die;
154            @anames = ($oname);
155            if ($preservedate) {
156                utime $ostat[9], $ostat[9], @anames;
157            }
158            chmod $ostat[2] & 07777, @anames;
159            chown $ostat[5], $ostat[6], @anames;
160            print "Converted file " . $oname . " to text in " . $File::Find::dir . "\n";
161	}
162	else {
163	    unlink $tname or die;
164	}
165}
166