#!/usr/local/bin/perl

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
#  Heuristically converts line endings to the current OS's preferred format
#
#  All existing line endings must be identical (e.g. lf's only, or even
#  the accidental cr.cr.lf sequence.)  If some lines end lf, and others as
#  cr.lf, the file is presumed binary.  If the cr character appears anywhere
#  except prefixed to an lf, the file is presumed binary.  If there is no
#  change in the resulting file size, or the file is binary, the conversion
#  is discarded.
#
#  Todo: Handle NULL stdin characters gracefully.
#

use strict;
use warnings;

use IO::File;
use File::Find;

# The ignore list is '-' separated, with this leading hyphen and a
# trailing hyphen in every concatenated list below.
my $ignore = "-";

# Image formats
$ignore .= "gif-jpg-jpeg-png-ico-bmp-";

# Archive formats
$ignore .= "tar-gz-z-zip-jar-war-bz2-tgz-";

# Many document formats
$ignore .= "eps-psd-pdf-ai-";

# Some encodings
$ignore .= "ucs2-ucs4-";

# Some binary objects
$ignore .= "class-so-dll-exe-obj-a-o-lo-slo-sl-dylib-";

# Some build env files
$ignore .= "mcp-xdc-ncb-opt-pdb-ilk-sbr-";

# Lookup table built from the list above.  Extensions are compared
# case-insensitively, so the keys are stored in lower case.
my %ignored;
$ignored{ lc $_ } = 1 for grep { length } split /-/, $ignore;

# 1: restore the original modification time after converting (--touch clears)
my $preservedate = 1;

# 0: discard files with inconsistent endings; 1 (--force): convert them
# anyway; 2 (--FORCE): additionally disregard the ignore list
my $forceending = 0;

# Set once at least one path argument has been scanned
my $givenpaths = 0;

# 0: native line endings; -1 (--nocr): bare lf; 1 (--cr): cr.lf
my $notnative = 0;

# Arguments are handled strictly left to right: an option only affects the
# paths that follow it, because each path is scanned as soon as it is seen.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq '--touch') {
        $preservedate = 0;
    }
    elsif ($arg eq '--nocr') {
        $notnative = -1;
    }
    elsif ($arg eq '--cr') {
        $notnative = 1;
    }
    elsif ($arg eq '--force') {
        $forceending = 1;
    }
    elsif ($arg eq '--FORCE') {
        $forceending = 2;
    }
    elsif ($arg =~ m/^-/) {
        die "What is " . $arg . " supposed to mean?\n\n"
          . "Syntax:\t$0 [option(s)] [path(s)]\n\n" . <<'OUTCH';
Where:  paths specifies the top level directory to convert (default of '.')
        options are;

          --cr     keep/add one ^M
          --nocr   remove ^M's
          --touch  the datestamp (default: keeps date/attribs)
          --force  mismatched corrections (unbalanced ^M's)
          --FORCE  all files regardless of file name!

OUTCH
    }
    else {
        find(\&totxt, $arg);
        print "scanned " . $arg . "\n";
        $givenpaths = 1;
    }
}

if (!$givenpaths) {
    find(\&totxt, '.');
    print "did .\n";
}

# Close and remove a partially written temporary file, then abort.
# Used when the converted copy cannot be trusted, so the original file is
# never replaced by truncated output.
sub _abort_conversion {
    my ($handle, $tname, $message) = @_;
    close $handle if defined $handle;   # must be closed before unlink on some OSes
    unlink $tname;
    die $message;
}

# File::Find "wanted" callback: converts the line endings of one file.
#
# Reads the file named by $_ (File::Find has already changed into its
# directory), writes a converted copy to '.#<name>' and, when the conversion
# is accepted, replaces the original with that copy while restoring its
# owner, permission bits and (unless --touch was given) modification time.
# The copy is discarded when the file name carries an ignored extension,
# when the line endings are inconsistent (without --force), or when the
# converted size equals the original size.
# Dies if the file cannot be read, the copy cannot be written completely,
# or the final unlink/rename fails.
sub totxt {
    my $oname = $_;
    my $tname = '.#' . $oname;

    return if !-f $oname;

    if ($forceending < 2) {
        # Every dot-separated component after the first counts as an
        # extension, so "foo.tar.gz" and "libfoo.so.0" are both skipped.
        my @exts = split /\./, $oname;
        shift @exts;
        for my $ext (@exts) {
            return if $ignored{ lc $ext };
        }
    }

    my @ostat = stat($oname)
        or die "Cannot stat $File::Find::name: $!\n";
    my $srcfl = IO::File->new($oname, "r")
        or die "Cannot read $File::Find::name: $!\n";
    my $dstfl = IO::File->new($tname, "w")
        or die "Cannot create $tname in $File::Find::dir: $!\n";

    # The source is always read raw; the destination is only raw when an
    # explicit ending was requested, otherwise the I/O layer supplies the
    # platform's native ending for each "\n".
    binmode $srcfl;
    binmode $dstfl if $notnative;

    # Number of cr's preceding each lf, taken from the first terminated
    # line.  It stays undefined while nothing has been seen, and is reset
    # to undefined whenever the conversion must be discarded.
    my $t;

    # Read into a lexical so File::Find's $_ is left untouched.
    while (defined(my $line = <$srcfl>)) {
        if ($line =~ s/(\r*)\n$/\n/) {
            my $n = length $1;
            $t = $n if !defined $t;
            if (!$forceending && ($n != $t || $line =~ m/\r/)) {
                print "mismatch in " . $oname . ":" . $n . " expected " . $t . "\n";
                undef $t;
                last;
            }
            elsif ($notnative > 0) {
                $line =~ s/\n$/\r\n/;
            }
        }
        unless (print {$dstfl} $line) {
            my $err = $!;
            _abort_conversion($dstfl, $tname,
                "Cannot write $tname in $File::Find::dir: $err\n");
        }
    }

    # Identical sizes mean there was nothing to convert.
    if (defined $t && tell($srcfl) == tell($dstfl)) {
        undef $t;
    }

    $srcfl->close;
    # Buffered write errors (e.g. a full disk) only surface at close.
    unless ($dstfl->close) {
        my $err = $!;
        _abort_conversion(undef, $tname,
            "Cannot finish writing $tname in $File::Find::dir: $err\n");
    }

    if (defined $t) {
        unlink($oname)
            or die "Cannot remove $File::Find::name: $!\n";
        rename($tname, $oname)
            or die "Cannot rename $tname to $oname in $File::Find::dir: $!\n";

        # Best effort, as before: failures here (e.g. chown without
        # privileges) are not fatal.  stat fields 4 and 5 are uid and gid.
        # Ownership is restored before the mode because changing the owner
        # may clear the setuid/setgid bits.
        chown $ostat[4], $ostat[5], $oname;
        chmod $ostat[2] & 07777, $oname;
        if ($preservedate) {
            # Both access and modification time are set to the old mtime.
            utime $ostat[9], $ostat[9], $oname;
        }
        print "Converted file " . $oname . " to text in " . $File::Find::dir . "\n";
    }
    else {
        unlink($tname)
            or die "Cannot remove $tname in $File::Find::dir: $!\n";
    }
    return;
}