1#!./perl 2# $Id: piconv,v 2.4 2009/07/08 13:34:15 dankogai Exp $ 3# 4use 5.8.0; 5use strict; 6use Encode ; 7use Encode::Alias; 8my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); 9 10use File::Basename; 11my $name = basename($0); 12 13use Getopt::Long qw(:config no_ignore_case); 14 15my %Opt; 16 17help() 18 unless 19 GetOptions(\%Opt, 20 'from|f=s', 21 'to|t=s', 22 'list|l', 23 'string|s=s', 24 'check|C=i', 25 'c', 26 'perlqq|p', 27 'htmlcref', 28 'xmlcref', 29 'debug|D', 30 'scheme|S=s', 31 'resolve|r=s', 32 'help', 33 ); 34 35$Opt{help} and help(); 36$Opt{list} and list_encodings(); 37my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; 38defined $Opt{resolve} and resolve_encoding($Opt{resolve}); 39$Opt{from} || $Opt{to} || help(); 40my $from = $Opt{from} || $locale or help("from_encoding unspecified"); 41my $to = $Opt{to} || $locale or help("to_encoding unspecified"); 42$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit; 43my $scheme = do { 44 if (defined $Opt{scheme}) { 45 if (!exists $Scheme{$Opt{scheme}}) { 46 warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n"; 47 'from_to'; 48 } else { 49 $Opt{scheme}; 50 } 51 } else { 52 'from_to'; 53 } 54}; 55 56$Opt{check} ||= $Opt{c}; 57$Opt{perlqq} and $Opt{check} = Encode::PERLQQ; 58$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF; 59$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF; 60 61if ($Opt{debug}){ 62 my $cfrom = Encode->getEncoding($from)->name; 63 my $cto = Encode->getEncoding($to)->name; 64 print <<"EOT"; 65Scheme: $scheme 66From: $from => $cfrom 67To: $to => $cto 68EOT 69} 70 71my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/; 72 73# we do not use <> (or ARGV) for the sake of binmode() 74@ARGV or push @ARGV, \*STDIN; 75 76unless ( $scheme eq 'perlio' ) { 77 binmode STDOUT; 78 my $need2slurp = $use_bom{ find_encoding($to)->name }; 79 for my $argv (@ARGV) { 80 my $ifh = ref $argv ? $argv : undef; 81 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next; 82 $ifh or open $ifh, "<", $argv or next; 83 binmode $ifh; 84 if ( $scheme eq 'from_to' ) { # default 85 if ($need2slurp){ 86 local $/; 87 $_ = <$ifh>; 88 Encode::from_to( $_, $from, $to, $Opt{check} ); 89 print; 90 }else{ 91 while (<$ifh>) { 92 Encode::from_to( $_, $from, $to, $Opt{check} ); 93 print; 94 } 95 } 96 } 97 elsif ( $scheme eq 'decode_encode' ) { # step-by-step 98 if ($need2slurp){ 99 local $/; 100 $_ = <$ifh>; 101 my $decoded = decode( $from, $_, $Opt{check} ); 102 my $encoded = encode( $to, $decoded ); 103 print $encoded; 104 }else{ 105 while (<$ifh>) { 106 my $decoded = decode( $from, $_, $Opt{check} ); 107 my $encoded = encode( $to, $decoded ); 108 print $encoded; 109 } 110 } 111 } 112 else { # won't reach 113 die "$name: unknown scheme: $scheme"; 114 } 115 } 116} 117else { 118 119 # NI-S favorite 120 binmode STDOUT => "raw:encoding($to)"; 121 for my $argv (@ARGV) { 122 my $ifh = ref $argv ? $argv : undef; 123 $ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next; 124 $ifh or open $ifh, "<", $argv or next; 125 binmode $ifh => "raw:encoding($from)"; 126 print while (<$ifh>); 127 } 128} 129 130sub list_encodings { 131 print join( "\n", Encode->encodings(":all") ), "\n"; 132 exit 0; 133} 134 135sub resolve_encoding { 136 if ( my $alias = Encode::resolve_alias( $_[0] ) ) { 137 print $alias, "\n"; 138 exit 0; 139 } 140 else { 141 warn "$name: $_[0] is not known to Encode\n"; 142 exit 1; 143 } 144} 145 146sub help { 147 my $message = shift; 148 $message and print STDERR "$name error: $message\n"; 149 print STDERR <<"EOT"; 150$name [-f from_encoding] [-t to_encoding] [-s string] [files...] 151$name -l 152$name -r encoding_alias 153 -l,--list 154 lists all available encodings 155 -r,--resolve encoding_alias 156 resolve encoding to its (Encode) canonical name 157 -f,--from from_encoding 158 when omitted, the current locale will be used 159 -t,--to to_encoding 160 when omitted, the current locale will be used 161 -s,--string string 162 "string" will be the input instead of STDIN or files 163The following are mainly of interest to Encode hackers: 164 -D,--debug show debug information 165 -C N | -c check the validity of the input 166 -S,--scheme scheme use the scheme for conversion 167Those are handy when you can only see ascii characters: 168 -p,--perlqq 169 --htmlcref 170 --xmlcref 171EOT 172 exit; 173} 174 175__END__ 176 177=head1 NAME 178 179piconv -- iconv(1), reinvented in perl 180 181=head1 SYNOPSIS 182 183 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...] 184 piconv -l 185 piconv [-C N|-c|-p] 186 piconv -S scheme ... 187 piconv -r encoding 188 piconv -D ... 189 piconv -h 190 191=head1 DESCRIPTION 192 193B<piconv> is perl version of B<iconv>, a character encoding converter 194widely available for various Unixen today. This script was primarily 195a technology demonstrator for Perl 5.8.0, but you can use piconv in the 196place of iconv for virtually any case. 197 198piconv converts the character encoding of either STDIN or files 199specified in the argument and prints out to STDOUT. 200 201Here is the list of options. Each option can be in short format (-f) 202or long (--from). 203 204=over 4 205 206=item -f,--from from_encoding 207 208Specifies the encoding you are converting from. Unlike B<iconv>, 209this option can be omitted. In such cases, the current locale is used. 210 211=item -t,--to to_encoding 212 213Specifies the encoding you are converting to. Unlike B<iconv>, 214this option can be omitted. In such cases, the current locale is used. 215 216Therefore, when both -f and -t are omitted, B<piconv> just acts 217like B<cat>. 218 219=item -s,--string I<string> 220 221uses I<string> instead of file for the source of text. 222 223=item -l,--list 224 225Lists all available encodings, one per line, in case-insensitive 226order. Note that only the canonical names are listed; many aliases 227exist. For example, the names are case-insensitive, and many standard 228and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850" 229instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> 230for a full discussion. 231 232=item -C,--check I<N> 233 234Check the validity of the stream if I<N> = 1. When I<N> = -1, something 235interesting happens when it encounters an invalid character. 236 237=item -c 238 239Same as C<-C 1>. 240 241=item -p,--perlqq 242 243=item --htmlcref 244 245=item --xmlcref 246 247Applies PERLQQ, HTMLCREF, XMLCREF, respectively. Try 248 249 piconv -f utf8 -t ascii --perlqq 250 251To see what it does. 252 253=item -h,--help 254 255Show usage. 256 257=item -D,--debug 258 259Invokes debugging mode. Primarily for Encode hackers. 260 261=item -S,--scheme scheme 262 263Selects which scheme is to be used for conversion. Available schemes 264are as follows: 265 266=over 4 267 268=item from_to 269 270Uses Encode::from_to for conversion. This is the default. 271 272=item decode_encode 273 274Input strings are decode()d then encode()d. A straight two-step 275implementation. 276 277=item perlio 278 279The new perlIO layer is used. NI-S' favorite. 280 281You should use this option if you are using UTF-16 and others which 282linefeed is not $/. 283 284=back 285 286Like the I<-D> option, this is also for Encode hackers. 287 288=back 289 290=head1 SEE ALSO 291 292L<iconv(1)> 293L<locale(3)> 294L<Encode> 295L<Encode::Supported> 296L<Encode::Alias> 297L<PerlIO> 298 299=cut 300