xref: /openbsd/gnu/usr.bin/perl/cpan/Encode/bin/piconv (revision 5af055cd)
1#!./perl
2# $Id: piconv,v 2.6 2014/03/28 02:37:42 dankogai Exp $
3#
4use 5.8.0;
5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme =  map {$_ => 1} qw(from_to decode_encode perlio);
9
10use File::Basename;
11my $name = basename($0);
12
13use Getopt::Long qw(:config no_ignore_case);
14
15my %Opt;
16
17help()
18    unless
19      GetOptions(\%Opt,
20         'from|f=s',
21         'to|t=s',
22         'list|l',
23         'string|s=s',
24         'check|C=i',
25         'c',
26         'perlqq|p',
27         'htmlcref',
28         'xmlcref',
29         'debug|D',
30         'scheme|S=s',
31         'resolve|r=s',
32         'help',
33         );
34
35$Opt{help} and help();
36$Opt{list} and list_encodings();
37my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
38defined $Opt{resolve} and resolve_encoding($Opt{resolve});
39$Opt{from} || $Opt{to} || help();
40my $from = $Opt{from} || $locale or help("from_encoding unspecified");
41my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");
42$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
43my $scheme = do {
44    if (defined $Opt{scheme}) {
45	if (!exists $Scheme{$Opt{scheme}}) {
46	    warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n";
47	    'from_to';
48	} else {
49	    $Opt{scheme};
50	}
51    } else {
52	'from_to';
53    }
54};
55
56$Opt{check} ||= $Opt{c};
57$Opt{perlqq}   and $Opt{check} = Encode::PERLQQ;
58$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
59$Opt{xmlcref}  and $Opt{check} = Encode::XMLCREF;
60
61if ($Opt{debug}){
62    my $cfrom = Encode->getEncoding($from)->name;
63    my $cto   = Encode->getEncoding($to)->name;
64    print <<"EOT";
65Scheme: $scheme
66From:   $from => $cfrom
67To:     $to => $cto
68EOT
69}
70
71my %use_bom =
72  map { $_ => 1 } qw/UTF-16 UTF-16BE UTF-16LE UTF-32 UTF-32BE UTF-32LE/;
73
74# we do not use <> (or ARGV) for the sake of binmode()
75@ARGV or push @ARGV, \*STDIN;
76
77unless ( $scheme eq 'perlio' ) {
78    binmode STDOUT;
79    my $need2slurp = $use_bom{ find_encoding($to)->name }
80      || $use_bom{ find_encoding($from)->name };
81    for my $argv (@ARGV) {
82        my $ifh = ref $argv ? $argv : undef;
83	$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
84        $ifh or open $ifh, "<", $argv or next;
85        binmode $ifh;
86        if ( $scheme eq 'from_to' ) {    # default
87	    if ($need2slurp){
88		local $/;
89		$_ = <$ifh>;
90		Encode::from_to( $_, $from, $to, $Opt{check} );
91		print;
92	    }else{
93		while (<$ifh>) {
94		    Encode::from_to( $_, $from, $to, $Opt{check} );
95		    print;
96		}
97	    }
98        }
99        elsif ( $scheme eq 'decode_encode' ) {    # step-by-step
100	    if ($need2slurp){
101		local $/;
102		$_ = <$ifh>;
103                my $decoded = decode( $from, $_, $Opt{check} );
104                my $encoded = encode( $to, $decoded );
105                print $encoded;
106	    }else{
107		while (<$ifh>) {
108		    my $decoded = decode( $from, $_, $Opt{check} );
109		    my $encoded = encode( $to, $decoded );
110		    print $encoded;
111		}
112	    }
113	}
114	else {                                    # won't reach
115            die "$name: unknown scheme: $scheme";
116        }
117    }
118}
119else {
120
121    # NI-S favorite
122    binmode STDOUT => "raw:encoding($to)";
123    for my $argv (@ARGV) {
124        my $ifh = ref $argv ? $argv : undef;
125	$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
126        $ifh or open $ifh, "<", $argv or next;
127        binmode $ifh => "raw:encoding($from)";
128        print while (<$ifh>);
129    }
130}
131
132sub list_encodings {
133    print join( "\n", Encode->encodings(":all") ), "\n";
134    exit 0;
135}
136
137sub resolve_encoding {
138    if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
139        print $alias, "\n";
140        exit 0;
141    }
142    else {
143        warn "$name: $_[0] is not known to Encode\n";
144        exit 1;
145    }
146}
147
148sub help {
149    my $message = shift;
150    $message and print STDERR "$name error: $message\n";
151    print STDERR <<"EOT";
152$name [-f from_encoding] [-t to_encoding]
153      [-p|--perlqq|--htmlcref|--xmlcref] [-C N|-c] [-D] [-S scheme]
154      [-s string|file...]
155$name -l
156$name -r encoding_alias
157$name -h
158Common options:
159  -l,--list
160     lists all available encodings
161  -r,--resolve encoding_alias
162    resolve encoding to its (Encode) canonical name
163  -f,--from from_encoding
164     when omitted, the current locale will be used
165  -t,--to to_encoding
166     when omitted, the current locale will be used
167  -s,--string string
168     "string" will be the input instead of STDIN or files
169The following are mainly of interest to Encode hackers:
170  -C N | -c           check the validity of the input
171  -D,--debug          show debug information
172  -S,--scheme scheme  use the scheme for conversion
173Those are handy when you can only see ASCII characters:
174  -p,--perlqq         transliterate characters missing in encoding to \\x{HHHH}
175                      where HHHH is the hexadecimal Unicode code point
176  --htmlcref          transliterate characters missing in encoding to &#NNN;
177                      where NNN is the decimal Unicode code point
178  --xmlcref           transliterate characters missing in encoding to &#xHHHH;
179                      where HHHH is the hexadecimal Unicode code point
180
181EOT
182    exit;
183}
184
185__END__
186
187=head1 NAME
188
189piconv -- iconv(1), reinvented in perl
190
191=head1 SYNOPSIS
192
193  piconv [-f from_encoding] [-t to_encoding]
194         [-p|--perlqq|--htmlcref|--xmlcref] [-C N|-c] [-D] [-S scheme]
195         [-s string|file...]
196  piconv -l
197  piconv -r encoding_alias
198  piconv -h
199
200=head1 DESCRIPTION
201
202B<piconv> is perl version of B<iconv>, a character encoding converter
203widely available for various Unixen today.  This script was primarily
204a technology demonstrator for Perl 5.8.0, but you can use piconv in the
205place of iconv for virtually any case.
206
207piconv converts the character encoding of either STDIN or files
208specified in the argument and prints out to STDOUT.
209
210Here is the list of options.  Some options can be in short format (-f)
211or long (--from) one.
212
213=over 4
214
215=item -f,--from I<from_encoding>
216
217Specifies the encoding you are converting from.  Unlike B<iconv>,
218this option can be omitted.  In such cases, the current locale is used.
219
220=item -t,--to I<to_encoding>
221
222Specifies the encoding you are converting to.  Unlike B<iconv>,
223this option can be omitted.  In such cases, the current locale is used.
224
225Therefore, when both -f and -t are omitted, B<piconv> just acts
226like B<cat>.
227
228=item -s,--string I<string>
229
230uses I<string> instead of file for the source of text.
231
232=item -l,--list
233
234Lists all available encodings, one per line, in case-insensitive
235order.  Note that only the canonical names are listed; many aliases
236exist.  For example, the names are case-insensitive, and many standard
237and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
238instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
239for a full discussion.
240
241=item -r,--resolve I<encoding_alias>
242
243Resolve I<encoding_alias> to Encode canonical encoding name.
244
245=item -C,--check I<N>
246
247Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
248interesting happens when it encounters an invalid character.
249
250=item -c
251
252Same as C<-C 1>.
253
254=item -p,--perlqq
255
256Transliterate characters missing in encoding to \x{HHHH} where HHHH is the
257hexadecimal Unicode code point.
258
259=item --htmlcref
260
261Transliterate characters missing in encoding to &#NNN; where NNN is the
262decimal Unicode code point.
263
264=item --xmlcref
265
266Transliterate characters missing in encoding to &#xHHHH; where HHHH is the
267hexadecimal Unicode code point.
268
269=item -h,--help
270
271Show usage.
272
273=item -D,--debug
274
275Invokes debugging mode.  Primarily for Encode hackers.
276
277=item -S,--scheme I<scheme>
278
279Selects which scheme is to be used for conversion.  Available schemes
280are as follows:
281
282=over 4
283
284=item from_to
285
286Uses Encode::from_to for conversion.  This is the default.
287
288=item decode_encode
289
290Input strings are decode()d then encode()d.  A straight two-step
291implementation.
292
293=item perlio
294
295The new perlIO layer is used.  NI-S' favorite.
296
297You should use this option if you are using UTF-16 and others which
298linefeed is not $/.
299
300=back
301
302Like the I<-D> option, this is also for Encode hackers.
303
304=back
305
306=head1 SEE ALSO
307
308L<iconv(1)>
309L<locale(3)>
310L<Encode>
311L<Encode::Supported>
312L<Encode::Alias>
313L<PerlIO>
314
315=cut
316