xref: /openbsd/gnu/usr.bin/perl/cpan/Encode/bin/piconv (revision 3d8817e4)
1#!./perl
2# $Id: piconv,v 2.4 2009/07/08 13:34:15 dankogai Exp $
3#
4use 5.8.0;
5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme =  map {$_ => 1} qw(from_to decode_encode perlio);
9
10use File::Basename;
11my $name = basename($0);
12
13use Getopt::Long qw(:config no_ignore_case);
14
15my %Opt;
16
17help()
18    unless
19      GetOptions(\%Opt,
20         'from|f=s',
21         'to|t=s',
22         'list|l',
23         'string|s=s',
24         'check|C=i',
25         'c',
26         'perlqq|p',
27         'htmlcref',
28         'xmlcref',
29         'debug|D',
30         'scheme|S=s',
31         'resolve|r=s',
32         'help',
33         );
34
35$Opt{help} and help();
36$Opt{list} and list_encodings();
37my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
38defined $Opt{resolve} and resolve_encoding($Opt{resolve});
39$Opt{from} || $Opt{to} || help();
40my $from = $Opt{from} || $locale or help("from_encoding unspecified");
41my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");
42$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
43my $scheme = do {
44    if (defined $Opt{scheme}) {
45	if (!exists $Scheme{$Opt{scheme}}) {
46	    warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n";
47	    'from_to';
48	} else {
49	    $Opt{scheme};
50	}
51    } else {
52	'from_to';
53    }
54};
55
56$Opt{check} ||= $Opt{c};
57$Opt{perlqq}   and $Opt{check} = Encode::PERLQQ;
58$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
59$Opt{xmlcref}  and $Opt{check} = Encode::XMLCREF;
60
61if ($Opt{debug}){
62    my $cfrom = Encode->getEncoding($from)->name;
63    my $cto   = Encode->getEncoding($to)->name;
64    print <<"EOT";
65Scheme: $scheme
66From:   $from => $cfrom
67To:     $to => $cto
68EOT
69}
70
71my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/;
72
73# we do not use <> (or ARGV) for the sake of binmode()
74@ARGV or push @ARGV, \*STDIN;
75
76unless ( $scheme eq 'perlio' ) {
77    binmode STDOUT;
78    my $need2slurp = $use_bom{ find_encoding($to)->name };
79    for my $argv (@ARGV) {
80        my $ifh = ref $argv ? $argv : undef;
81	$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
82        $ifh or open $ifh, "<", $argv or next;
83        binmode $ifh;
84        if ( $scheme eq 'from_to' ) {    # default
85	    if ($need2slurp){
86		local $/;
87		$_ = <$ifh>;
88		Encode::from_to( $_, $from, $to, $Opt{check} );
89		print;
90	    }else{
91		while (<$ifh>) {
92		    Encode::from_to( $_, $from, $to, $Opt{check} );
93		    print;
94		}
95	    }
96        }
97        elsif ( $scheme eq 'decode_encode' ) {    # step-by-step
98	    if ($need2slurp){
99		local $/;
100		$_ = <$ifh>;
101                my $decoded = decode( $from, $_, $Opt{check} );
102                my $encoded = encode( $to, $decoded );
103                print $encoded;
104	    }else{
105		while (<$ifh>) {
106		    my $decoded = decode( $from, $_, $Opt{check} );
107		    my $encoded = encode( $to, $decoded );
108		    print $encoded;
109		}
110	    }
111	}
112	else {                                    # won't reach
113            die "$name: unknown scheme: $scheme";
114        }
115    }
116}
117else {
118
119    # NI-S favorite
120    binmode STDOUT => "raw:encoding($to)";
121    for my $argv (@ARGV) {
122        my $ifh = ref $argv ? $argv : undef;
123	$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
124        $ifh or open $ifh, "<", $argv or next;
125        binmode $ifh => "raw:encoding($from)";
126        print while (<$ifh>);
127    }
128}
129
130sub list_encodings {
131    print join( "\n", Encode->encodings(":all") ), "\n";
132    exit 0;
133}
134
135sub resolve_encoding {
136    if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
137        print $alias, "\n";
138        exit 0;
139    }
140    else {
141        warn "$name: $_[0] is not known to Encode\n";
142        exit 1;
143    }
144}
145
146sub help {
147    my $message = shift;
148    $message and print STDERR "$name error: $message\n";
149    print STDERR <<"EOT";
150$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
151$name -l
152$name -r encoding_alias
153  -l,--list
154     lists all available encodings
155  -r,--resolve encoding_alias
156    resolve encoding to its (Encode) canonical name
157  -f,--from from_encoding
158     when omitted, the current locale will be used
159  -t,--to to_encoding
160     when omitted, the current locale will be used
161  -s,--string string
162     "string" will be the input instead of STDIN or files
163The following are mainly of interest to Encode hackers:
164  -D,--debug          show debug information
165  -C N | -c           check the validity of the input
166  -S,--scheme scheme  use the scheme for conversion
167Those are handy when you can only see ascii characters:
168  -p,--perlqq
169  --htmlcref
170  --xmlcref
171EOT
172    exit;
173}
174
175__END__
176
177=head1 NAME
178
179piconv -- iconv(1), reinvented in perl
180
181=head1 SYNOPSIS
182
183  piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
184  piconv -l
185  piconv [-C N|-c|-p]
186  piconv -S scheme ...
187  piconv -r encoding
188  piconv -D ...
189  piconv -h
190
191=head1 DESCRIPTION
192
193B<piconv> is perl version of B<iconv>, a character encoding converter
194widely available for various Unixen today.  This script was primarily
195a technology demonstrator for Perl 5.8.0, but you can use piconv in the
196place of iconv for virtually any case.
197
198piconv converts the character encoding of either STDIN or files
199specified in the argument and prints out to STDOUT.
200
201Here is the list of options.  Each option can be in short format (-f)
202or long (--from).
203
204=over 4
205
206=item -f,--from from_encoding
207
208Specifies the encoding you are converting from.  Unlike B<iconv>,
209this option can be omitted.  In such cases, the current locale is used.
210
211=item -t,--to to_encoding
212
213Specifies the encoding you are converting to.  Unlike B<iconv>,
214this option can be omitted.  In such cases, the current locale is used.
215
216Therefore, when both -f and -t are omitted, B<piconv> just acts
217like B<cat>.
218
219=item -s,--string I<string>
220
221uses I<string> instead of file for the source of text.
222
223=item -l,--list
224
225Lists all available encodings, one per line, in case-insensitive
226order.  Note that only the canonical names are listed; many aliases
227exist.  For example, the names are case-insensitive, and many standard
228and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
229instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
230for a full discussion.
231
232=item -C,--check I<N>
233
234Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
235interesting happens when it encounters an invalid character.
236
237=item -c
238
239Same as C<-C 1>.
240
241=item -p,--perlqq
242
243=item --htmlcref
244
245=item --xmlcref
246
247Applies PERLQQ, HTMLCREF, XMLCREF, respectively.  Try
248
249  piconv -f utf8 -t ascii --perlqq
250
251To see what it does.
252
253=item -h,--help
254
255Show usage.
256
257=item -D,--debug
258
259Invokes debugging mode.  Primarily for Encode hackers.
260
261=item -S,--scheme scheme
262
263Selects which scheme is to be used for conversion.  Available schemes
264are as follows:
265
266=over 4
267
268=item from_to
269
270Uses Encode::from_to for conversion.  This is the default.
271
272=item decode_encode
273
274Input strings are decode()d then encode()d.  A straight two-step
275implementation.
276
277=item perlio
278
279The new perlIO layer is used.  NI-S' favorite.
280
281You should use this option if you are using UTF-16 and others which
282linefeed is not $/.
283
284=back
285
286Like the I<-D> option, this is also for Encode hackers.
287
288=back
289
290=head1 SEE ALSO
291
292L<iconv(1)>
293L<locale(3)>
294L<Encode>
295L<Encode::Supported>
296L<Encode::Alias>
297L<PerlIO>
298
299=cut
300