1#
2# $Id: Jcode.pm,v 1.4 2004/02/29 01:07:41 takezoe Exp $
3#
4
5=head1 NAME
6
7Jcode - Japanese Charset Handler
8
9=head1 SYNOPSIS
10
11 use Jcode;
12 #
13 # traditional
14 Jcode::convert(\$str, $ocode, $icode, "z");
15 # or OOP!
16 print Jcode->new($str)->h2z->tr($from, $to)->utf8;
17
18=cut
19
20=head1 DESCRIPTION
21
22Jcode.pm supports both object and traditional approach.
23With object approach, you can go like;
24
25$iso_2022_jp = Jcode->new($str)->h2z->jis;
26
27Which is more elegant than;
28
29$iso_2022_jp = &jcode::convert(\$str,'jis',jcode::getcode(\str), "z");
30
31For those unfamiliar with objects, Jcode.pm still supports getcode()
32and convert().
33
34=cut
35
36package Jcode;
37use 5.004;
38use Carp;
39use Jcode::H2Z;
40use Jcode::Tr;
41use strict;
42use vars qw($RCSID $VERSION $DEBUG);
43
44$RCSID = q$Id: Jcode.pm,v 1.4 2004/02/29 01:07:41 takezoe Exp $;
45$VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
46$DEBUG = 0;
47
48use Exporter;
49use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
50@ISA         = qw(Exporter);
51@EXPORT      = qw(jcode getcode);
52@EXPORT_OK   = qw($RCSID $VERSION $DEBUG);
53%EXPORT_TAGS = ( all => [ @EXPORT_OK, @EXPORT ] );
54
55
56use vars qw($USE_CACHE $NOXS);
57
58$USE_CACHE = 1;
59$NOXS = 1;
60
61print $RCSID, "\n" if $DEBUG;
62
63use Jcode::Constants qw(:all);
64
65use overload
66    q("") => sub { ${$_[0]->[0]} },
67    q(==) => sub {overload::StrVal($_[0]) eq overload::StrVal($_[1])},
68    q(=)  => sub { $_[0]->set( $_[1] ) },
69    q(.=) => sub { $_[0]->append( $_[1] ) },
70    fallback => 1,
71    ;
72
73=head1 Methods
74
75Methods mentioned here all return Jcode object unless otherwise mentioned.
76
77=over 4
78
79=item $j = Jcode-E<gt>new($str [, $icode]);
80
81Creates Jcode object $j from $str.  Input code is automatically checked
82unless you explicitly set $icode. For available charset, see L<getcode>
83below.
84
85The object keeps the string in EUC format enternaly.  When the object
86itself is evaluated, it returns the EUC-converted string so you can
87"print $j;" without calling access method if you are using EUC
88(thanks to function overload).
89
90=item Passing Reference
91
92Instead of scalar value, You can use reference as
93
94Jcode->new(\$str);
95
96This saves time a little bit.  In exchange of the value of $str being
97converted. (In a way, $str is now "tied" to jcode object).
98
99=item $j-E<gt>set($str [, $icode]);
100
101Sets $j's internal string to $str.  Handy when you use Jcode object repeatedly
102(saves time and memory to create object).
103
104 # converts mailbox to SJIS format
105 my $jconv = new Jcode;
106 $/ = 00;
107 while(&lt;&gt;){
108     print $jconv->set(\$_)->mime_decode->sjis;
109 }
110
111=item $j-E<gt>append($str [, $icode]);
112
113Appends $str to $j's internal string.
114
115=back
116
117=cut
118
119sub new {
120    my $class = shift;
121    my ($thingy, $icode) = @_;
122    my $r_str = ref $thingy ? $thingy : \$thingy;
123    my $nmatch;
124    ($icode, $nmatch) = getcode($r_str) unless $icode;
125    convert($r_str, 'euc', $icode);
126    my $self = [
127	$r_str,
128	$icode,
129	$nmatch,
130    ];
131    carp "Object of class $class created" if $DEBUG >= 2;
132    bless $self, $class;
133}
134
135sub r_str  { $_[0]->[0] }
136sub icode  { $_[0]->[1] }
137sub nmatch { $_[0]->[2] }
138
139sub set {
140    my $self = shift;
141    my ($thingy, $icode) = @_;
142    my $r_str = ref $thingy ? $thingy : \$thingy;
143    my $nmatch;
144    ($icode, $nmatch) = getcode($r_str) unless $icode;
145    convert($r_str, 'euc', $icode);
146    $self->[0] = $r_str;
147    $self->[1] = $icode;
148    $self->[2] = $nmatch;
149    return $self;
150}
151
152sub append {
153    my $self = shift;
154    my ($thingy, $icode) = @_;
155    my $r_str = ref $thingy ? $thingy : \$thingy;
156    my $nmatch;
157    ($icode, $nmatch) = getcode($r_str) unless $icode;
158    convert($r_str, 'euc', $icode);
159    ${$self->[0]} .= $$r_str;
160    $self->[1] = $icode;
161    $self->[2] = $nmatch;
162    return $self;
163}
164
165=over 4
166
167=item $j = jcode($str [, $icode]);
168
169shortcut for Jcode->new() so you can go like;
170
171$sjis = jcode($str)->sjis;
172
173=item $euc = $j-E<gt>euc;
174
175=item $jis = $j-E<gt>jis;
176
177=item $sjis = $j-E<gt>sjis;
178
179What you code is what you get :)
180
181=item $iso_2022_jp = $j-E<gt>iso_2022_jp
182
183Same as $j->z2h->jis.
184Hankaku Kanas are forcibly converted to Zenkaku.
185
186=back
187
188=cut
189
190sub jcode { return Jcode->new(@_) }
191sub euc   { return ${$_[0]->[0]} }
192sub jis   { return  &euc_jis(${$_[0]->[0]})}
193sub sjis  { return &euc_sjis(${$_[0]->[0]})}
194sub iso_2022_jp{return $_[0]->h2z->jis}
195
196=over 4
197
198=item [@lines =] $jcode-E<gt>jfold([$bytes_per_line, $newline_str]);
199
200folds lines in jcode string every $bytes_per_line (default: 72)
201in a way that does not clobber the multibyte string.
202(Sorry, no Kinsoku done!)
203with a newline string spified by $newline_str (default: \n).
204
205=back
206
207=cut
208
209sub jfold{
210    my $self = shift;
211    my ($bpl, $nl) = @_;
212    $bpl ||= 72;
213    $nl  ||= "\n";
214    my $r_str = $self->[0];
215    my (@lines, $len, $i);
216    while ($$r_str =~
217	   m/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|[\x00-\xff])/sgo)
218    {
219	if ($len + length($1) > $bpl){ # fold!
220	    $i++;
221	    $len = 0;
222	}
223	$lines[$i] .= $1;
224	$len += length($1);
225    }
226    defined($lines[$i]) or pop @lines;
227    $$r_str = join($nl, @lines);
228    return wantarray ? @lines : $self;
229}
230
231=pod
232
233=over 4
234
235=item $length = $jcode-E<gt>jlength();
236
237returns character length properly, rather than byte length.
238
239=back
240
241=cut
242
243sub jlength {
244    my $self = shift;
245    my $r_str = $self->[0];
246    return scalar (my @char = $$r_str =~ m/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|[\x00-\xff])/sgo);
247}
248
249=head2 Methods that use MIME::Base64
250
251To use methods below, you need MIME::Base64.  To install, simply
252
253   perl -MCPAN -e 'CPAN::Shell->install("MIME::Base64")'
254
255=over 4
256
257=item $mime_header = $j-E<gt>mime_encode([$lf, $bpl]);
258
259Converts $str to MIME-Header documented in RFC1522.
260When $lf is specified, it uses $lf to fold line (default: \n).
261When $bpl is specified, it uses $bpl for the number of bytes (default: 76;
262this number must be smaller than 76).
263
264=item $j-E<gt>mime_decode;
265
266Decodes MIME-Header in Jcode object.
267
268You can retrieve the number of matches via $j->nmatch;
269
270=back
271
272=cut
273
274sub mime_encode{
275    my $self = shift;
276    my $r_str = $self->[0];
277    my $lf  = shift || "\n";
278    my $bpl = shift || 76;
279
280    my ($trailing_crlf) = ($$r_str =~ /(\n|\r|\x0d\x0a)$/o);
281    my $str  = _mime_unstructured_header($$r_str, $lf, $bpl);
282    not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o;
283    $str;
284}
285
286#
287# shamelessly stolen from
288# http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
289#
290
291sub _add_encoded_word {
292    require MIME::Base64;
293    my($str, $line, $bpl) = @_;
294    my $result = '';
295    while (length($str)) {
296	my $target = $str;
297	$str = '';
298	if (length($line) + 22 +
299	    ($target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o) * 8 > $bpl) {
300	    $line =~ s/[ \t\n\r]*$/\n/;
301	    $result .= $line;
302	    $line = ' ';
303	}
304	while (1) {
305	    my $iso_2022_jp = jcode($target, 'euc')->iso_2022_jp;
306	    if (my $count = ($iso_2022_jp =~ tr/\x80-\xff//d)){
307		$DEBUG and warn $count;
308		$target = jcode($iso_2022_jp, 'iso_2022_jp')->euc;
309	    }
310	    my $encoded = '=?ISO-2022-JP?B?' .
311	      MIME::Base64::encode_base64($iso_2022_jp, '')
312		  . '?=';
313	    if (length($encoded) + length($line) > $bpl) {
314		$target =~
315		    s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o;
316		$str = $1 . $str;
317	    } else {
318		$line .= $encoded;
319		last;
320	    }
321	}
322    }
323    return $result . $line;
324}
325
326sub _mime_unstructured_header {
327    my ($oldheader, $lf, $bpl) = @_;
328    my(@words, @wordstmp, $i);
329    my $header = '';
330    $oldheader =~ s/\s+$//;
331    @wordstmp = split /\s+/, $oldheader;
332    for ($i = 0; $i < $#wordstmp; $i++) {
333	if ($wordstmp[$i] !~ /^[\x21-\x7E]+$/ and
334	    $wordstmp[$i + 1] !~ /^[\x21-\x7E]+$/) {
335	    $wordstmp[$i + 1] = "$wordstmp[$i] $wordstmp[$i + 1]";
336	} else {
337	    push(@words, $wordstmp[$i]);
338	}
339    }
340    push(@words, $wordstmp[-1]);
341    for my $word (@words) {
342	if ($word =~ /^[\x21-\x7E]+$/) {
343	    $header =~ /(?:.*\n)*(.*)/;
344	    if (length($1) + length($word) > $bpl) {
345		$header .= "$lf $word";
346	    } else {
347		$header .= $word;
348	    }
349	} else {
350	    $header = _add_encoded_word($word, $header, $bpl);
351	}
352	$header =~ /(?:.*\n)*(.*)/;
353	if (length($1) == $bpl) {
354	    $header .= "$lf ";
355	} else {
356	    $header .= ' ';
357	}
358    }
359    $header =~ s/\n? $/\n/;
360    $header;
361}
362
363# see http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
364#$lws = '(?:(?:\x0d\x0a)?[ \t])+';
365#$ew_regex = '=\?ISO-2022-JP\?B\?([A-Za-z0-9+/]+=*)\?=';
366#$str =~ s/($ew_regex)$lws(?=$ew_regex)/$1/gio;
367#$str =~ s/$lws/ /go; $str =~ s/$ew_regex/decode_base64($1)/egio;
368
369sub mime_decode{
370    require MIME::Base64; # not use
371    my $self = shift;
372    my $r_str = $self->[0];
373    my $re_lws = '(?:(?:\r|\n|\x0d\x0a)?[ \t])+';
374    my $re_ew = '=\?[Ii][Ss][Oo]-2022-[Jj][Pp]\?[Bb]\?([A-Za-z0-9+/]+=*)\?=';
375    $$r_str =~ s/($re_ew)$re_lws(?=$re_ew)/$1/sgo;
376    $$r_str =~ s/$re_lws/ /go;
377    $self->[2] =
378	($$r_str =~
379	 s/$re_ew/jis_euc(MIME::Base64::decode_base64($1))/ego
380	 );
381    $self;
382}
383
384
385=head2 Methods implemented by Jcode::H2Z
386
387Methods below are actually implemented in Jcode::H2Z.
388
389=over 4
390
391=item $j-E<gt>h2z([$keep_dakuten]);
392
393Converts X201 kana (Hankaku) to X208 kana (Zenkaku).
394When $keep_dakuten is set, it leaves dakuten as is
395(That is, "ka + dakuten" is left as is instead of
396being converted to "ga")
397
398You can retrieve the number of matches via $j->nmatch;
399
400=item $j-E<gt>z2h;
401
402Converts X208 kana (Zenkaku) to X201 kana (Hankaku).
403
404You can retrieve the number of matches via $j->nmatch;
405
406=back
407
408=cut
409
410sub h2z {
411    require Jcode::H2Z; # not use
412    my $self = shift;
413    $self->[2] = Jcode::H2Z::h2z($self->[0], @_);
414    return $self;
415}
416
417
418sub z2h {
419    require Jcode::H2Z; # not use
420    my $self = shift;
421    $self->[2] =  &Jcode::H2Z::z2h($self->[0], @_);
422    return $self;
423}
424
425
426=head2 Methods implemented in Jcode::Tr
427
428Methods here are actually implemented in Jcode::Tr.
429
430=over 4
431
432=item  $j-E<gt>tr($from, $to);
433
434Applies tr on Jcode object. $from and $to can contain EUC Japanese.
435
436You can retrieve the number of matches via $j->nmatch;
437
438=back
439
440=cut
441
442sub tr{
443    require Jcode::Tr; # not use
444    my $self = shift;
445    $self->[2] = Jcode::Tr::tr($self->[0], @_);
446    return $self;
447}
448
449#
450# load needed module depending on the configuration just once!
451#
452
453use vars qw(%PKG_LOADED);
454sub load_module{
455    my $pkg = shift;
456    return $pkg if $PKG_LOADED{$pkg}++;
457    unless ($NOXS){
458	eval qq( require $pkg; );
459	unless ($@){
460	    carp "$pkg loaded." if $DEBUG;
461	    return $pkg;
462	}
463    }
464    $pkg .= "::NoXS";
465    eval qq( require $pkg; );
466    unless ($@){
467	carp "$pkg loaded" if $DEBUG;
468    }else{
469	croak "Loading $pkg failed!";
470    }
471    $pkg;
472}
473
474=head2 Methods implemented in Jcode::Unicode
475
476If your perl does not support XS (or you can't C<perl Makefile.PL>,
477Jcode::Unicode::NoXS will be used.
478
479See L<Jcode::Unicode> and L<Jcode::Unicode::NoXS> for details
480
481=over 4
482
483=item $ucs2 = $j-E<gt>ucs2;
484
485Returns UCS2 (Raw Unicode) string.
486
487=item $ucs2 = $j-E<gt>utf8;
488
489Returns utf8 String.
490
491=back
492
493=cut
494
495sub ucs2{
496    load_module("Jcode::Unicode");
497    euc_ucs2(${$_[0]->[0]});
498}
499
500sub utf8{
501    load_module("Jcode::Unicode");
502    euc_utf8(${$_[0]->[0]});
503}
504
505=head2 Instance Variables
506
507If you need to access instance variables of Jcode object, use access
508methods below instead of directly accessing them (That's what OOP
509is all about)
510
511FYI, Jcode uses a ref to array instead of ref to hash (common way) to
512optimize speed (Actually you don't have to know as long as you use
513access methods instead;  Once again, that's OOP)
514
515=over 4
516
517=item $j-E<gt>r_str
518
519Reference to the EUC-coded String.
520
521=item $j-E<gt>icode
522
523Input charcode in recent operation.
524
525=item $j-E<gt>nmatch
526
527Number of matches (Used in $j->tr, etc.)
528
529=back
530
531=cut
532
533=head1 Subroutines
534
535=over 4
536
537=item ($code, [$nmatch]) = getcode($str);
538
539Returns char code of $str. Return codes are as follows
540
541 ascii   Ascii (Contains no Japanese Code)
542 binary  Binary (Not Text File)
543 euc     EUC-JP
544 sjis    SHIFT_JIS
545 jis     JIS (ISO-2022-JP)
546 ucs2    UCS2 (Raw Unicode)
547 utf8    UTF8
548
549When array context is used instead of scaler, it also returns how many
550character codes are found.  As mentioned above, $str can be \$str
551instead.
552
553B<jcode.pl Users:>  This function is 100% upper-conpatible with
554jcode::getcode() -- well, almost;
555
556 * When its return value is an array, the order is the opposite;
557   jcode::getcode() returns $nmatch first.
558
559 * jcode::getcode() returns 'undef' when the number of EUC characters
560   is equal to that of SJIS.  Jcode::getcode() returns EUC.  for
561   Jcode.pm there is no in-betweens.
562
563=item Jcode::convert($str, [$ocode, $icode, $opt]);
564
565Converts $str to char code specified by $ocode.  When $icode is specified
566also, it assumes $icode for input string instead of the one checked by
567getcode(). As mentioned above, $str can be \$str instead.
568
569B<jcode.pl Users:>  This function is 100% upper-conpatible with
570jcode::convert() !
571
572=back
573
574=cut
575
576sub getcode {
577    my $thingy = shift;
578    my $r_str = ref $thingy ? $thingy : \$thingy;
579
580    my ($code, $nmatch, $sjis, $euc, $utf8) = ("", 0, 0, 0, 0);
581    if ($$r_str =~ /$RE{BIN}/o) {	# 'binary'
582	my $ucs2;
583	$ucs2 += length($1)
584	    while $$r_str =~ /(\x00$RE{ASCII})+/go;
585	if ($ucs2){      # smells like raw unicode
586	    ($code, $nmatch) = ('ucs2', $ucs2);
587	}else{
588	    ($code, $nmatch) = ('binary', 0);
589	 }
590    }
591    elsif ($$r_str !~ /[\e\x80-\xff]/o) {	# not Japanese
592	($code, $nmatch) = ('ascii', 1);
593    }				# 'jis'
594    elsif ($$r_str =~
595	   m[
596	     $RE{JIS_0208}|$RE{JIS_0212}|$RE{JIS_ASC}|$RE{JIS_KANA}
597	   ]ox)
598    {
599	($code, $nmatch) = ('jis', 1);
600    }
601    else { # should be euc|sjis|utf8
602	# use of (?:) by Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp>
603	$sjis += length($1)
604	    while $$r_str =~ /((?:$RE{SJIS_C})+)/go;
605	$euc  += length($1)
606	    while $$r_str =~ /((?:$RE{EUC_C}|$RE{EUC_KANA}|$RE{EUC_0212})+)/go;
607	$utf8  += length($1)
608	    while $$r_str =~ /((?:$RE{UTF8})+)/go;
609	$nmatch = _max($utf8, $sjis, $euc);
610	carp ">DEBUG:sjis = $sjis, euc = $euc, utf8 = $utf8" if $DEBUG >= 3;
611	$code =
612	    ($euc > $sjis and $euc > $utf8) ? 'euc' :
613		($sjis > $euc and $sjis > $utf8) ? 'sjis' :
614		    ($utf8 > $euc and $utf8 > $sjis) ? 'utf8' : undef;
615    }
616    return wantarray ? ($code, $nmatch) : $code;
617}
618
619sub convert{
620    my $thingy = shift;
621    my $r_str = ref $thingy ? $thingy : \$thingy;
622    my ($ocode, $icode, $opt) = @_;
623
624    my $nmatch;
625    ($icode, $nmatch) = getcode($r_str) unless $icode;
626
627    return $$r_str if $icode eq $ocode and !defined $opt; # do nothin'
628
629    no strict qw(refs);
630    my $method;
631
632    # convert to EUC
633
634    load_module("Jcode::Unicode") if $icode =~ /ucs2|utf8/o;
635    if ($icode and defined &{$method = $icode . "_euc"}){
636	carp "Dispatching \&$method" if $DEBUG >= 2;
637	&{$method}($r_str) ;
638    }
639
640    # h2z or z2h
641
642    if ($opt){
643	my $cmd = ($opt =~ /^z/o) ? "h2z" : ($opt =~ /^h/o) ? "z2h" : undef;
644	if ($cmd){
645	    require Jcode::H2Z;
646	    &{'Jcode::H2Z::' . $cmd}($r_str);
647	}
648    }
649
650    # convert to $ocode
651
652    load_module("Jcode::Unicode") if $ocode =~ /ucs2|utf8/o;
653    if ($ocode and defined &{$method = "euc_" . $ocode}){
654	carp "Dispatching \&$method" if $DEBUG >= 2;
655	&{$method}($r_str) ;
656    }
657    $$r_str;
658}
659
660# JIS<->EUC
661
662sub jis_euc {
663    my $thingy = shift;
664    my $r_str = ref $thingy ? $thingy : \$thingy;
665    $$r_str =~ s(
666		 ($RE{JIS_0212}|$RE{JIS_0208}|$RE{JIS_ASC}|$RE{JIS_KANA})
667		 ([^\e]*)
668		 )
669    {
670	my ($esc, $str) = ($1, $2);
671	if ($esc !~ /$RE{JIS_ASC}/o) {
672	    $str =~ tr/\x21-\x7e/\xa1-\xfe/;
673	    if ($esc =~ /$RE{JIS_KANA}/o) {
674		$str =~ s/([\xa1-\xdf])/\x8e$1/og;
675	    }
676	    elsif ($esc =~ /$RE{JIS_0212}/o) {
677		$str =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og;
678	    }
679	}
680	$str;
681    }geox;
682    $$r_str;
683}
684
685#
686# euc_jis
687#
688# Based upon the contribution of
689# Kazuto Ichimura <ichimura@shimada.nuee.nagoya-u.ac.jp>
690# optimized by <ohzaki@iod.ricoh.co.jp>
691
692sub euc_jis{
693    my $thingy = shift;
694    my $r_str = ref $thingy ? $thingy : \$thingy;
695    $$r_str =~ s{
696	((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
697	}{
698	    my $str = $1;
699	    my $esc =
700		( $str =~ tr/\x8E//d ) ? $ESC{KANA} :
701		    ( $str =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
702			$ESC{JIS_0208};
703	    $str =~ tr/\xA1-\xFE/\x21-\x7E/;
704	    $esc . $str . $ESC{ASC};
705	}geox;
706    $$r_str =~
707	s/\Q$ESC{ASC}\E
708	    (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox;
709    $$r_str;
710}
711
712# EUC<->SJIS
713
714my %_S2E = ();
715my %_E2S = ();
716
717sub sjis_euc {
718    my $thingy = shift;
719    my $r_str = ref $thingy ? $thingy : \$thingy;
720    $$r_str =~ s(
721		 ($RE{SJIS_C}|$RE{SJIS_KANA})
722	     )
723    {
724	my $str = $1;
725	unless ($_S2E{$1}){
726	    my ($c1, $c2) = unpack('CC', $str);
727	    if (0xa1 <= $c1 && $c1 <= 0xdf) {
728		$c2 = $c1;
729		$c1 = 0x8e;
730	    } elsif (0x9f <= $c2) {
731		$c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60);
732		$c2 += 2;
733	    } else {
734		$c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61);
735		$c2 += 0x60 + ($c2 < 0x7f);
736	    }
737	    $_S2E{$str} = pack('CC', $c1, $c2);
738	}
739	$_S2E{$str};
740    }geox;
741    $$r_str;
742}
743
744#
745
746sub euc_sjis {
747    my $thingy = shift;
748    my $r_str = ref $thingy ? $thingy : \$thingy;
749    $$r_str =~ s(
750		 ($RE{EUC_C}|$RE{EUC_KANA}|$RE{EUC_0212})
751		 )
752    {
753	my $str = $1;
754	unless ($_E2S{$str}){
755	    my ($c1, $c2) = unpack('CC', $str);
756	    if ($c1 == 0x8e) {          # SS2
757		$_E2S{$str} = chr($c2);
758	    } elsif ($c1 == 0x8f) {     # SS3
759		$_E2S{$str} = $CHARCODE{UNDEF_SJIS};
760	    }else { #SS1 or X0208
761		if ($c1 % 2) {
762		    $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x31 : 0x71);
763		    $c2 -= 0x60 + ($c2 < 0xe0);
764		} else {
765		    $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x30 : 0x70);
766		    $c2 -= 2;
767		}
768		$_E2S{$str} = pack('CC', $c1, $c2);
769	    }
770	}
771	$_E2S{$str};
772    }geox;
773    $$r_str;
774}
775
776#
777# Util. Functions
778#
779
780sub _max {
781    my $result = shift;
782    for my $n (@_){
783	$result = $n if $n > $result;
784    }
785    return $result;
786}
787
7881;
789
790__END__
791
792=head1 BUGS
793
794Unicode support by Jcode is far from efficient!
795
796=head1 IN FUTURE
797
798Hopefully Jcode will be superceded by Encode module that is part of
799the standard module on Perl 5.7 and up
800
801=head1 ACKNOWLEDGEMENTS
802
803This package owes a lot in motivation, design, and code, to the jcode.pl
804for Perl4 by Kazumasa Utashiro <utashiro@iij.ad.jp>.
805
806Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp> has helped me polish regexp from the
807very first stage of development.
808
809And folks at Jcode Mailing list <jcode5@ring.gr.jp>.  Without them, I
810couldn't have coded this far.
811
812=head1 SEE ALSO
813
814L<Jcode::Unicode>
815
816L<Jcode::Unicode::NoXS>
817
818http://www.iana.org/assignments/character-sets
819
820L<Encode>
821
822=head1 COPYRIGHT
823
824Copyright 1999 Dan Kogai <dankogai@dan.co.jp>
825
826This library is free software; you can redistribute it
827and/or modify it under the same terms as Perl itself.
828
829=cut
830