1# 2# $Id: Jcode.pm,v 1.4 2004/02/29 01:07:41 takezoe Exp $ 3# 4 5=head1 NAME 6 7Jcode - Japanese Charset Handler 8 9=head1 SYNOPSIS 10 11 use Jcode; 12 # 13 # traditional 14 Jcode::convert(\$str, $ocode, $icode, "z"); 15 # or OOP! 16 print Jcode->new($str)->h2z->tr($from, $to)->utf8; 17 18=cut 19 20=head1 DESCRIPTION 21 22Jcode.pm supports both object and traditional approach. 23With object approach, you can go like; 24 25$iso_2022_jp = Jcode->new($str)->h2z->jis; 26 27Which is more elegant than; 28 29$iso_2022_jp = &jcode::convert(\$str,'jis',jcode::getcode(\str), "z"); 30 31For those unfamiliar with objects, Jcode.pm still supports getcode() 32and convert(). 33 34=cut 35 36package Jcode; 37use 5.004; 38use Carp; 39use Jcode::H2Z; 40use Jcode::Tr; 41use strict; 42use vars qw($RCSID $VERSION $DEBUG); 43 44$RCSID = q$Id: Jcode.pm,v 1.4 2004/02/29 01:07:41 takezoe Exp $; 45$VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 46$DEBUG = 0; 47 48use Exporter; 49use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 50@ISA = qw(Exporter); 51@EXPORT = qw(jcode getcode); 52@EXPORT_OK = qw($RCSID $VERSION $DEBUG); 53%EXPORT_TAGS = ( all => [ @EXPORT_OK, @EXPORT ] ); 54 55 56use vars qw($USE_CACHE $NOXS); 57 58$USE_CACHE = 1; 59$NOXS = 1; 60 61print $RCSID, "\n" if $DEBUG; 62 63use Jcode::Constants qw(:all); 64 65use overload 66 q("") => sub { ${$_[0]->[0]} }, 67 q(==) => sub {overload::StrVal($_[0]) eq overload::StrVal($_[1])}, 68 q(=) => sub { $_[0]->set( $_[1] ) }, 69 q(.=) => sub { $_[0]->append( $_[1] ) }, 70 fallback => 1, 71 ; 72 73=head1 Methods 74 75Methods mentioned here all return Jcode object unless otherwise mentioned. 76 77=over 4 78 79=item $j = Jcode-E<gt>new($str [, $icode]); 80 81Creates Jcode object $j from $str. Input code is automatically checked 82unless you explicitly set $icode. For available charset, see L<getcode> 83below. 84 85The object keeps the string in EUC format enternaly. When the object 86itself is evaluated, it returns the EUC-converted string so you can 87"print $j;" without calling access method if you are using EUC 88(thanks to function overload). 89 90=item Passing Reference 91 92Instead of scalar value, You can use reference as 93 94Jcode->new(\$str); 95 96This saves time a little bit. In exchange of the value of $str being 97converted. (In a way, $str is now "tied" to jcode object). 98 99=item $j-E<gt>set($str [, $icode]); 100 101Sets $j's internal string to $str. Handy when you use Jcode object repeatedly 102(saves time and memory to create object). 103 104 # converts mailbox to SJIS format 105 my $jconv = new Jcode; 106 $/ = 00; 107 while(<>){ 108 print $jconv->set(\$_)->mime_decode->sjis; 109 } 110 111=item $j-E<gt>append($str [, $icode]); 112 113Appends $str to $j's internal string. 114 115=back 116 117=cut 118 119sub new { 120 my $class = shift; 121 my ($thingy, $icode) = @_; 122 my $r_str = ref $thingy ? $thingy : \$thingy; 123 my $nmatch; 124 ($icode, $nmatch) = getcode($r_str) unless $icode; 125 convert($r_str, 'euc', $icode); 126 my $self = [ 127 $r_str, 128 $icode, 129 $nmatch, 130 ]; 131 carp "Object of class $class created" if $DEBUG >= 2; 132 bless $self, $class; 133} 134 135sub r_str { $_[0]->[0] } 136sub icode { $_[0]->[1] } 137sub nmatch { $_[0]->[2] } 138 139sub set { 140 my $self = shift; 141 my ($thingy, $icode) = @_; 142 my $r_str = ref $thingy ? $thingy : \$thingy; 143 my $nmatch; 144 ($icode, $nmatch) = getcode($r_str) unless $icode; 145 convert($r_str, 'euc', $icode); 146 $self->[0] = $r_str; 147 $self->[1] = $icode; 148 $self->[2] = $nmatch; 149 return $self; 150} 151 152sub append { 153 my $self = shift; 154 my ($thingy, $icode) = @_; 155 my $r_str = ref $thingy ? $thingy : \$thingy; 156 my $nmatch; 157 ($icode, $nmatch) = getcode($r_str) unless $icode; 158 convert($r_str, 'euc', $icode); 159 ${$self->[0]} .= $$r_str; 160 $self->[1] = $icode; 161 $self->[2] = $nmatch; 162 return $self; 163} 164 165=over 4 166 167=item $j = jcode($str [, $icode]); 168 169shortcut for Jcode->new() so you can go like; 170 171$sjis = jcode($str)->sjis; 172 173=item $euc = $j-E<gt>euc; 174 175=item $jis = $j-E<gt>jis; 176 177=item $sjis = $j-E<gt>sjis; 178 179What you code is what you get :) 180 181=item $iso_2022_jp = $j-E<gt>iso_2022_jp 182 183Same as $j->z2h->jis. 184Hankaku Kanas are forcibly converted to Zenkaku. 185 186=back 187 188=cut 189 190sub jcode { return Jcode->new(@_) } 191sub euc { return ${$_[0]->[0]} } 192sub jis { return &euc_jis(${$_[0]->[0]})} 193sub sjis { return &euc_sjis(${$_[0]->[0]})} 194sub iso_2022_jp{return $_[0]->h2z->jis} 195 196=over 4 197 198=item [@lines =] $jcode-E<gt>jfold([$bytes_per_line, $newline_str]); 199 200folds lines in jcode string every $bytes_per_line (default: 72) 201in a way that does not clobber the multibyte string. 202(Sorry, no Kinsoku done!) 203with a newline string spified by $newline_str (default: \n). 204 205=back 206 207=cut 208 209sub jfold{ 210 my $self = shift; 211 my ($bpl, $nl) = @_; 212 $bpl ||= 72; 213 $nl ||= "\n"; 214 my $r_str = $self->[0]; 215 my (@lines, $len, $i); 216 while ($$r_str =~ 217 m/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|[\x00-\xff])/sgo) 218 { 219 if ($len + length($1) > $bpl){ # fold! 220 $i++; 221 $len = 0; 222 } 223 $lines[$i] .= $1; 224 $len += length($1); 225 } 226 defined($lines[$i]) or pop @lines; 227 $$r_str = join($nl, @lines); 228 return wantarray ? @lines : $self; 229} 230 231=pod 232 233=over 4 234 235=item $length = $jcode-E<gt>jlength(); 236 237returns character length properly, rather than byte length. 238 239=back 240 241=cut 242 243sub jlength { 244 my $self = shift; 245 my $r_str = $self->[0]; 246 return scalar (my @char = $$r_str =~ m/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|[\x00-\xff])/sgo); 247} 248 249=head2 Methods that use MIME::Base64 250 251To use methods below, you need MIME::Base64. To install, simply 252 253 perl -MCPAN -e 'CPAN::Shell->install("MIME::Base64")' 254 255=over 4 256 257=item $mime_header = $j-E<gt>mime_encode([$lf, $bpl]); 258 259Converts $str to MIME-Header documented in RFC1522. 260When $lf is specified, it uses $lf to fold line (default: \n). 261When $bpl is specified, it uses $bpl for the number of bytes (default: 76; 262this number must be smaller than 76). 263 264=item $j-E<gt>mime_decode; 265 266Decodes MIME-Header in Jcode object. 267 268You can retrieve the number of matches via $j->nmatch; 269 270=back 271 272=cut 273 274sub mime_encode{ 275 my $self = shift; 276 my $r_str = $self->[0]; 277 my $lf = shift || "\n"; 278 my $bpl = shift || 76; 279 280 my ($trailing_crlf) = ($$r_str =~ /(\n|\r|\x0d\x0a)$/o); 281 my $str = _mime_unstructured_header($$r_str, $lf, $bpl); 282 not $trailing_crlf and $str =~ s/(\n|\r|\x0d\x0a)$//o; 283 $str; 284} 285 286# 287# shamelessly stolen from 288# http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 289# 290 291sub _add_encoded_word { 292 require MIME::Base64; 293 my($str, $line, $bpl) = @_; 294 my $result = ''; 295 while (length($str)) { 296 my $target = $str; 297 $str = ''; 298 if (length($line) + 22 + 299 ($target =~ /^(?:$RE{EUC_0212}|$RE{EUC_C})/o) * 8 > $bpl) { 300 $line =~ s/[ \t\n\r]*$/\n/; 301 $result .= $line; 302 $line = ' '; 303 } 304 while (1) { 305 my $iso_2022_jp = jcode($target, 'euc')->iso_2022_jp; 306 if (my $count = ($iso_2022_jp =~ tr/\x80-\xff//d)){ 307 $DEBUG and warn $count; 308 $target = jcode($iso_2022_jp, 'iso_2022_jp')->euc; 309 } 310 my $encoded = '=?ISO-2022-JP?B?' . 311 MIME::Base64::encode_base64($iso_2022_jp, '') 312 . '?='; 313 if (length($encoded) + length($line) > $bpl) { 314 $target =~ 315 s/($RE{EUC_0212}|$RE{EUC_KANA}|$RE{EUC_C}|$RE{ASCII})$//o; 316 $str = $1 . $str; 317 } else { 318 $line .= $encoded; 319 last; 320 } 321 } 322 } 323 return $result . $line; 324} 325 326sub _mime_unstructured_header { 327 my ($oldheader, $lf, $bpl) = @_; 328 my(@words, @wordstmp, $i); 329 my $header = ''; 330 $oldheader =~ s/\s+$//; 331 @wordstmp = split /\s+/, $oldheader; 332 for ($i = 0; $i < $#wordstmp; $i++) { 333 if ($wordstmp[$i] !~ /^[\x21-\x7E]+$/ and 334 $wordstmp[$i + 1] !~ /^[\x21-\x7E]+$/) { 335 $wordstmp[$i + 1] = "$wordstmp[$i] $wordstmp[$i + 1]"; 336 } else { 337 push(@words, $wordstmp[$i]); 338 } 339 } 340 push(@words, $wordstmp[-1]); 341 for my $word (@words) { 342 if ($word =~ /^[\x21-\x7E]+$/) { 343 $header =~ /(?:.*\n)*(.*)/; 344 if (length($1) + length($word) > $bpl) { 345 $header .= "$lf $word"; 346 } else { 347 $header .= $word; 348 } 349 } else { 350 $header = _add_encoded_word($word, $header, $bpl); 351 } 352 $header =~ /(?:.*\n)*(.*)/; 353 if (length($1) == $bpl) { 354 $header .= "$lf "; 355 } else { 356 $header .= ' '; 357 } 358 } 359 $header =~ s/\n? $/\n/; 360 $header; 361} 362 363# see http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 364#$lws = '(?:(?:\x0d\x0a)?[ \t])+'; 365#$ew_regex = '=\?ISO-2022-JP\?B\?([A-Za-z0-9+/]+=*)\?='; 366#$str =~ s/($ew_regex)$lws(?=$ew_regex)/$1/gio; 367#$str =~ s/$lws/ /go; $str =~ s/$ew_regex/decode_base64($1)/egio; 368 369sub mime_decode{ 370 require MIME::Base64; # not use 371 my $self = shift; 372 my $r_str = $self->[0]; 373 my $re_lws = '(?:(?:\r|\n|\x0d\x0a)?[ \t])+'; 374 my $re_ew = '=\?[Ii][Ss][Oo]-2022-[Jj][Pp]\?[Bb]\?([A-Za-z0-9+/]+=*)\?='; 375 $$r_str =~ s/($re_ew)$re_lws(?=$re_ew)/$1/sgo; 376 $$r_str =~ s/$re_lws/ /go; 377 $self->[2] = 378 ($$r_str =~ 379 s/$re_ew/jis_euc(MIME::Base64::decode_base64($1))/ego 380 ); 381 $self; 382} 383 384 385=head2 Methods implemented by Jcode::H2Z 386 387Methods below are actually implemented in Jcode::H2Z. 388 389=over 4 390 391=item $j-E<gt>h2z([$keep_dakuten]); 392 393Converts X201 kana (Hankaku) to X208 kana (Zenkaku). 394When $keep_dakuten is set, it leaves dakuten as is 395(That is, "ka + dakuten" is left as is instead of 396being converted to "ga") 397 398You can retrieve the number of matches via $j->nmatch; 399 400=item $j-E<gt>z2h; 401 402Converts X208 kana (Zenkaku) to X201 kana (Hankaku). 403 404You can retrieve the number of matches via $j->nmatch; 405 406=back 407 408=cut 409 410sub h2z { 411 require Jcode::H2Z; # not use 412 my $self = shift; 413 $self->[2] = Jcode::H2Z::h2z($self->[0], @_); 414 return $self; 415} 416 417 418sub z2h { 419 require Jcode::H2Z; # not use 420 my $self = shift; 421 $self->[2] = &Jcode::H2Z::z2h($self->[0], @_); 422 return $self; 423} 424 425 426=head2 Methods implemented in Jcode::Tr 427 428Methods here are actually implemented in Jcode::Tr. 429 430=over 4 431 432=item $j-E<gt>tr($from, $to); 433 434Applies tr on Jcode object. $from and $to can contain EUC Japanese. 435 436You can retrieve the number of matches via $j->nmatch; 437 438=back 439 440=cut 441 442sub tr{ 443 require Jcode::Tr; # not use 444 my $self = shift; 445 $self->[2] = Jcode::Tr::tr($self->[0], @_); 446 return $self; 447} 448 449# 450# load needed module depending on the configuration just once! 451# 452 453use vars qw(%PKG_LOADED); 454sub load_module{ 455 my $pkg = shift; 456 return $pkg if $PKG_LOADED{$pkg}++; 457 unless ($NOXS){ 458 eval qq( require $pkg; ); 459 unless ($@){ 460 carp "$pkg loaded." if $DEBUG; 461 return $pkg; 462 } 463 } 464 $pkg .= "::NoXS"; 465 eval qq( require $pkg; ); 466 unless ($@){ 467 carp "$pkg loaded" if $DEBUG; 468 }else{ 469 croak "Loading $pkg failed!"; 470 } 471 $pkg; 472} 473 474=head2 Methods implemented in Jcode::Unicode 475 476If your perl does not support XS (or you can't C<perl Makefile.PL>, 477Jcode::Unicode::NoXS will be used. 478 479See L<Jcode::Unicode> and L<Jcode::Unicode::NoXS> for details 480 481=over 4 482 483=item $ucs2 = $j-E<gt>ucs2; 484 485Returns UCS2 (Raw Unicode) string. 486 487=item $ucs2 = $j-E<gt>utf8; 488 489Returns utf8 String. 490 491=back 492 493=cut 494 495sub ucs2{ 496 load_module("Jcode::Unicode"); 497 euc_ucs2(${$_[0]->[0]}); 498} 499 500sub utf8{ 501 load_module("Jcode::Unicode"); 502 euc_utf8(${$_[0]->[0]}); 503} 504 505=head2 Instance Variables 506 507If you need to access instance variables of Jcode object, use access 508methods below instead of directly accessing them (That's what OOP 509is all about) 510 511FYI, Jcode uses a ref to array instead of ref to hash (common way) to 512optimize speed (Actually you don't have to know as long as you use 513access methods instead; Once again, that's OOP) 514 515=over 4 516 517=item $j-E<gt>r_str 518 519Reference to the EUC-coded String. 520 521=item $j-E<gt>icode 522 523Input charcode in recent operation. 524 525=item $j-E<gt>nmatch 526 527Number of matches (Used in $j->tr, etc.) 528 529=back 530 531=cut 532 533=head1 Subroutines 534 535=over 4 536 537=item ($code, [$nmatch]) = getcode($str); 538 539Returns char code of $str. Return codes are as follows 540 541 ascii Ascii (Contains no Japanese Code) 542 binary Binary (Not Text File) 543 euc EUC-JP 544 sjis SHIFT_JIS 545 jis JIS (ISO-2022-JP) 546 ucs2 UCS2 (Raw Unicode) 547 utf8 UTF8 548 549When array context is used instead of scaler, it also returns how many 550character codes are found. As mentioned above, $str can be \$str 551instead. 552 553B<jcode.pl Users:> This function is 100% upper-conpatible with 554jcode::getcode() -- well, almost; 555 556 * When its return value is an array, the order is the opposite; 557 jcode::getcode() returns $nmatch first. 558 559 * jcode::getcode() returns 'undef' when the number of EUC characters 560 is equal to that of SJIS. Jcode::getcode() returns EUC. for 561 Jcode.pm there is no in-betweens. 562 563=item Jcode::convert($str, [$ocode, $icode, $opt]); 564 565Converts $str to char code specified by $ocode. When $icode is specified 566also, it assumes $icode for input string instead of the one checked by 567getcode(). As mentioned above, $str can be \$str instead. 568 569B<jcode.pl Users:> This function is 100% upper-conpatible with 570jcode::convert() ! 571 572=back 573 574=cut 575 576sub getcode { 577 my $thingy = shift; 578 my $r_str = ref $thingy ? $thingy : \$thingy; 579 580 my ($code, $nmatch, $sjis, $euc, $utf8) = ("", 0, 0, 0, 0); 581 if ($$r_str =~ /$RE{BIN}/o) { # 'binary' 582 my $ucs2; 583 $ucs2 += length($1) 584 while $$r_str =~ /(\x00$RE{ASCII})+/go; 585 if ($ucs2){ # smells like raw unicode 586 ($code, $nmatch) = ('ucs2', $ucs2); 587 }else{ 588 ($code, $nmatch) = ('binary', 0); 589 } 590 } 591 elsif ($$r_str !~ /[\e\x80-\xff]/o) { # not Japanese 592 ($code, $nmatch) = ('ascii', 1); 593 } # 'jis' 594 elsif ($$r_str =~ 595 m[ 596 $RE{JIS_0208}|$RE{JIS_0212}|$RE{JIS_ASC}|$RE{JIS_KANA} 597 ]ox) 598 { 599 ($code, $nmatch) = ('jis', 1); 600 } 601 else { # should be euc|sjis|utf8 602 # use of (?:) by Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp> 603 $sjis += length($1) 604 while $$r_str =~ /((?:$RE{SJIS_C})+)/go; 605 $euc += length($1) 606 while $$r_str =~ /((?:$RE{EUC_C}|$RE{EUC_KANA}|$RE{EUC_0212})+)/go; 607 $utf8 += length($1) 608 while $$r_str =~ /((?:$RE{UTF8})+)/go; 609 $nmatch = _max($utf8, $sjis, $euc); 610 carp ">DEBUG:sjis = $sjis, euc = $euc, utf8 = $utf8" if $DEBUG >= 3; 611 $code = 612 ($euc > $sjis and $euc > $utf8) ? 'euc' : 613 ($sjis > $euc and $sjis > $utf8) ? 'sjis' : 614 ($utf8 > $euc and $utf8 > $sjis) ? 'utf8' : undef; 615 } 616 return wantarray ? ($code, $nmatch) : $code; 617} 618 619sub convert{ 620 my $thingy = shift; 621 my $r_str = ref $thingy ? $thingy : \$thingy; 622 my ($ocode, $icode, $opt) = @_; 623 624 my $nmatch; 625 ($icode, $nmatch) = getcode($r_str) unless $icode; 626 627 return $$r_str if $icode eq $ocode and !defined $opt; # do nothin' 628 629 no strict qw(refs); 630 my $method; 631 632 # convert to EUC 633 634 load_module("Jcode::Unicode") if $icode =~ /ucs2|utf8/o; 635 if ($icode and defined &{$method = $icode . "_euc"}){ 636 carp "Dispatching \&$method" if $DEBUG >= 2; 637 &{$method}($r_str) ; 638 } 639 640 # h2z or z2h 641 642 if ($opt){ 643 my $cmd = ($opt =~ /^z/o) ? "h2z" : ($opt =~ /^h/o) ? "z2h" : undef; 644 if ($cmd){ 645 require Jcode::H2Z; 646 &{'Jcode::H2Z::' . $cmd}($r_str); 647 } 648 } 649 650 # convert to $ocode 651 652 load_module("Jcode::Unicode") if $ocode =~ /ucs2|utf8/o; 653 if ($ocode and defined &{$method = "euc_" . $ocode}){ 654 carp "Dispatching \&$method" if $DEBUG >= 2; 655 &{$method}($r_str) ; 656 } 657 $$r_str; 658} 659 660# JIS<->EUC 661 662sub jis_euc { 663 my $thingy = shift; 664 my $r_str = ref $thingy ? $thingy : \$thingy; 665 $$r_str =~ s( 666 ($RE{JIS_0212}|$RE{JIS_0208}|$RE{JIS_ASC}|$RE{JIS_KANA}) 667 ([^\e]*) 668 ) 669 { 670 my ($esc, $str) = ($1, $2); 671 if ($esc !~ /$RE{JIS_ASC}/o) { 672 $str =~ tr/\x21-\x7e/\xa1-\xfe/; 673 if ($esc =~ /$RE{JIS_KANA}/o) { 674 $str =~ s/([\xa1-\xdf])/\x8e$1/og; 675 } 676 elsif ($esc =~ /$RE{JIS_0212}/o) { 677 $str =~ s/([\xa1-\xfe][\xa1-\xfe])/\x8f$1/og; 678 } 679 } 680 $str; 681 }geox; 682 $$r_str; 683} 684 685# 686# euc_jis 687# 688# Based upon the contribution of 689# Kazuto Ichimura <ichimura@shimada.nuee.nagoya-u.ac.jp> 690# optimized by <ohzaki@iod.ricoh.co.jp> 691 692sub euc_jis{ 693 my $thingy = shift; 694 my $r_str = ref $thingy ? $thingy : \$thingy; 695 $$r_str =~ s{ 696 ((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+) 697 }{ 698 my $str = $1; 699 my $esc = 700 ( $str =~ tr/\x8E//d ) ? $ESC{KANA} : 701 ( $str =~ tr/\x8F//d ) ? $ESC{JIS_0212} : 702 $ESC{JIS_0208}; 703 $str =~ tr/\xA1-\xFE/\x21-\x7E/; 704 $esc . $str . $ESC{ASC}; 705 }geox; 706 $$r_str =~ 707 s/\Q$ESC{ASC}\E 708 (\Q$ESC{KANA}\E|\Q$ESC{JIS_0212}\E|\Q$ESC{JIS_0208}\E)/$1/gox; 709 $$r_str; 710} 711 712# EUC<->SJIS 713 714my %_S2E = (); 715my %_E2S = (); 716 717sub sjis_euc { 718 my $thingy = shift; 719 my $r_str = ref $thingy ? $thingy : \$thingy; 720 $$r_str =~ s( 721 ($RE{SJIS_C}|$RE{SJIS_KANA}) 722 ) 723 { 724 my $str = $1; 725 unless ($_S2E{$1}){ 726 my ($c1, $c2) = unpack('CC', $str); 727 if (0xa1 <= $c1 && $c1 <= 0xdf) { 728 $c2 = $c1; 729 $c1 = 0x8e; 730 } elsif (0x9f <= $c2) { 731 $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60); 732 $c2 += 2; 733 } else { 734 $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61); 735 $c2 += 0x60 + ($c2 < 0x7f); 736 } 737 $_S2E{$str} = pack('CC', $c1, $c2); 738 } 739 $_S2E{$str}; 740 }geox; 741 $$r_str; 742} 743 744# 745 746sub euc_sjis { 747 my $thingy = shift; 748 my $r_str = ref $thingy ? $thingy : \$thingy; 749 $$r_str =~ s( 750 ($RE{EUC_C}|$RE{EUC_KANA}|$RE{EUC_0212}) 751 ) 752 { 753 my $str = $1; 754 unless ($_E2S{$str}){ 755 my ($c1, $c2) = unpack('CC', $str); 756 if ($c1 == 0x8e) { # SS2 757 $_E2S{$str} = chr($c2); 758 } elsif ($c1 == 0x8f) { # SS3 759 $_E2S{$str} = $CHARCODE{UNDEF_SJIS}; 760 }else { #SS1 or X0208 761 if ($c1 % 2) { 762 $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x31 : 0x71); 763 $c2 -= 0x60 + ($c2 < 0xe0); 764 } else { 765 $c1 = ($c1>>1) + ($c1 < 0xdf ? 0x30 : 0x70); 766 $c2 -= 2; 767 } 768 $_E2S{$str} = pack('CC', $c1, $c2); 769 } 770 } 771 $_E2S{$str}; 772 }geox; 773 $$r_str; 774} 775 776# 777# Util. Functions 778# 779 780sub _max { 781 my $result = shift; 782 for my $n (@_){ 783 $result = $n if $n > $result; 784 } 785 return $result; 786} 787 7881; 789 790__END__ 791 792=head1 BUGS 793 794Unicode support by Jcode is far from efficient! 795 796=head1 IN FUTURE 797 798Hopefully Jcode will be superceded by Encode module that is part of 799the standard module on Perl 5.7 and up 800 801=head1 ACKNOWLEDGEMENTS 802 803This package owes a lot in motivation, design, and code, to the jcode.pl 804for Perl4 by Kazumasa Utashiro <utashiro@iij.ad.jp>. 805 806Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp> has helped me polish regexp from the 807very first stage of development. 808 809And folks at Jcode Mailing list <jcode5@ring.gr.jp>. Without them, I 810couldn't have coded this far. 811 812=head1 SEE ALSO 813 814L<Jcode::Unicode> 815 816L<Jcode::Unicode::NoXS> 817 818http://www.iana.org/assignments/character-sets 819 820L<Encode> 821 822=head1 COPYRIGHT 823 824Copyright 1999 Dan Kogai <dankogai@dan.co.jp> 825 826This library is free software; you can redistribute it 827and/or modify it under the same terms as Perl itself. 828 829=cut 830