1# 2# $Id: NoXS.pm,v 1.1.1.1 2003/08/02 23:39:52 takezoe Exp $ 3# 4 5package Jcode::Unicode::NoXS; 6 7use strict; 8use vars qw($RCSID $VERSION); 9 10$RCSID = q$Id: NoXS.pm,v 1.1.1.1 2003/08/02 23:39:52 takezoe Exp $; 11$VERSION = do { my @r = (q$Revision: 1.1.1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 12 13use Carp; 14 15use Jcode::Constants qw(:all); 16use Jcode::Unicode::Constants; 17 18use vars qw(*_E2U *_U2E $PEDANTIC); 19 20$PEDANTIC = 0; 21 22# Quick and dirty import 23 24*_E2U = *Jcode::Unicode::Constants::_E2U; 25*_U2E = *Jcode::Unicode::Constants::_U2E; 26 27sub _init_u2e{ 28 unless ($PEDANTIC){ 29 $_U2E{"\xff\x3c"} = "\xa1\xc0"; # �� 30 }else{ 31 delete $_U2E{"\xff\x3c"}; 32 $_U2E{"\x00\x5c"} = "\xa1\xc0"; #\ 33 $_U2E{"\x00\x7e"} = "\x8f\xa2\xb7"; # ~ 34 } 35} 36 37sub _init_e2u{ 38 unless (%_E2U){ 39 %_E2U = 40 reverse %_U2E; 41 } 42 unless ($PEDANTIC){ 43 $_E2U{"\xa1\xc0"} = "\xff\x3c"; # �� 44 }else{ 45 delete $_E2U{"\xa1\xc0"}; 46 $_E2U{"\xa1\xc0"} = "\x00\x5c"; #\ 47 $_E2U{"\x8f\xa2\xb7"} = "\x00\x7e"; # ~ 48 } 49} 50 51 52# Yuck! but this is necessary because this module is 'require'd 53# instead of being 'use'd (No package export done) subs below 54# belong to Jcode, not Jcode::Unicode 55 56sub Jcode::ucs2_euc{ 57 my $thingy = shift; 58 my $r_str = ref $thingy ? $thingy : \$thingy; 59 _init_u2e(); 60 61 $$r_str =~ s( 62 ([\x00-\xff][\x00-\xff]) 63 ) 64 { 65 exists $_U2E{$1} ? $_U2E{$1} : $CHARCODE{UNDEF_JIS}; 66 }geox; 67 68 $$r_str; 69} 70 71sub Jcode::euc_ucs2{ 72 my $thingy = shift; 73 my $r_str = ref $thingy ? $thingy : \$thingy; 74 _init_e2u(); 75 76 # 3 bytes 77 $$r_str =~ s( 78 ($RE{EUC_0212}|$RE{EUC_C}|$RE{EUC_KANA}|[\x00-\xff]) 79 ) 80 { 81 exists $_E2U{$1} ? $_E2U{$1} : $CHARCODE{UNDEF_UNICODE}; 82 }geox; 83 84 $$r_str; 85} 86 87sub Jcode::euc_utf8{ 88 my $thingy = shift; 89 my $r_str = ref $thingy ? $thingy : \$thingy; 90 &Jcode::euc_ucs2($r_str); 91 &Jcode::ucs2_utf8($r_str); 92} 93 94sub Jcode::utf8_euc{ 95 my $thingy = shift; 96 my $r_str = ref $thingy ? $thingy : \$thingy; 97 &Jcode::utf8_ucs2($r_str); 98 &Jcode::ucs2_euc($r_str); 99} 100 101sub Jcode::ucs2_utf8{ 102 my $thingy = shift; 103 my $r_str = ref $thingy ? $thingy : \$thingy; 104 my $result; 105 for my $uc (unpack("n*", $$r_str)) { 106 if ($uc < 0x80) { 107 # 1 byte representation 108 $result .= chr($uc); 109 } elsif ($uc < 0x800) { 110 # 2 byte representation 111 $result .= chr(0xC0 | ($uc >> 6)) . 112 chr(0x80 | ($uc & 0x3F)); 113 } else { 114 # 3 byte representation 115 $result .= chr(0xE0 | ($uc >> 12)) . 116 chr(0x80 | (($uc >> 6) & 0x3F)) . 117 chr(0x80 | ($uc & 0x3F)); 118 } 119 120 } 121 $$r_str = $result; 122} 123 124sub Jcode::utf8_ucs2{ 125 my $thingy = shift; 126 my $r_str = ref $thingy ? $thingy : \$thingy; 127 my $result; 128 $$r_str =~ s/^[\200-\277]+//o; # can't start with 10xxxxxx 129 $$r_str =~ 130 s[ 131 ($RE{ASCII} | $RE{UTF8}) 132 ]{ 133 my $str = $1; 134 if (length($str) == 1){ 135 pack("n", unpack("C", $str)); 136 }elsif(length($str) == 2){ 137 my ($c1,$c2) = unpack("C2", $str); 138 pack("n", (($c1 & 0x1F)<<6)|($c2 & 0x3F)); 139 }else{ 140 my ($c1,$c2,$c3) = unpack("C3", $str); 141 pack("n", 142 (($c1 & 0x0F)<<12)|(($c2 & 0x3F)<<6)|($c3 & 0x3F)); 143 } 144 }egox; 145 $$r_str; 146} 147 1481; 149__END__ 150 151=head1 NAME 152 153Jcode::Unicode::NoXS - Non-XS version of Jcode::Unicode 154 155=head1 SYNOPSIS 156 157NONE 158 159=head1 DESCRIPTION 160 161This module is called by Jcode.pm on demand. This module is not intended for 162direct use by users. This modules implements functions related to Unicode. 163Following functions are defined here; 164 165=over 4 166 167=item Jcode::ucs2_euc(); 168 169=item Jcode::euc_ucs2(); 170 171=item Jcode::ucs2_utf8(); 172 173=item Jcode::utf8_ucs2(); 174 175=item Jcode::euc_utf8(); 176 177=item Jcode::utf8_euc(); 178 179=back 180 181=cut 182 183=head1 VARIABLES 184 185=over 4 186 187=item B<$Jcode::Unicode::PEDANTIC> 188 189When set to non-zero, x-to-unicode conversion becomes pedantic. 190That is, '\' (chr(0x5c)) is converted to zenkaku backslash and 191'~" (chr(0x7e)) to JIS-x0212 tilde. 192 193By Default, Jcode::Unicode leaves ascii ([0x00-0x7f]) as it is. 194 195=back 196 197=head1 MODULES 198 199=over 4 200 201=item Jcode::Unicode::Constants 202 203Jumbo hash that contains UCS2-EUC conversion table is there. 204 205=back 206 207=head1 BUGS 208 209 * It's very slow to initialize, due to the size of the conversion 210 table it has to load. Once loaded, however, the perfomance is not 211 too bad (But still much slower than XS version). 212 * Besides that, that is Unicode, Inc. to Blame (Especially JIS0201.TXT). 213 214=head1 SEE ALSO 215 216http://www.unicode.org/ 217 218=head1 COPYRIGHT 219 220Copyright 1999 Dan Kogai <dankogai@dan.co.jp> 221 222This library is free software; you can redistribute it 223and/or modify it under the same terms as Perl itself. 224 225Unicode conversion table used here are based uponon files at 226ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/, 227Copyright (c) 1991-1994 Unicode, Inc. 228 229=cut 230 231