1#
2# $Id: NoXS.pm,v 1.1.1.1 2003/08/02 23:39:52 takezoe Exp $
3#
4
5package Jcode::Unicode::NoXS;
6
7use strict;
8use vars qw($RCSID $VERSION);
9
10$RCSID = q$Id: NoXS.pm,v 1.1.1.1 2003/08/02 23:39:52 takezoe Exp $;
11$VERSION = do { my @r = (q$Revision: 1.1.1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
12
13use Carp;
14
15use Jcode::Constants qw(:all);
16use Jcode::Unicode::Constants;
17
18use vars qw(*_E2U *_U2E $PEDANTIC);
19
20$PEDANTIC = 0;
21
22# Quick and dirty import
23
24*_E2U = *Jcode::Unicode::Constants::_E2U;
25*_U2E = *Jcode::Unicode::Constants::_U2E;
26
27sub _init_u2e{
28    unless ($PEDANTIC){
29	$_U2E{"\xff\x3c"} = "\xa1\xc0"; # ��
30    }else{
31	delete $_U2E{"\xff\x3c"};
32	$_U2E{"\x00\x5c"} = "\xa1\xc0";     #\
33	$_U2E{"\x00\x7e"} = "\x8f\xa2\xb7"; # ~
34    }
35}
36
37sub _init_e2u{
38    unless (%_E2U){
39	%_E2U =
40	    reverse %_U2E;
41    }
42    unless ($PEDANTIC){
43	$_E2U{"\xa1\xc0"} = "\xff\x3c"; # ��
44    }else{
45	delete $_E2U{"\xa1\xc0"};
46	$_E2U{"\xa1\xc0"} = "\x00\x5c";     #\
47	$_E2U{"\x8f\xa2\xb7"} = "\x00\x7e"; # ~
48    }
49}
50
51
52# Yuck! but this is necessary because this module is 'require'd
53# instead of being 'use'd (No package export done) subs below
54# belong to Jcode, not Jcode::Unicode
55
56sub Jcode::ucs2_euc{
57    my $thingy = shift;
58    my $r_str = ref $thingy ? $thingy : \$thingy;
59    _init_u2e();
60
61    $$r_str =~ s(
62		 ([\x00-\xff][\x00-\xff])
63		 )
64    {
65	exists $_U2E{$1} ? $_U2E{$1} : $CHARCODE{UNDEF_JIS};
66    }geox;
67
68    $$r_str;
69}
70
71sub Jcode::euc_ucs2{
72    my $thingy = shift;
73    my $r_str = ref $thingy ? $thingy : \$thingy;
74    _init_e2u();
75
76    # 3 bytes
77    $$r_str =~ s(
78		 ($RE{EUC_0212}|$RE{EUC_C}|$RE{EUC_KANA}|[\x00-\xff])
79		 )
80    {
81	exists $_E2U{$1} ? $_E2U{$1} : $CHARCODE{UNDEF_UNICODE};
82    }geox;
83
84    $$r_str;
85}
86
87sub Jcode::euc_utf8{
88    my $thingy = shift;
89    my $r_str = ref $thingy ? $thingy : \$thingy;
90    &Jcode::euc_ucs2($r_str);
91    &Jcode::ucs2_utf8($r_str);
92}
93
94sub Jcode::utf8_euc{
95    my $thingy = shift;
96    my $r_str = ref $thingy ? $thingy : \$thingy;
97    &Jcode::utf8_ucs2($r_str);
98    &Jcode::ucs2_euc($r_str);
99}
100
101sub Jcode::ucs2_utf8{
102    my $thingy = shift;
103    my $r_str = ref $thingy ? $thingy : \$thingy;
104    my $result;
105    for my $uc (unpack("n*", $$r_str)) {
106        if ($uc < 0x80) {
107            # 1 byte representation
108            $result .= chr($uc);
109        } elsif ($uc < 0x800) {
110            # 2 byte representation
111            $result .= chr(0xC0 | ($uc >> 6)) .
112                chr(0x80 | ($uc & 0x3F));
113        } else {
114            # 3 byte representation
115            $result .= chr(0xE0 | ($uc >> 12)) .
116                chr(0x80 | (($uc >> 6) & 0x3F)) .
117                    chr(0x80 | ($uc & 0x3F));
118        }
119
120    }
121    $$r_str = $result;
122}
123
124sub Jcode::utf8_ucs2{
125    my $thingy = shift;
126    my $r_str = ref $thingy ? $thingy : \$thingy;
127    my $result;
128    $$r_str =~ s/^[\200-\277]+//o;  # can't start with 10xxxxxx
129    $$r_str =~
130	s[
131	  ($RE{ASCII} | $RE{UTF8})
132	  ]{
133	      my $str = $1;
134	      if (length($str) == 1){
135		  pack("n", unpack("C", $str));
136	      }elsif(length($str) == 2){
137		  my ($c1,$c2) = unpack("C2", $str);
138		  pack("n", (($c1 & 0x1F)<<6)|($c2 & 0x3F));
139	      }else{
140		  my ($c1,$c2,$c3) = unpack("C3", $str);
141		  pack("n",
142		       (($c1 & 0x0F)<<12)|(($c2 & 0x3F)<<6)|($c3 & 0x3F));
143	      }
144	  }egox;
145    $$r_str;
146}
147
1481;
149__END__
150
151=head1 NAME
152
153Jcode::Unicode::NoXS - Non-XS version of Jcode::Unicode
154
155=head1 SYNOPSIS
156
157NONE
158
159=head1 DESCRIPTION
160
161This module is called by Jcode.pm on demand.  This module is not intended for
162direct use by users.  This modules implements functions related to Unicode.
163Following functions are defined here;
164
165=over 4
166
167=item Jcode::ucs2_euc();
168
169=item Jcode::euc_ucs2();
170
171=item Jcode::ucs2_utf8();
172
173=item Jcode::utf8_ucs2();
174
175=item Jcode::euc_utf8();
176
177=item Jcode::utf8_euc();
178
179=back
180
181=cut
182
183=head1 VARIABLES
184
185=over 4
186
187=item B<$Jcode::Unicode::PEDANTIC>
188
189When set to non-zero, x-to-unicode conversion becomes pedantic.
190That is, '\' (chr(0x5c)) is converted to zenkaku backslash and
191'~" (chr(0x7e)) to JIS-x0212 tilde.
192
193By Default, Jcode::Unicode leaves ascii ([0x00-0x7f]) as it is.
194
195=back
196
197=head1 MODULES
198
199=over 4
200
201=item Jcode::Unicode::Constants
202
203Jumbo hash that contains UCS2-EUC conversion table is there.
204
205=back
206
207=head1 BUGS
208
209 * It's very slow to initialize, due to the size of the conversion
210   table it has to load.  Once loaded, however, the perfomance is not
211   too bad (But still much slower than XS version).
212 * Besides that, that is Unicode, Inc. to Blame (Especially JIS0201.TXT).
213
214=head1 SEE ALSO
215
216http://www.unicode.org/
217
218=head1 COPYRIGHT
219
220Copyright 1999 Dan Kogai <dankogai@dan.co.jp>
221
222This library is free software; you can redistribute it
223and/or modify it under the same terms as Perl itself.
224
225Unicode conversion table used here are based uponon files at
226ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/,
227Copyright (c) 1991-1994 Unicode, Inc.
228
229=cut
230
231