package String::Multibyte::GBK;

# Charset definition for GBK.  The last statement of this file is an
# anonymous hash, so the file evaluates to that hash reference; the POD
# below says it is used internally by String::Multibyte (the loading code
# is not part of this file).

use strict;

# 'use vars' (rather than 'our') is kept so the file does not start
# requiring a newer perl than it did before.
use vars qw($VERSION);
$VERSION = '1.12';

+{
    charset  => 'GBK',

    # Pattern text for exactly one GBK character: either one byte in
    # 0x00..0x80, or a lead byte 0x81..0xFE followed by a trail byte in
    # 0x40..0x7E or 0x80..0xFE.  It is a single-quoted string, so the \x
    # escapes are passed through literally for the regex engine to read.
    regexp   => '(?:[\x00-\x80]|[\x81-\xFE][\x40-\x7E\x80-\xFE])',

    # Orders two characters with plain string comparison; returns the
    # usual -1 / 0 / 1 of 'cmp'.
    cmpchar  => sub { $_[0] cmp $_[1] },

    # Returns the character that follows the given one in the order
    # 0x00..0x80, 0x8140..0xFEFE (trail byte 0x7F is skipped).
    #
    #   argument: one character, as a string of 1 or 2 bytes
    #   returns:  the next character;
    #             undef when given the last character "\xFE\xFE";
    #             an empty return (undef / empty list) when the argument
    #             is neither 1 nor 2 bytes long
    #
    # The argument is not validated against 'regexp'; byte values outside
    # the ranges above are simply incremented like any other.
    nextchar => sub {
        my $ch  = shift;
        my $len = length $ch;

        if ($len == 1) {
            # 0x80 is the last single-byte character, so its successor is
            # the first double-byte character.
            return $ch eq "\x80"
                ? "\x81\x40"
                : chr(ord($ch) + 1);
        }
        elsif ($len == 2) {
            my ($c, $d) = unpack('CC', $ch);    # lead byte, trail byte

            # Note: the explicit undef (not a bare return) is what callers
            # have always received here, so it is left as is.
            return $ch eq "\xFE\xFE" ? undef                    # no successor
                 : $d == 0xFE        ? chr($c + 1) . "\x40"     # next lead byte
                 : $d == 0x7E        ? chr($c)     . "\x80"     # skip trail 0x7F
                 :                     pack('CC', $c, $d + 1);
        }
        else {
            return;
        }
    },
};

__END__

=head1 NAME

String::Multibyte::GBK - internally used by String::Multibyte
for GBK

=head1 SYNOPSIS

    use String::Multibyte;

    $gbk = String::Multibyte->new('GBK');
    $gbk_length = $gbk->length($gbk_string);

=head1 DESCRIPTION

C<String::Multibyte::GBK> is used for manipulation of strings
in GBK (Guobiao Kuozhan).

Byte range of single-byte characters:
C<0x00..0x80>.

Leading byte range of double-byte characters:
C<0x81..0xFE>.

Trailing byte range of double-byte characters:
C<0x40..0x7E> and C<0x80..0xFE>.

Character order (invalid code points are excluded):
C<0x00..0x80>, C<0x8140..0xFEFE>.

=head1 CAVEAT

C<0x80> is supported in consideration of Microsoft's CP936.
(Hmmm, according to IANA Charset Registration for GBK,
GBK must be identical to CP936.)

=head1 SEE ALSO

L<String::Multibyte>

=cut