package String::Multibyte::GBK;

# Character-set definition for GBK, used internally by String::Multibyte.
# The code section of this file consists of this header followed by a single
# anonymous hash holding the definition.

# Catch undeclared variables and symbolic references at compile time.
use strict;

# "use vars" (rather than "our") keeps the file loadable on very old perls.
use vars qw($VERSION);
$VERSION = '1.12';
5
+{
    # Name of the character set this definition describes.
    charset  => 'GBK',

    # Pattern matching exactly one GBK character: either a single byte in
    # 0x00..0x80, or a lead byte in 0x81..0xFE followed by a trail byte in
    # 0x40..0x7E or 0x80..0xFE.  Kept as a single-quoted string, so the \x
    # escapes reach the regex engine unexpanded.
    regexp   => '(?:[\x00-\x80]|[\x81-\xFE][\x40-\x7E\x80-\xFE])',

    # Compares two characters with plain string comparison; returns
    # -1, 0 or 1 like "cmp".
    cmpchar => sub { $_[0] cmp $_[1] },

    # Returns the character that follows the given one in the order
    # 0x00..0x80, 0x8140..0xFEFE (code points outside the pattern above are
    # skipped).
    #
    # Argument: one GBK character (a one- or two-byte string).
    # Returns:  the successor as a string, or nothing (undef in scalar
    #           context, the empty list in list context) when there is no
    #           successor -- i.e. for "\xFE\xFE" and for any argument that is
    #           not exactly one valid GBK character.
    nextchar => sub {
        my $ch = shift;
        return unless defined $ch;
        my $len = length $ch;

        if ($len == 1) {
            my $byte = ord $ch;

            # Anything above 0x80 is not a single-byte character here.
            return if $byte > 0x80;

            # 0x80 is the last single byte; the double-byte range follows.
            return $byte == 0x80 ? "\x81\x40" : chr($byte + 1);
        }

        if ($len == 2) {
            # ord/substr instead of unpack('CC'), so an out-of-range value
            # is seen as-is by the range checks below.
            my $lead  = ord substr($ch, 0, 1);
            my $trail = ord substr($ch, 1, 1);

            return if $lead  < 0x81 || $lead  > 0xFE;
            return if $trail < 0x40 || $trail == 0x7F || $trail > 0xFE;

            if ($trail == 0xFE) {
                # 0xFEFE is the very last character of the set.
                return if $lead == 0xFE;

                # Otherwise roll over to the first trail byte of the
                # next lead byte.
                return pack('CC', $lead + 1, 0x40);
            }

            # 0x7F is not a valid trail byte, so 0x7E steps to 0x80.
            return pack('CC', $lead, $trail == 0x7E ? 0x80 : $trail + 1);
        }

        # Empty string or more than two bytes: not a single character.
        return;
    },
};
36
37__END__
38
39=head1 NAME
40
41String::Multibyte::GBK - internally used by String::Multibyte
42for GBK
43
44=head1 SYNOPSIS
45
46    use String::Multibyte;
47
48    $gbk = String::Multibyte->new('GBK');
49    $gbk_length = $gbk->length($gbk_string);
50
51=head1 DESCRIPTION
52
53C<String::Multibyte::GBK> is used for manipulation of strings
54in GBK (Guobiao Kuozhan).
55
56Byte range of single-byte characters:
57C<0x00..0x80>.
58
59Leading byte range of double-byte characters:
60C<0x81..0xFE>.
61
62Trailing byte range of double-byte characters:
63C<0x40..0x7E> and C<0x80..0xFE>.
64
65Character order (invalid code points are excluded):
66C<0x00..0x80>, C<0x8140..0xFEFE>.
67
68=head1 CAVEAT
69
70C<0x80> is supported in consideration of Microsoft's CP936.
71(Hmmm, according to IANA Charset Registration for GBK,
72GBK must be identical to CP936.)
73
74=head1 SEE ALSO
75
76L<String::Multibyte>
77
78=cut
79