package Unicode::Unihan;

use 5.008001;
use strict;
use warnings;

our $VERSION = do { my @r = (q$Revision: 0.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;

use Carp;
BEGIN{ @AnyDBM_File::ISA = qw(DB_File GDBM_File SDBM_File) ; }
use AnyDBM_File;
use Fcntl;

# Object layout: a blessed hash holding
#   _dir_     directory containing one "<Tag>.db" file per tag
#   -savemem  optional flag; when true, at most one tag stays loaded
#   <Tag>     reference to the tied hash for every tag currently loaded
# Keys starting with an upper-case ASCII letter are treated as loaded tags.

# Constructor.  Extra arguments are stored verbatim as key/value pairs in
# the object.  Croaks when the database directory does not exist.
sub new {
    my $class = shift;

    # The database directory is this file's path minus its ".pm" suffix.
    # The pattern is anchored so that only the trailing suffix is removed,
    # never a ".pm" that happens to occur earlier in the path.
    ( my $dir = __FILE__ ) =~ s/\.pm\z//;
    -d $dir or croak "DB Directory $dir nonexistent!";
    return bless { '_dir_' => $dir, @_ }, $class;
}

# Makes the database for tag $name available as $self->{$name}.
# With -savemem set, every other loaded tag is dropped first.
# Croaks when the DB file is missing or cannot be tied.  Returns $self.
sub load {
    my ( $self, $name ) = @_;

    if ( $self->{'-savemem'} ) {
        for my $key ( keys %$self ) {
            next if $key eq $name;
            delete $self->{$key} if $key =~ /^[A-Z]/;
        }
    }

    unless ( $self->{$name} ) {
        my $file = "$self->{_dir_}/$name.db";
        -f $file or croak "There is no DB for $name";

        # Tie a private hash first and store it only on success, so that a
        # failed tie cannot leave an empty entry that looks like a loaded DB.
        my %db;
        tie %db, 'AnyDBM_File', $file, O_RDONLY, 0444
            or croak "$file: $!";
        $self->{$name} = \%db;
    }
    return $self;
}

# Drops the named tags from the object, or every loaded tag when called
# without arguments.  Names not starting with an upper-case letter are
# ignored, which protects the object's own bookkeeping keys.  Returns $self.
sub unload {
    my $self  = shift;
    my @names = @_ ? @_ : keys %$self;

    # Iterate over the whole list; a false name such as "0" or "" must not
    # end processing early.
    for my $key (@names) {
        next unless defined $key;
        delete $self->{$key} if $key =~ /^[A-Z]/;
    }
    return $self;
}

# Defined explicitly so that object destruction is not dispatched to
# AUTOLOAD.  Emits a trace message when $DEBUG is true.
sub DESTROY {
    $DEBUG and warn "$_[0] destroyed!";
}

# Creates the accessor method for a tag on first use.
# The tag is loaded once up front, so an unknown tag croaks before any
# method is installed; the call is then re-dispatched to the new accessor.
sub AUTOLOAD {
    my $self = shift;
    my $name = our $AUTOLOAD;
    $name =~ s/.*:://;
    ref $self
        or croak "$name() must be called on a Unicode::Unihan object";

    $self->load($name);

    # Accessor contract: returns nothing without a non-empty string argument.
    # In list context it returns one entry per character of the string, in
    # scalar context the entry for the first character only.
    my $accessor = sub {
        my $self = shift;
        @_ or return;
        my $str = shift;
        length($str) or return;

        # Load on every call: once installed, this accessor bypasses
        # AUTOLOAD, so the tag may be missing here for another object, or
        # after unload() or a -savemem eviction.
        $self->load($name);

        if (wantarray) {
            return map { $self->{$name}{$_} } unpack( "U*", $str );
        }
        return $self->{$name}{ ord($str) };
    };

    {
        # Symbolic glob assignment is the only statement needing this.
        no strict 'refs';
        *$AUTOLOAD = $accessor;
    }
    return $self->$name(@_);
}

1;
__END__

# Below is stub documentation for your module. You'd better edit it!

=encoding utf8

=head1 NAME

Unicode::Unihan - The Unihan Data Base 5.1.0

=head1 SYNOPSIS

  use Unicode::Unihan;
  my $uh = Unicode::Unihan->new;
  print join("," => $uh->Mandarin("\x{5c0f}\x{98fc}\x{5f3e}")), "\n";

=head1 ABSTRACT

This module provides a user-friendly interface to the Unicode Unihan
Database 5.1. With this module, using the Unihan database is as easy as
shown in the SYNOPSIS above.

=head1 DESCRIPTION

The first thing you do is make the database available. Just say

  use Unicode::Unihan;
  my $uh = new Unicode::Unihan;

That's all you have to say. After that, you can access the database
via $uh-E<gt>I<tag>($string) where I<tag> is the tag in the Unihan
Database, without the 'k' prefix.

=over 2

=item $data = $uh-E<gt>I<tag>($string)

=item @data = $uh-E<gt>I<tag>($string)

The first form (scalar context) returns the Unihan Database entry of
the first character in $string. The second form (list context)
returns the entry for each character in $string.

  @data = $uh->Mandarin("\x{5c0f}\x{98fc}\x{5f3e}");
  # @data is now ('SHAO4 XIAO3','SI4','DAN4')

  @data = $uh->JapaneseKun("\x{5c0f}\x{98fc}\x{5f3e}");
  # @data is now ('CHIISAI KO O','KAU YASHINAU','TAMA HAZUMU HIKU')

=back

=head1 FIELDS AND THEIR MEANINGS

For a complete list of tags, their meaning, and information on parsing
the value field, see L<http://www.unicode.org/reports/tr38/>.

=head1 SEE ALSO

=over 2

=item L<perluniintro>

=item L<perlunicode>

=item The Unihan Database

L<http://www.unicode.org/Public/UNIDATA/Unihan.html>

=back

=head1 AUTHOR

=over 2

=item of the Module

Dan Kogai E<lt>dankogai@dan.co.jpE<gt>

=item of the Source Data

Unicode, Inc.

=back

=head1 COPYRIGHT AND LICENSE

=over 2

=item of the Module

Copyright 2002-2008 by Dan Kogai, All rights reserved.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=item of the Source Data

Copyright (c) 1996-2008 Unicode, Inc. All Rights reserved.

  Name: Unihan database
  Unicode version: 5.1.0
  Table version: 1.1
  Date: 3 March 2008

=back