1# ---------------------------------------------------------------- 2 use strict; 3 use Test::More; 4# ---------------------------------------------------------------- 5{ 6 my $mecab = &load_mecab(); 7 plan skip_all => $mecab if $mecab; 8 plan tests => 18; 9 use_ok('Lingua::JA::Romanize::MeCab'); 10 my $roman = Lingua::JA::Romanize::MeCab->new(); 11 &test_ja( $roman ); 12} 13# ---------------------------------------------------------------- 14sub load_mecab { 15 local $@; 16 eval { require MeCab; }; 17 plan skip_all => "MeCab.pm is not available." if $@; 18 my $mecab; 19 eval { 20 $mecab = MeCab::Tagger->new(@_); 21 }; 22 plan skip_all => "MeCab::Tagger is not available. $@" unless ref $mecab; 23 undef; 24} 25# ---------------------------------------------------------------- 26sub read_data { 27 local $/ = undef; 28 my $all = <DATA>; 29 my $hash = { split( /\s+/, $all ) }; 30 $hash; 31} 32# ---------------------------------------------------------------- 33sub test_ja { 34 my $roman = shift; 35 ok( ref $roman, "new" ); 36 37 my $t = &read_data(); 38# ok( utf8::is_utf8($t->{phrase1}), "source: phrase1 utf8 flaged" ); 39# ok( utf8::is_utf8($t->{phrase2}), "source: phrase2 utf8 flaged" ); 40 41 my $c1 = $roman->char($t->{a}); 42 ok( ! defined $c1, "char: ascii" ); 43 44 my $c2 = $roman->char($t->{hi}); 45 is( $c2, "hi", "char: hiragana hi" ); 46 47 my $c3 = $roman->char($t->{ka}); 48 is( $c3, "ka", "char: katakana ka" ); 49 50 my $c4 = $roman->char($t->{kan}); 51 like( $c4, qr/(^|\W)kan(\W|$)/, "char: kanji kan" ); 52 53 my $c5 = $roman->chars($t->{hello}); 54 $c5 =~ s/\s+//g; 55 $c5 =~ tr/A-Z/a-z/; 56 is( $c5 , "hello,world!", "chars: hello" ); 57 58 my $c6 = $roman->chars($t->{nihongo}); 59 $c6 =~ s/\s+//g; 60 like( $c6, qr/^(nihongo|nippongo|\/)+$/, "chars: nihongo" ); 61 62 my @t1 = $roman->string($t->{kanji}); 63 like( $t1[0][1], qr/(^|\W)kanji(\W|$)/, "string: okuri-nashi kanji" ); 64 65 my @t2 = $roman->string($t->{warau}); 66 like( $t2[0][1], qr/(^|\W)wara(u)?(\W|$)/, "string: okuri-ari warau" ); 67 68 my @t3 = $roman->string($t->{aru}); 69 like( $t3[0][1], qr/(^|\W)a(ru)?(\W|$)/, "string: okuri-ari aru" ); 70 71 my @t4 = $roman->string($t->{yuu}); 72 like( $t4[0][1], qr/(^|\W)(yuu|u|tamotsu)(\W|$)/, "string: okuri-nashi yuu or tamotsu" ); 73 74 my @t5 = $roman->string($t->{sashidasu}); 75 like( $t5[0][1], qr/(^|\W)sashida(su)?(\W|$)/, "string: okuri-ari sashidasu" ); 76 77 my @t6 = $roman->string($t->{sashidashinin}); 78 like( $t6[0][1], qr/(^|\W)sashidashinin(\W|$)/, "string: okuri-nashi sashidashinin" ); 79 80 my @u1 = $roman->string($t->{phrase1}); 81 like( $u1[0]->[1], qr/^u/, "string: phrase1 u..." ); 82 like( $u1[$#u1]->[1], qr/go$/, "string: phrase1 ...go" ); 83# my $u1 = scalar { grep { ! utf8::is_utf8($_->[0]) } @u1 }; 84# ok( $u1 >= 2, "string: phrase1 utf8 flaged" ); 85 my $j1 = join( "", map {$_->[0]} @u1 ); 86 is( $j1, $t->{phrase1}, "string: phrase1 round trip" ); 87 88 my @u2 = $roman->string($t->{phrase2}); 89# my $u2 = scalar { grep { ! utf8::is_utf8($_->[0]) } @u2 }; 90# ok( $u2 >= 6, "string: phrase2 utf8 flaged" ); 91 my $j2 = join( "", map {$_->[0]} @u2 ); 92 is( $j2, $t->{phrase2}, "string: phrase2 round trip" ); 93} 94# ---------------------------------------------------------------- 95;1; 96# ---------------------------------------------------------------- 97__END__ 98a a 99hi ひ 100ka カ 101kan 漢 102hello Hello,world! 103nihongo 日本語 104kanji 漢字 105warau 笑う 106aru 有る 107yuu 有 108sashidasu 差出す 109sashidashinin 差出人 110phrase1 美しい日本語 111phrase2 太郎はこの本を二郎を見た女性に渡した。 112