1BEGIN { 2 require Config; import Config; 3 if ($Config{'extensions'} !~ /\bEncode\b/) { 4 print "1..0 # Skip: Encode was not built\n"; 5 exit 0; 6 } 7 if (ord("A") == 193) { 8 print "1..0 # Skip: EBCDIC\n"; 9 exit 0; 10 } 11 $| = 1; 12} 13 14use strict; 15use File::Basename; 16use File::Spec; 17use Encode qw(decode encode find_encoding _utf8_off); 18 19#use Test::More qw(no_plan); 20use Test::More tests => 32; 21BEGIN { use_ok("Encode::Guess") } 22 23my $ascii = join('' => map {chr($_)}(0x21..0x7e)); 24my $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); 25my $utf8on = join('' => map {chr($_)}(0x3000..0x30fe)); 26my $utf8off = $utf8on; _utf8_off($utf8off); 27my $utf16 = encode('UTF-16', $utf8on); 28my $utf32 = encode('UTF-32', $utf8on); 29 30like(guess_encoding(''), qr/empty string/io, 'empty string'); 31is(guess_encoding($ascii)->name, 'ascii', 'ascii'); 32like(guess_encoding($latin1), qr/No appropriate encoding/io, 'no ascii'); 33is(guess_encoding($latin1, 'latin1')->name, 'iso-8859-1', 'iso-8859-1'); 34is(guess_encoding($utf8on)->name, 'utf8', 'utf8 w/ flag'); 35is(guess_encoding($utf8off)->name, 'utf8', 'utf8 w/o flag'); 36is(guess_encoding($utf16)->name, 'UTF-16', 'UTF-16'); 37is(guess_encoding($utf32)->name, 'UTF-32', 'UTF-32'); 38 39my $jisx0201 = File::Spec->catfile(dirname(__FILE__), 'jisx0201.utf'); 40my $jisx0208 = File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'); 41my $jisx0212 = File::Spec->catfile(dirname(__FILE__), 'jisx0212.utf'); 42 43open my $fh, $jisx0208 or die "$jisx0208: $!"; 44binmode($fh); 45$utf8off = join('' => <$fh>); 46close $fh; 47$utf8on = decode('utf8', $utf8off); 48 49my @jp = qw(7bit-jis shiftjis euc-jp); 50 51Encode::Guess->set_suspects(@jp); 52 53for my $jp (@jp){ 54 my $test = encode($jp, $utf8on); 55 is(guess_encoding($test)->name, $jp, "JP:$jp"); 56} 57 58is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')"); 59eval{ encode('Guess', $utf8on) }; 60like($@, qr/not defined/io, "no encode()"); 61 62{ 63 my $warning; 64 local $SIG{__WARN__} = sub { $warning = shift }; 65 my $euc_jp = my $euc_jp_clone = encode('euc-jp', $utf8on); 66 Encode::from_to($euc_jp, 'Guess', 'euc-jp'); 67 is $euc_jp_clone, $euc_jp, "from_to(..., 'Guess')"; 68 ok !$warning, "no warning"; 69 diag $warning if $warning; 70} 71 72my %CJKT = 73 ( 74 'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'), 75 'euc-jp' => File::Spec->catfile(dirname(__FILE__), 'jisx0208.utf'), 76 'euc-kr' => File::Spec->catfile(dirname(__FILE__), 'ksc5601.utf'), 77 'big5-eten' => File::Spec->catfile(dirname(__FILE__), 'big5-eten.utf'), 78); 79 80Encode::Guess->set_suspects(keys %CJKT); 81 82for my $name (keys %CJKT){ 83 open my $fh, $CJKT{$name} or die "$CJKT{$name}: $!"; 84 binmode($fh); 85 $utf8off = join('' => <$fh>); 86 close $fh; 87 88 my $test = encode($name, decode('utf8', $utf8off)); 89 is(guess_encoding($test)->name, $name, "CJKT:$name"); 90} 91 92my $ambiguous = "\x{5c0f}\x{98fc}\x{5f3e}"; 93my $english = "The quick brown fox jumps over the black lazy dog."; 94for my $utf (qw/UTF-16 UTF-32/){ 95 for my $bl (qw/BE LE/){ 96 my $test = encode("$utf$bl" => $english); 97 is(guess_encoding($test)->name, "$utf$bl", "$utf$bl"); 98 } 99} 100for my $bl (qw/BE LE/){ 101 my $test = encode("UTF-16$bl" => $ambiguous); 102 my $result = guess_encoding($test); 103 ok(! ref($result), "UTF-16$bl:$result"); 104} 105 106 107 108Encode::Guess->set_suspects(); 109for my $jp (@jp){ 110 # intentionally set $1 a priori -- see Changes 111 my $test = "English"; 112 '$1' =~ m/^(.*)/o; 113 is(guess_encoding($test, ($jp))->name, 'ascii', 114 "ascii vs $jp (\$1 messed)"); 115 $test = encode($jp, $test . "\n\x{65e5}\x{672c}\x{8a9e}"); 116 is(guess_encoding($test, ($jp))->name, 117 $jp, "$jp vs ascii (\$1 messed)"); 118} 119 120__END__; 121