1#! /usr/local/bin/perl -w 2 3# vim: syntax=perl 4# vim: tabstop=4 5 6use strict; 7 8use Test; 9 10BEGIN { 11 plan tests => 5; 12} 13 14use Locale::Recode; 15 16sub int2utf8; 17 18my $codes = {}; 19foreach (0 .. 0xcfff 20 # 0 .. 0x11_000, 21 # 0x10_000 .. 0x11_000, 22 # 0x200_000 .. 0x201_000, # :-( # Not supported by Perl 5.6 23 # 0x4_000_000 .. 0x4_001_000, # :-( # Not supported by Perl 5.6 24 ) { 25 $codes->{$_} = int2utf8 $_; 26} 27 28my $cd_int = Locale::Recode->new (from => 'UTF-8', 29 to => 'INTERNAL'); 30ok !$cd_int->getError; 31 32my $cd_rev = Locale::Recode->new (from => 'INTERNAL', 33 to => 'UTF-8'); 34ok !$cd_rev->getError; 35 36# Convert into internal representation. 37my $result_int = 1; 38while (my ($ucs4, $outbuf) = each %$codes) { 39 my $result = $cd_int->recode ($outbuf); 40 unless ($result && $outbuf->[0] == $ucs4) { 41 $result_int = 0; 42 last; 43 } 44} 45ok $result_int; 46 47# Convert from internal representation. 48my $result_rev = 1; 49if (1) { 50 # FIXME: This test only succeeds with use bytes in Perl >= 5.8.0. 51 # However, this will fail with Perl <= Perl 5.6.0. :-( 52 # FIXME: Is it really fixed now? 53while (my ($ucs4, $code) = each %$codes) { 54 my $outbuf = [ $ucs4 ]; 55 my $result = $cd_rev->recode ($outbuf); 56 unless ($result && $code eq $outbuf) { 57 $result_rev = 0; 58 last; 59 } 60} 61} 62ok $result_rev; 63 64# Check handling of unknown characters. This assumes that the 65# character set is a subset of US-ASCII. 66my $test_string1 = "\xffSupergirl\xff"; 67$cd_rev = Locale::Recode->new (from => 'ASCII', 68 to => 'UTF-8', 69 ); 70$result_rev = $cd_rev->recode ($test_string1); 71ok $result_rev && $test_string1 eq "�Supergirl�"; 72 73sub int2utf8 74{ 75 my $ucs4 = shift; 76 77 if ($ucs4 <= 0x7f) { 78 return chr $ucs4; 79 } elsif ($ucs4 <= 0x7ff) { 80 return pack ("C2", 81 (0xc0 | (($ucs4 >> 6) & 0x1f)), 82 (0x80 | ($ucs4 & 0x3f))); 83 } elsif ($ucs4 <= 0xffff) { 84 return pack ("C3", 85 (0xe0 | (($ucs4 >> 12) & 0xf)), 86 (0x80 | (($ucs4 >> 6) & 0x3f)), 87 (0x80 | ($ucs4 & 0x3f))); 88 } elsif ($ucs4 <= 0x1fffff) { 89 return pack ("C4", 90 (0xf0 | (($ucs4 >> 18) & 0x7)), 91 (0x80 | (($ucs4 >> 12) & 0x3f)), 92 (0x80 | (($ucs4 >> 6) & 0x3f)), 93 (0x80 | ($ucs4 & 0x3f))); 94 } elsif ($ucs4 <= 0x3ffffff) { 95 return pack ("C5", 96 (0xf0 | (($ucs4 >> 24) & 0x3)), 97 (0x80 | (($ucs4 >> 18) & 0x3f)), 98 (0x80 | (($ucs4 >> 12) & 0x3f)), 99 (0x80 | (($ucs4 >> 6) & 0x3f)), 100 (0x80 | ($ucs4 & 0x3f))); 101 } else { 102 return pack ("C6", 103 (0xf0 | (($ucs4 >> 30) & 0x3)), 104 (0x80 | (($ucs4 >> 24) & 0x1)), 105 (0x80 | (($ucs4 >> 18) & 0x3f)), 106 (0x80 | (($ucs4 >> 12) & 0x3f)), 107 (0x80 | (($ucs4 >> 6) & 0x3f)), 108 (0x80 | ($ucs4 & 0x3f))); 109 } 110} 111 112# Local Variables: 113# mode: perl 114# perl-indent-level: 4 115# perl-continued-statement-offset: 4 116# perl-continued-brace-offset: 0 117# perl-brace-offset: -4 118# perl-brace-imaginary-offset: 0 119# perl-label-offset: -4 120# cperl-indent-level: 4 121# cperl-continued-statement-offset: 2 122# tab-width: 4 123# End: 124 125