1#! /usr/local/bin/perl -w
2
3# vim: syntax=perl
4# vim: tabstop=4
5
6use strict;
7
8use Test;
9
10BEGIN {
11	plan tests => 5;
12}
13
14use Locale::Recode;
15
16sub int2utf8;
17
18my $codes = {};
19foreach (0 .. 0xcfff
20		 # 0 .. 0x11_000,
21		 # 0x10_000 .. 0x11_000,
22	     # 0x200_000 .. 0x201_000,      # :-(  # Not supported by Perl 5.6
23	     # 0x4_000_000 .. 0x4_001_000,  # :-(  # Not supported by Perl 5.6
24         ) {
25	$codes->{$_} = int2utf8 $_;
26}
27
28my $cd_int = Locale::Recode->new (from => 'UTF-8',
29			     		  		 to => 'INTERNAL');
30ok !$cd_int->getError;
31
32my $cd_rev = Locale::Recode->new (from => 'INTERNAL',
33								 to => 'UTF-8');
34ok !$cd_rev->getError;
35
36# Convert into internal representation.
37my $result_int = 1;
38while (my ($ucs4, $outbuf) = each %$codes) {
39	my $result = $cd_int->recode ($outbuf);
40	unless ($result && $outbuf->[0] == $ucs4) {
41		$result_int = 0;
42		last;
43	}
44}
45ok $result_int;
46
47# Convert from internal representation.
48my $result_rev = 1;
49if (1) {
50	# FIXME: This test only succeeds with use bytes in Perl >= 5.8.0.
51	# However, this will fail with Perl <= Perl 5.6.0. :-(
52	# FIXME: Is it really fixed now?
53while (my ($ucs4, $code) = each %$codes) {
54    my $outbuf = [ $ucs4 ];
55    my $result = $cd_rev->recode ($outbuf);
56    unless ($result && $code eq $outbuf) {
57        $result_rev = 0;
58        last;
59    }
60}
61}
62ok $result_rev;
63
64# Check handling of unknown characters.  This assumes that the
65# character set is a subset of US-ASCII.
66my $test_string1 = "\xffSupergirl\xff";
67$cd_rev = Locale::Recode->new (from => 'ASCII',
68							   to => 'UTF-8',
69							  );
70$result_rev = $cd_rev->recode ($test_string1);
71ok $result_rev && $test_string1 eq "�Supergirl�";
72
73sub int2utf8
74{
75    my $ucs4 = shift;
76
77    if ($ucs4 <= 0x7f) {
78		return chr $ucs4;
79    } elsif ($ucs4 <= 0x7ff) {
80		return pack ("C2",
81			(0xc0 | (($ucs4 >> 6) & 0x1f)),
82			(0x80 | ($ucs4 & 0x3f)));
83    } elsif ($ucs4 <= 0xffff) {
84		return pack ("C3",
85			(0xe0 | (($ucs4 >> 12) & 0xf)),
86			(0x80 | (($ucs4 >> 6) & 0x3f)),
87			(0x80 | ($ucs4 & 0x3f)));
88    } elsif ($ucs4 <= 0x1fffff) {
89		return pack ("C4",
90			(0xf0 | (($ucs4 >> 18) & 0x7)),
91			(0x80 | (($ucs4 >> 12) & 0x3f)),
92			(0x80 | (($ucs4 >> 6) & 0x3f)),
93			(0x80 | ($ucs4 & 0x3f)));
94    } elsif ($ucs4 <= 0x3ffffff) {
95		return pack ("C5",
96			(0xf0 | (($ucs4 >> 24) & 0x3)),
97			(0x80 | (($ucs4 >> 18) & 0x3f)),
98			(0x80 | (($ucs4 >> 12) & 0x3f)),
99			(0x80 | (($ucs4 >> 6) & 0x3f)),
100			(0x80 | ($ucs4 & 0x3f)));
101    } else {
102		return pack ("C6",
103			(0xf0 | (($ucs4 >> 30) & 0x3)),
104			(0x80 | (($ucs4 >> 24) & 0x1)),
105			(0x80 | (($ucs4 >> 18) & 0x3f)),
106			(0x80 | (($ucs4 >> 12) & 0x3f)),
107			(0x80 | (($ucs4 >> 6) & 0x3f)),
108			(0x80 | ($ucs4 & 0x3f)));
109    }
110}
111
112# Local Variables:
113# mode: perl
114# perl-indent-level: 4
115# perl-continued-statement-offset: 4
116# perl-continued-brace-offset: 0
117# perl-brace-offset: -4
118# perl-brace-imaginary-offset: 0
119# perl-label-offset: -4
120# cperl-indent-level: 4
121# cperl-continued-statement-offset: 2
122# tab-width: 4
123# End:
124
125