1#!perl -w
2
3use strict;
4use Test::More;
5use Encode;
6
7# Bug in Encode, non chars are rejected
8use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed
9                   utf8_to_utf16 utf8_to_utf16_reversed);
10
11for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD,
12	     0x10000, 0x10FC00, 0x103FF, 0x10FFFD) {
13    my $chr = chr $ord;
14    for my $prefix ('', "\0", 'Perl rules') {
15	for my $suffix ('', "\0", "Moo!") {
16	    my $string = $prefix . $chr . $suffix;
17	    my $name = sprintf "for chr $ord prefix %d, suffix %d",
18		               length $prefix, length $suffix;
19	    my $as_utf8 = $string;
20	    utf8::encode($as_utf8);
21            my $be_16 = encode('UTF-16BE', $string);
22            my $le_16 = encode('UTF-16LE', $string);
23	    is(utf16_to_utf8($be_16), $as_utf8, "utf16_to_utf8 $name");
24	    is(utf8_to_utf16($as_utf8), $be_16, "utf8_to_utf16 $name");
25	    is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8,
26	       "utf16_to_utf8_reversed $name");
27	    is(utf8_to_utf16_reversed($as_utf8), $le_16,
28               "utf8_to_utf16_reversed $name");
29	}
30    }
31}
32
33foreach ("\0", 'N', 'Perl rules!') {
34    my $length = length $_;
35    my $got = eval {utf16_to_utf8($_)};
36    like($@, qr/^panic: utf16_to_utf8: odd bytelen $length at/,
37	 "Odd byte length panics for '$_'");
38    is($got, undef, 'hence eval returns undef');
39}
40
41for (["\xD8\0\0\0", 'NULs'],
42     ["\xD8\0\xD8\0", '2 Lows'],
43     ["\xDC\0\0\0", 'High NUL'],
44     ["\xDC\0\xD8\0", 'High Low'],
45     ["\xDC\0\xDC\0", 'High High'],
46    ) {
47    my ($malformed, $name) = @$_;
48    my $got = eval {utf16_to_utf8($malformed)};
49    like($@, qr/^Malformed UTF-16 surrogate at/,
50	 "Malformed surrogate $name croaks for utf16_to_utf8");
51    is($got, undef, 'hence eval returns undef');
52
53    $malformed =~ s/(.)(.)/$2$1/gs;
54    $got = eval {utf16_to_utf8_reversed($malformed)};
55    like($@, qr/^Malformed UTF-16 surrogate at/,
56	 "Malformed surrogate $name croaks for utf16_to_utf8_reversed");
57    is($got, undef, 'hence eval returns undef');
58}
59
60my $in = "NA";
61my $got = eval {utf16_to_utf8_reversed($in, 1)};
62like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/,
63     'Odd byte length panics');
64is($got, undef, 'hence eval returns undef');
65is($in, "NA", 'and input unchanged');
66
67$in = "\xD8\0\xDC\0";
68$got = eval {utf16_to_utf8($in, 2)};
69like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks');
70(ok(!defined $got, 'hence eval returns undef')) or
71    diag(join ', ', map {ord $_} split //, $got);
72
73{   # This example is published by Unicode, so verifies we aren't just
74    # internally consistent; we conform to the Standard
75    my $utf16_of_U10302 = utf8_to_utf16(chr 0x10302);
76    is(substr($utf16_of_U10302, 0, 1), chr 0xD8);
77    is(substr($utf16_of_U10302, 1, 1), chr 0x00);
78    is(substr($utf16_of_U10302, 2, 1), chr 0xDF);
79    is(substr($utf16_of_U10302, 3, 1), chr 0x02);
80
81    $utf16_of_U10302 = utf8_to_utf16_reversed(chr 0x10302);
82    is(substr($utf16_of_U10302, 0, 1), chr 0x00);
83    is(substr($utf16_of_U10302, 1, 1), chr 0xD8);
84    is(substr($utf16_of_U10302, 2, 1), chr 0x02);
85    is(substr($utf16_of_U10302, 3, 1), chr 0xDF);
86}
87
88done_testing;
89