1#!perl -w 2 3use strict; 4use Test::More; 5use Encode; 6 7# Bug in Encode, non chars are rejected 8use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed 9 utf8_to_utf16 utf8_to_utf16_reversed); 10 11for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0xFFFD, 12 0x10000, 0x10FC00, 0x103FF, 0x10FFFD) { 13 my $chr = chr $ord; 14 for my $prefix ('', "\0", 'Perl rules') { 15 for my $suffix ('', "\0", "Moo!") { 16 my $string = $prefix . $chr . $suffix; 17 my $name = sprintf "for chr $ord prefix %d, suffix %d", 18 length $prefix, length $suffix; 19 my $as_utf8 = $string; 20 utf8::encode($as_utf8); 21 my $be_16 = encode('UTF-16BE', $string); 22 my $le_16 = encode('UTF-16LE', $string); 23 is(utf16_to_utf8($be_16), $as_utf8, "utf16_to_utf8 $name"); 24 is(utf8_to_utf16($as_utf8), $be_16, "utf8_to_utf16 $name"); 25 is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, 26 "utf16_to_utf8_reversed $name"); 27 is(utf8_to_utf16_reversed($as_utf8), $le_16, 28 "utf8_to_utf16_reversed $name"); 29 } 30 } 31} 32 33foreach ("\0", 'N', 'Perl rules!') { 34 my $length = length $_; 35 my $got = eval {utf16_to_utf8($_)}; 36 like($@, qr/^panic: utf16_to_utf8: odd bytelen $length at/, 37 "Odd byte length panics for '$_'"); 38 is($got, undef, 'hence eval returns undef'); 39} 40 41for (["\xD8\0\0\0", 'NULs'], 42 ["\xD8\0\xD8\0", '2 Lows'], 43 ["\xDC\0\0\0", 'High NUL'], 44 ["\xDC\0\xD8\0", 'High Low'], 45 ["\xDC\0\xDC\0", 'High High'], 46 ) { 47 my ($malformed, $name) = @$_; 48 my $got = eval {utf16_to_utf8($malformed)}; 49 like($@, qr/^Malformed UTF-16 surrogate at/, 50 "Malformed surrogate $name croaks for utf16_to_utf8"); 51 is($got, undef, 'hence eval returns undef'); 52 53 $malformed =~ s/(.)(.)/$2$1/gs; 54 $got = eval {utf16_to_utf8_reversed($malformed)}; 55 like($@, qr/^Malformed UTF-16 surrogate at/, 56 "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); 57 is($got, undef, 'hence eval returns undef'); 58} 59 60my $in = "NA"; 61my $got = eval {utf16_to_utf8_reversed($in, 1)}; 62like($@, qr/^panic: utf16_to_utf8_reversed: odd bytelen 1 at/, 63 'Odd byte length panics'); 64is($got, undef, 'hence eval returns undef'); 65is($in, "NA", 'and input unchanged'); 66 67$in = "\xD8\0\xDC\0"; 68$got = eval {utf16_to_utf8($in, 2)}; 69like($@, qr/^Malformed UTF-16 surrogate at/, 'Lone surrogate croaks'); 70(ok(!defined $got, 'hence eval returns undef')) or 71 diag(join ', ', map {ord $_} split //, $got); 72 73{ # This example is published by Unicode, so verifies we aren't just 74 # internally consistent; we conform to the Standard 75 my $utf16_of_U10302 = utf8_to_utf16(chr 0x10302); 76 is(substr($utf16_of_U10302, 0, 1), chr 0xD8); 77 is(substr($utf16_of_U10302, 1, 1), chr 0x00); 78 is(substr($utf16_of_U10302, 2, 1), chr 0xDF); 79 is(substr($utf16_of_U10302, 3, 1), chr 0x02); 80 81 $utf16_of_U10302 = utf8_to_utf16_reversed(chr 0x10302); 82 is(substr($utf16_of_U10302, 0, 1), chr 0x00); 83 is(substr($utf16_of_U10302, 1, 1), chr 0xD8); 84 is(substr($utf16_of_U10302, 2, 1), chr 0x02); 85 is(substr($utf16_of_U10302, 3, 1), chr 0xDF); 86} 87 88done_testing; 89