1BEGIN { 2 if ($ENV{'PERL_CORE'}){ 3 chdir 't'; 4 unshift @INC, '../lib'; 5 } 6 require Config; import Config; 7 if ($Config{'extensions'} !~ /\bEncode\b/) { 8 print "1..0 # Skip: Encode was not built\n"; 9 exit 0; 10 } 11 $| = 1; 12} 13 14use strict; 15use utf8; 16use Test::More tests => 780; 17use Encode; 18use Encode::GSM0338; 19 20# The specification of GSM 03.38 is not awfully clear. 21# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT) 22# The various combinations of 0x00 and 0x1B as leading bytes 23# are unclear, as is the semantics of those bytes as standalone 24# or as final single bytes. 25 26 27my $chk = Encode::LEAVE_SRC(); 28 29# escapes 30# see http://www.csoft.co.uk/sms/character_sets/gsm.htm 31my %esc_seq = ( 32 "\x{20ac}" => "\x1b\x65", 33 "\x0c" => "\x1b\x0A", 34 "[" => "\x1b\x3C", 35 "\\" => "\x1b\x2F", 36 "]" => "\x1b\x3E", 37 "^" => "\x1b\x14", 38 "{" => "\x1b\x28", 39 "|" => "\x1b\x40", 40 "}" => "\x1b\x29", 41 "~" => "\x1b\x3D", 42); 43 44my %unesc_seq = reverse %esc_seq; 45 46 47sub eu{ 48 $_[0] =~ /[\x00-\x1f]/ ? 49 sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]); 50 51} 52 53for my $c ( map { chr } 0 .. 127 ) { 54 my $u = $Encode::GSM0338::GSM2UNI{$c}; 55 56 # default character set 57 is decode( "gsm0338", $c, $chk ), $u, 58 sprintf( "decode \\x%02X", ord($c) ); 59 eval { decode( "gsm0338", $c . "\xff", $chk ) }; 60 ok( $@, $@ ); 61 is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) ); 62 eval { encode( "gsm0338", $u . "\x{3000}", $chk ) }; 63 ok( $@, $@ ); 64 65 # nasty atmark 66 if ( $c eq "\x00" ) { 67 is decode( "gsm0338", "\x00" . $c, $chk ), "\x00", 68 sprintf( '@@ =>: \x00+\x%02X', ord($c) ); 69 } 70 else { 71 is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ), 72 sprintf( '@: decode \x00+\x%02X', ord($c) ); 73 } 74 75 # escape seq. 76 my $ecs = "\x1b" . $c; 77 if ( $unesc_seq{$ecs} ) { 78 is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs}, 79 sprintf( "ESC: decode ESC+\\x%02X", ord($c) ); 80 is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs, 81 sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) ); 82 } 83 else { 84 is decode( "gsm0338", $ecs, $chk ), 85 "\xA0" . decode( "gsm0338", $c ), 86 sprintf( "decode ESC+\\x%02X", ord($c) ); 87 } 88} 89 90# https://rt.cpan.org/Ticket/Display.html?id=75670 91is decode("gsm0338", "\x09") => chr(0xC7), 'RT75670: decode'; 92is encode("gsm0338", chr(0xC7)) => "\x09", 'RT75670: encode'; 93 94__END__ 95for my $c (map { chr } 0..127){ 96 my $b = "\x1b$c"; 97 my $u = $Encode::GSM0338::GSM2UNI{$b}; 98 next unless $u; 99 $u ||= "\xA0" . $Encode::GSM0338::GSM2UNI{$c}; 100 is decode("gsm0338", $b), $u, sprintf("decode ESC+\\x%02X", ord($c) ); 101} 102 103__END__ 104# old test follows 105ub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) } 106 107# t("\x00", "\x00"); # ??? 108 109# "Round-trip". 110t("\x41", "\x41"); 111 112t("\x01", "\xA3"); 113t("\x02", "\x24"); 114t("\x03", "\xA5"); 115t("\x09", "\xE7"); 116 117t("\x00\x00", "\x00\x00"); # Maybe? 118t("\x00\x1B", "\x40\xA0"); # Maybe? 119t("\x00\x41", "\x40\x41"); 120 121# t("\x1B", "\x1B"); # ??? 122 123# Escape with no special second byte is just a NBSP. 124t("\x1B\x41", "\xA0\x41"); 125 126t("\x1B\x00", "\xA0\x40"); # Maybe? 127 128# Special escape characters. 129t("\x1B\x0A", "\x0C"); 130t("\x1B\x14", "\x5E"); 131t("\x1B\x28", "\x7B"); 132t("\x1B\x29", "\x7D"); 133t("\x1B\x2F", "\x5C"); 134t("\x1B\x3C", "\x5B"); 135t("\x1B\x3D", "\x7E"); 136t("\x1B\x3E", "\x5D"); 137t("\x1B\x40", "\x7C"); 138t("\x1B\x40", "\x7C"); 139t("\x1B\x65", "\x{20AC}"); 140