1use strict; 2use warnings; 3use Encode (); 4 5use Test::More; 6if (ord("A") != 65) { 7 # pad_scalar() requires constant input. To port this to EBCDIC would 8 # require copy, paste, and changing all the values for each code page. 9 plan skip_all => "ASCII-centric tests"; 10} 11else { 12 plan tests => 77; 13} 14 15use XS::APItest qw( fetch_pad_names pad_scalar ); 16 17local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /Wide character in print at/ }; 18 19ok defined &fetch_pad_names, "sub imported"; 20ok defined &pad_scalar; 21 22my $cv = sub { 23 my $test; 24}; 25 26ok fetch_pad_names($cv), "Fetch working."; 27is ref fetch_pad_names($cv), ref [], 'Fetch returns an arrayref'; 28is @{fetch_pad_names($cv)}, 1, 'Sub has one lexical.'; 29is fetch_pad_names($cv)->[0], '$test', "Fetching a simple scalar works."; 30 31$cv = sub { 32 use utf8; 33 34 my $zest = 'invariant'; 35 my $zèst = 'latin-1'; 36 37 return [pad_scalar(1, "zèst"), pad_scalar(1, "z\350st"), pad_scalar(1, "z\303\250st")]; 38}; 39 40my $names_av = fetch_pad_names($cv); 41my $flagged = my $unflagged = "\$z\x{c3}\x{a8}st"; 42Encode::_utf8_on($flagged); 43 44general_tests( $cv->(), $names_av, { 45 results => [ 46 { cmp => 'latin-1', msg => 'Fetches through UTF-8.' }, 47 { cmp => 'latin-1', msg => 'Fetches through Latin-1.' }, 48 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, 49 ], 50 pad_size => { 51 total => { cmp => 2, msg => 'Sub has two lexicals.' }, 52 utf8 => { cmp => 2, msg => 'Sub has only UTF-8 vars.' }, 53 invariant => { cmp => 0, msg => 'Sub has no invariant vars.' }, 54 }, 55 vars => [ 56 { name => '$zest', msg => 'Sub has [\$zest].', type => 'ok' }, 57 { name => "\$z\x{e8}st", msg => "Sub has [\$t\x{e8}st].", type => 'ok' }, 58 { name => $unflagged, msg => "Sub doesn't have [$unflagged].", type => 'not ok' }, 59 { name => $flagged, msg => "But does have it when flagged.", type => 'ok' }, 60 ], 61}); 62 63$cv = do { 64 my $ascii = 'Defined'; 65 sub { 66 use utf8; 67 my $партнеры = $ascii; 68 return [$партнеры, pad_scalar(1, "партнеры"), pad_scalar(1, "\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213")]; 69 }; 70}; 71 72$names_av = fetch_pad_names($cv); 73my $hex_var = "\$\x{43f}\x{430}\x{440}\x{442}\x{43d}\x{435}\x{440}\x{44b}"; 74$flagged = $unflagged = "\$\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213"; 75Encode::_utf8_on($flagged); 76 77my $russian_var = do { 78 use utf8; 79 '$партнеры'; 80}; 81 82general_tests( $cv->(), $names_av, { 83 results => [ 84 { cmp => 'Defined', msg => 'UTF-8 fetching works.' }, 85 { cmp => 'Defined', msg => 'pad_scalar fetch.' }, 86 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, 87 ], 88 pad_size => { 89 total => { cmp => 2, msg => 'Sub has two lexicals, including those it closed over.' }, 90 utf8 => { cmp => 2, msg => 'UTF-8 in the pad.' }, 91 invariant => { cmp => 0, msg => '' }, 92 }, 93 vars => [ 94 { name => '$ascii', msg => 'Sub has [$ascii].', type => 'ok' }, 95 { name => $russian_var, msg => "Sub has [$russian_var].", type => 'ok' }, 96 { name => $hex_var, msg => "Sub has [$hex_var].", type => 'ok' }, 97 { name => $unflagged, msg => "Sub doesn't have [$unflagged]", type => 'not ok' }, 98 { name => $flagged, msg => "But does have it when flagged.", type => 'ok' }, 99 ], 100}); 101 102my $leon1 = "\$L\x{e9}on"; 103my $leon2 = my $leon3 = "\$L\x{c3}\x{a9}on"; 104Encode::_utf8_on($leon2); 105 106local $@; 107$cv = eval <<"END"; 108 sub { 109 use utf8; 110 my \$Leon = 'Invariant'; 111 my $leon1 = 'Latin-1'; 112 return [ \$Leon, $leon1, $leon2, pad_scalar(1, "L\x{e9}on"), pad_scalar(1, "L\x{c3}\x{a9}on")]; 113 }; 114END 115 116my $err = $@; 117ok !$err, $@; 118 119$names_av = fetch_pad_names($cv); 120 121general_tests( $cv->(), $names_av, { 122 results => [ 123 { cmp => 'Invariant', msg => '' }, 124 { cmp => 'Latin-1', msg => "Fetched through [$leon1]" }, 125 { cmp => 'Latin-1', msg => "Fetched through [$leon2]" }, 126 { cmp => 'Latin-1', msg => 'pad_scalar fetch.' }, 127 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, 128 ], 129 pad_size => { 130 total => { cmp => 2, msg => 'Sub has two lexicals' }, 131 utf8 => { cmp => 2, msg => 'Latin-1 got upgraded to UTF-8.' }, 132 invariant => { cmp => 0, msg => '' }, 133 }, 134 vars => [ 135 { name => '$Leon', msg => 'Sub has [$Leon].', type => 'ok' }, 136 { name => $leon1, msg => "Sub has [$leon1].", type => 'ok' }, 137 { name => $leon2, msg => "Sub has [$leon2].", type => 'ok' }, 138 { name => $leon3, msg => "Sub doesn't have [$leon3]", type => 'not ok' }, 139 ], 140}); 141 142 143{ 144 use utf8; 145 my $Cèon = 4; 146 my $str1 = "\$C\x{e8}on"; 147 my $str2 = my $str3 = "\$C\x{c3}\x{a8}on"; 148 Encode::_utf8_on($str2); 149 150 local $@; 151 $cv = eval <<"END_EVAL"; 152 sub { [ \$Cèon, $str1, $str2 ] }; 153END_EVAL 154 155 $err = $@; 156 ok !$err; 157 158 $names_av = fetch_pad_names($cv); 159 160 general_tests( $cv->(), $names_av, { 161 results => [ ({ SKIP => 1 }) x 3 ], 162 pad_size => { 163 total => { cmp => 1, msg => 'Sub has one lexical, which it closed over.' }, 164 utf8 => { cmp => 1, msg => '' }, 165 invariant => { cmp => 0, msg => '' }, 166 }, 167 vars => [ 168 { name => '$Ceon', msg => "Sub doesn't have [\$Ceon].", type => 'not ok' }, 169 map({ { name => $_, msg => "Sub has [$_].", type => 'ok' } } $str1, $str2 ), 170 { name => $str3, msg => "Sub doesn't have [$str3]", type => 'not ok' }, 171 ], 172 }); 173 174} 175 176$cv = sub { 177 use utf8; 178 our $戦国 = 10; 179 { 180 no strict 'refs'; 181 my ($symref, $encoded_sym) = (__PACKAGE__ . "::戦国") x 2; 182 utf8::encode($encoded_sym); 183 return [ $戦国, ${$symref}, ${$encoded_sym} ]; 184 } 185}; 186 187my $flagged_our = my $unflagged_our = "\$\346\210\246\345\233\275"; 188Encode::_utf8_on($flagged_our); 189 190$names_av = fetch_pad_names($cv); 191 192general_tests( $cv->(), $names_av, { 193 results => [ 194 { cmp => '10', msg => 'Fetched UTF-8 our var.' }, 195 { cmp => '10', msg => "Symref fetch of an our works." }, 196 { cmp => undef, msg => "..and using the encoded form yields undef." }, 197 ], 198 pad_size => { 199 total => { cmp => 3, msg => 'Sub has three lexicals.' }, 200 utf8 => { cmp => 3, msg => 'Japanese stored as UTF-8.' }, 201 invariant => { cmp => 0, msg => '' }, 202 }, 203 vars => [ 204 { name => "\$\x{6226}\x{56fd}", msg => "Sub has [\$\x{6226}\x{56fd}].", type => 'ok' }, 205 { name => $flagged_our, msg => "Sub has [$flagged_our].", type => 'ok' }, 206 { name => $unflagged_our, msg => "Sub doesn't have [$unflagged_our]", type => 'not ok' }, 207 ], 208}); 209 210 211{ 212 213use utf8; 214{ 215 my $test; 216 BEGIN { 217 $test = "t\x{c3}\x{a8}st"; 218 Encode::_utf8_on($test); 219 } 220 use constant test => $test; 221} 222 223$cv = sub { 224 my $tèst = 'Good'; 225 226 return [ 227 $tèst, 228 pad_scalar(1, "tèst"), #"UTF-8" 229 pad_scalar(1, "t\350st"), #"Latin-1" 230 pad_scalar(1, "t\x{c3}\x{a8}st"), #"Octal" 231 pad_scalar(1, test()), #'UTF-8 enc' 232 ]; 233}; 234 235$names_av = fetch_pad_names($cv); 236 237general_tests( $cv->(), $names_av, { 238 results => [ 239 { cmp => 'Good', msg => 'Fetched through Perl.' }, 240 { cmp => 'Good', msg => "pad_scalar: UTF-8 works." }, 241 { cmp => 'Good', msg => "pad_scalar: Latin-1 works." }, 242 { cmp => 'NOT_IN_PAD', msg => "pad_scalar: Doesn't fetch through octets." }, 243 { cmp => 'Good', msg => "pad_scalar: UTF-8-through-encoding works." }, 244 ], 245 pad_size => { 246 total => { cmp => 1, msg => 'Sub has one lexical.' }, 247 utf8 => { cmp => 1, msg => '' }, 248 invariant => { cmp => 0, msg => '' }, 249 }, 250 vars => [], 251}); 252 253} 254 255$cv = do { 256 use utf8; 257 sub { 258 my $ニコニコ = 'katakana'; 259 my $にこにこ = 'hiragana'; 260 261 return [ 262 $ニコニコ, 263 $にこにこ, 264 pad_scalar(1, "にこにこ"), 265 pad_scalar(1, "\x{306b}\x{3053}\x{306b}\x{3053}"), 266 pad_scalar(1, "\343\201\253\343\201\223\343\201\253\343\201\223"), 267 pad_scalar(1, "ニコニコ"), 268 pad_scalar(1, "\x{30cb}\x{30b3}\x{30cb}\x{30b3}"), 269 pad_scalar(1, "\343\203\213\343\202\263\343\203\213\343\202\263"), 270 ]; 271 } 272}; 273 274$names_av = fetch_pad_names($cv); 275 276general_tests( $cv->(), $names_av, { 277 results => [ 278 { cmp => 'katakana', msg => '' }, 279 { cmp => 'hiragana', msg => '' }, 280 { cmp => 'hiragana', msg => '' }, 281 { cmp => 'hiragana', msg => '' }, 282 { cmp => 'NOT_IN_PAD', msg => '' }, 283 { cmp => 'katakana', msg => '' }, 284 { cmp => 'katakana', msg => '' }, 285 { cmp => 'NOT_IN_PAD', msg => '' }, 286 ], 287 pad_size => { 288 total => { cmp => 2, msg => 'Sub has two lexicals.' }, 289 utf8 => { cmp => 2, msg => '' }, 290 invariant => { cmp => 0, msg => '' }, 291 }, 292 vars => [], 293}); 294 295{ 296 { 297 my $utf8_e; 298 BEGIN { 299 $utf8_e = "e"; 300 Encode::_utf8_on($utf8_e); 301 } 302 use constant utf8_e => $utf8_e; 303 } 304 my $e = 'Invariant'; 305 is pad_scalar(1, "e"), pad_scalar(1, utf8_e), 'Fetches the same thing, even if invariant but with differing utf8ness.'; 306} 307 308 309sub general_tests { 310 my ($results, $names_av, $tests) = @_; 311 312 for my $i (0..$#$results) { 313 next if $tests->{results}[$i]{SKIP}; 314 is $results->[$i], $tests->{results}[$i]{cmp}, $tests->{results}[$i]{msg}; 315 } 316 317 is @$names_av, $tests->{pad_size}{total}{cmp}, $tests->{pad_size}{total}{msg}; 318 is grep( Encode::is_utf8($_), @$names_av), 319 $tests->{pad_size}{utf8}{cmp}, $tests->{pad_size}{utf8}{msg}; 320 is grep( !Encode::is_utf8($_), @$names_av), $tests->{pad_size}{invariant}{cmp}, 321 $tests->{pad_size}{invariant}{msg}; 322 323 for my $var (@{$tests->{vars}}) { 324 if ($var->{type} eq 'ok') { 325 ok +(grep { $_ eq $var->{name} } @$names_av), $var->{msg}; 326 } else { 327 ok !(grep { $_ eq $var->{name} } @$names_av), $var->{msg}; 328 } 329 } 330 331} 332