1#!./perl 2 3# Checks if the parser behaves correctly in edge cases 4# (including weird syntax errors) 5 6BEGIN { 7 chdir 't' if -d 't'; 8 require './test.pl'; 9 require './charset_tools.pl'; 10 skip_all_without_unicode_tables(); 11} 12 13plan (tests => 58); 14 15use utf8; 16use open qw( :utf8 :std ); 17 18is *tèst, "*main::tèst", "sanity check."; 19ok $::{"tèst"}, "gets the right glob in the stash."; 20 21my $glob_by_sub = sub { *main::method }->(); 22 23is *main::method, "*main::method", "glob stringy works"; 24is "" . *main::method, "*main::method", "glob stringify-through-concat works"; 25is $glob_by_sub, "*main::method", "glob stringy works"; 26is "" . $glob_by_sub, "*main::method", ""; 27 28sub gimme_glob { 29 no strict 'refs'; 30 is *{$_[0]}, "*main::$_[0]"; 31 *{$_[0]}; 32} 33 34is "" . gimme_glob("下郎"), "*main::下郎"; 35$a = *下郎; 36is "" . $a, "*main::下郎"; 37 38*{gimme_glob("下郎")} = sub {}; 39 40{ 41 ok defined *{"下郎"}{CODE}; 42 ok !defined *{"\344\270\213\351\203\216"}{CODE}; 43} 44 45$Lèon = 1; 46is ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,"; 47ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one."; 48 49my $a = "foo" . chr(190); 50my $b = $a . chr(256); 51chop $b; # $b is $a with utf8 on 52 53is $a, $b, '$a equals $b'; 54 55*$b = sub { 5 }; 56 57is eval { main->$a }, 5, q!$a can call $b's sub!; 58ok !$@, "..and there's no error."; 59 60my $c = $b; 61utf8::encode($c); 62ok $b ne $c, '$b unequal $c'; 63eval { main->$c }; 64ok $@, q!$c can't call $b's sub.!; 65 66# Now define another sub under the downgraded name: 67*$a = sub { 6 }; 68# Call it: 69is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,"; 70ok !$@, "..without errors."; 71eval { main->$c }; 72ok $@, "but it's still unreachable through *c"; 73 74*$b = \10; 75is ${*$a{SCALAR}}, 10; 76is ${*$b{SCALAR}}, 10; 77is ${*$c{SCALAR}}, undef; 78 79opendir FÒÒ, "."; 80closedir FÒÒ; 81::ok($::{"FÒÒ"}, "Bareword generates the right glob."); 82::ok(!$::{"F\303\222\303\222"}); 83 84sub участники { 1 } 85 86ok $::{"участники"}, "non-const sub declarations generate the right glob"; 87is $::{"участники"}->(), 1; 88 89sub 原 () { 1 } 90 91is grep({ $_ eq "\x{539f}" } keys %::), 1, "Constant subs generate the right glob."; 92is grep({ $_ eq "\345\216\237" } keys %::), 0; 93 94#These should probably go elsewhere. 95eval q{ sub wròng1 (_$); wròng1(1,2) }; 96like( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' ); 97 98eval q{ sub ча::ики ($__); ча::ики(1,2) }; 99like( $@, qr/Malformed prototype for ча::ики/ ); 100 101our $問 = 10; 102is $問, 10, "our works"; 103is $main::問, 10, "...as does getting the same variable through the fully qualified name"; 104is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't"; 105 106{ 107 use charnames qw( :full ); 108 109 eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !; 110 $@ =~ s/eval \d+/eval 11/; 111 is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after my $ニ <-- HERE near column 8 at (eval 11) line 1. 112', "'Unrecognized character' croak is UTF-8 clean"; 113 114 eval "q\0foobar\0 \x{FFFF}+1"; 115 $@ =~ s/eval \d+/eval 11/; 116 is( 117 $@, 118 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n", 119 "...and nul-clean" 120 ); 121 122 { 123 use re 'eval'; 124 my $f = qq{(?{\$ネ+ 1; \x{1F42A} })}; 125 eval { "a" =~ /^a$f/ }; 126 my $e = $@; 127 $e =~ s/eval \d+/eval 11/; 128 is( 129 $e, 130 "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n", 131 "Messages from a re-eval are UTF-8 clean" 132 ); 133 134 $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })}; 135 eval { "a" =~ /^a$f/ }; 136 my $e = $@; 137 $e =~ s/eval \d+/eval 11/; 138 is( 139 $e, 140 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n", 141 "...and nul-clean" 142 ); 143 } 144 145 { 146 eval qq{\$ネ+ 1; \x{1F42A}}; 147 $@ =~ s/eval \d+/eval 11/; 148 is( 149 $@, 150 "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n", 151 "Unrecognized character error doesn't cut off in the middle of characters" 152 ) 153 } 154 155} 156 157{ 158 use feature 'state'; 159 for ( qw( my state our ) ) { 160 local $@; 161 eval "$_ Foo $x = 1;"; 162 like $@, qr/No such class Foo/u, "'No such class' warning for $_ is UTF-8 clean"; 163 } 164} 165 166{ 167 local $@; 168 eval "our \$main::\x{30cb};"; 169 like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean"; 170} 171 172{ 173 use feature 'state'; 174 local $@; 175 for ( qw( my state ) ) { 176 eval "$_ \$::\x{30cb};"; 177 like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!; 178 } 179} 180 181{ 182 local $@; 183 eval qq!print \x{30cb}, "comma""!; 184 like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles."; 185} 186 187# tests for "Bad name" 188eval q{ Foo::$bar }; 189like( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' ); 190eval q{ Foo''bar }; 191like( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' ); 192 193{ 194 no warnings 'utf8'; 195 local $SIG{__WARN__} = sub { }; # The eval will also output a warning, 196 # which we ignore 197 my $malformed_to_be = ($::IS_EBCDIC) # Overlong sequence 198 ? "\x{74}\x{41}" 199 : "\x{c0}\x{a0}"; 200 CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\""; 201 like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}'); 202} 203 204# RT# 124216: Perl_sv_clear: Assertion 205# If a parsing error occurred during a forced token within an interpolated 206# context, the stack unwinding failed to restore PL_lex_defer and so after 207# error recovery the state restored after the forced token was processed 208# was the wrong one, resulting in the lexer thinking we're still inside a 209# quoted string and things getting freed multiple times. 210# 211# The \x{3030} char isn't a legal var name, and this triggers the error. 212# 213# NB: this only failed if the closing quote of the interpolated string is 214# the last char of the file (i.e. no trailing \n). 215 216{ 217 my $bad = "\x{3030}"; 218 # Write out the individual utf8 bytes making up \x{3030}. This 219 # avoids 'Wide char in print' warnings from test.pl. (We may still 220 # get that warning when compiling the prog itself, since the 221 # error it prints to stderr contains a wide char.) 222 utf8::encode($bad); 223 224 fresh_perl_like(qq{use utf8; "\$$bad"}, 225 qr/ 226 \A 227 ( \QWide character in print at - line 1.\E\n )? 228 \Qsyntax error at - line 1, near \E"\$.*"\n 229 \QExecution of - aborted due to compilation errors.\E\z 230 /xm, 231 232 {stderr => 1}, "RT# 124216"); 233} 234 235SKIP: { 236 237 use Config; 238 if ($Config{uvsize} < 8) { 239 skip("test is only valid on 64-bit ints", 4); 240 } 241 else { 242 my $a; 243 my $b; 244 245 # This caused a memory fault [perl #128738] 246 $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000 247 eval "\$a = q ${b}abc${b}"; 248 is $@, "", 249 "No errors in eval'ing a string with large code point delimiter"; 250 is $a, 'abc', 251 "Got expected result in eval'ing a string with a large code point" 252 . " delimiter"; 253 254 $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF 255 eval "\$a = q ${b}Hello, \\\\whirled!${b}"; 256 is $@, "", 257 "No errors in eval'ing a string with large code point delimiter"; 258 is $a, 'Hello, \whirled!', 259 "Got expected result in eval'ing a string with a large code point" 260 . " delimiter"; 261 } 262} 263 264fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)'); 265use utf8; 266eval "sort \x{100}%"; 267die $@; 268EOS 269syntax error at (eval 1) line 1, at EOF 270EXPECT 271 272# New tests go here ^^^^^ 273 274# Keep this test last, as it will mess up line number reporting for any 275# subsequent tests. 276 277<<END; 278${ 279#line 57 280qq ϟϟ } 281END 282is __LINE__, 59, '#line directive and qq with uni delims inside heredoc'; 283 284# Put new tests above the line number tests. 285