#!./perl
#
# This is a home for regular expression tests that don't fit into
# the format supported by re/regexp.t. If you want to add a test
# that does fit that format, add it to re/re_tests, not here.

use strict;
use warnings;
use Config;
use 5.010;


# Forward declaration: run_tests is defined further down and invoked below.
sub run_tests;

# Flush the selected output handle after every write.
$| = 1;


# Done at compile time so the helper subs are already declared when the
# rest of the file is parsed (some calls below are written without parens).
# ok/is/like/plan/set_up_inc/byte_utf8a_to_utf8n are not defined in this
# file -- presumably they come from test.pl and charset_tools.pl; confirm
# there.
BEGIN {
    chdir 't' if -d 't';
    require './test.pl'; require './charset_tools.pl';
    set_up_inc('../lib');
}

# Package array; the array-interpolation tests inside run_tests localise it
# and interpolate it into patterns.
our @global;

plan tests => 506; # Update this when adding/deleting tests.

# caller() is false at the top level of the main script, so the tests only
# run when this file is executed directly rather than loaded by other code.
run_tests() unless caller;

# test that runtime code without 'use re eval' is trapped
#
# norun(DESCRIPTION)
#   Asserts that $@ currently holds the "Eval-group not allowed at runtime"
#   error.  All arguments are passed through to like() after the pattern;
#   the callers visible in this file pass a single test description.
#   Because it reads $@, call it directly after the eval {} being checked.

sub norun {
    like($@, qr/Eval-group not allowed at runtime/, @_);
}

#
# Tests start here.
#
sub run_tests {
    {
        my $message = "Call code from qr //";
        local $_ = 'var="foo"';
        # Every execution of this code block increments $b, so the value of
        # $b checked below counts how many times the block actually ran.
        $a = qr/(?{++$b})/;
        $b = 7;
        ok(/$a$a/ && $b eq '9', $message);

        # $c is only the string form of $a; the tests below expect a pattern
        # interpolating it to be refused unless 'use re "eval"' is in scope.
        my $c="$a";
        ok(/$a$a/ && $b eq '11', $message);

        undef $@;
        eval {/$c/};
        norun("$message norun 1");


        {
            # The pragma below only applies to the code that follows it in
            # this block, so this first eval is still expected to fail.
            eval {/$a$c$a/};
            norun("$message norun 2");
            use re "eval";
            /$a$c$a/;
            is($b, '14', $message);
        }

        our $lex_a = 43;
        our $lex_b = 17;
        our $lex_c = 27;
        my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/);

        is($lex_res, 1, $message);
        is($lex_a, 44, $message);
        is($lex_c, 43, $message);

        undef $@;
        my $d = '(?{1})';
        my $match = eval { /$a$c$a$d/ };
        ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
        # The rejected pattern must not have run any code block: $b is
        # expected to be unchanged.
        is($b, '14', $message);

        $lex_a = 2;
        $lex_a = 43;
        $lex_b = 17;
        $lex_c = 27;
        $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/);

        is($lex_res, 1, $message);
        is($lex_a, 44, $message);
        is($lex_c, 43, $message);

    }

    {
        our $a = bless qr /foo/ => 'Foo';
        ok 'goodfood' =~ $a, "Reblessed qr // matches";
        is($a, '(?^:foo)', "Reblessed qr // stringifies");
        my $x = "\x{3fe}";
        my $z = my $y = byte_utf8a_to_utf8n("\317\276"); # Byte representation
                                                         # of $x
        $a = qr
/$x/; 98 ok $x =~ $a, "UTF-8 interpolation in qr //"; 99 ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; 100 ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; 101 ok "a$x" =~ /^a(??{$a})\z/, 102 "Postponed interpolation of qr // preserves UTF-8"; 103 104 105 is(length qr /##/x, 9, "## in qr // doesn't corrupt memory; Bug 17776"); 106 107 { 108 ok "$x$x" =~ /^$x(??{$x})\z/, 109 "Postponed UTF-8 string in UTF-8 re matches UTF-8"; 110 ok "$y$x" =~ /^$y(??{$x})\z/, 111 "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; 112 ok "$y$x" !~ /^$y(??{$y})\z/, 113 "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; 114 ok "$x$x" !~ /^$x(??{$y})\z/, 115 "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; 116 ok "$y$y" =~ /^$y(??{$y})\z/, 117 "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; 118 ok "$x$y" =~ /^$x(??{$y})\z/, 119 "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; 120 121 $y = $z; # Reset $y after upgrade. 122 ok "$x$y" !~ /^$x(??{$x})\z/, 123 "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; 124 ok "$y$y" !~ /^$y(??{$x})\z/, 125 "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; 126 } 127 } 128 129 130 { 131 # Test if $^N and $+ work in (?{}) 132 our @ctl_n = (); 133 our @plus = (); 134 our $nested_tags; 135 $nested_tags = qr{ 136 < 137 ((\w)+) 138 (?{ 139 push @ctl_n, (defined $^N ? $^N : "undef"); 140 push @plus, (defined $+ ? 
$+ : "undef"); 141 }) 142 > 143 (??{$nested_tags})* 144 </\s* \w+ \s*> 145 }x; 146 147 148 my $c = 0; 149 for my $test ( 150 # Test structure: 151 # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ] 152 [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], 153 [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], 154 [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], 155 [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], 156 [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], 157 [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 158 [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 159 [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], 160 [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], 161 [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 162 [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 163 [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], 164 [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ], 165 166 ) { #"#silence vim highlighting 167 $c++; 168 @ctl_n = (); 169 @plus = (); 170 my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); 171 push @ctl_n, (defined $^N ? $^N : "undef"); 172 push @plus, (defined $+ ? $+ : "undef"); 173 ok($test->[0] == $match, "match $c"); 174 if ($test->[0] != $match) { 175 # unset @ctl_n and @plus 176 @ctl_n = @plus = (); 177 } 178 is("@ctl_n", $test->[2], "ctl_n $c"); 179 is("@plus", $test->[3], "plus $c"); 180 } 181 } 182 183 { 184 our $f; 185 local $f; 186 $f = sub { 187 defined $_[0] ? 
$_[0] : "undef"; 188 }; 189 190 like("123", qr/^(\d)(((??{1 + $^N})))+$/, 'Bug 56194'); 191 192 our @ctl_n; 193 our @plus; 194 195 my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; 196 my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; 197 my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; 198 our $re5; 199 local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; 200 my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; 201 my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; 202 my $re8 = qr/(\d+)/; 203 my $c = 0; 204 for my $test ( 205 # Test structure: 206 # [ 207 # String to match 208 # Regex too match 209 # Expected values of $^N 210 # Expected values of $+ 211 # Expected values of $1, $2, $3, $4 and $5 212 # ] 213 [ 214 "1233", 215 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, 216 "1 2 3 3", 217 "1 2 3 3", 218 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 219 ], 220 [ 221 "1233", 222 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, 223 "1 2 3 3", 224 "1 2 3 3", 225 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 226 ], 227 [ 228 "1233", 229 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, 230 "1 2 3 3", 231 "1 2 3 3", 232 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 233 ], 234 [ 235 "1233", 236 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, 237 "1 2 3 3", 238 "1 2 3 3", 239 "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", 240 ], 241 [ 242 "1233", 243 qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, 244 "1 2 3 3", 245 "1 2 3 3", 246 "\$1 = 1, \$2 = 3, \$3 = 
undef, \$4 = undef, \$5 = undef", 247 ], 248 [ 249 "123abc3", 250 qr#^($re)(|a(b)c|def)(??{$^R})$#, 251 "1 2 3 abc", 252 "1 2 3 b", 253 "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 254 ], 255 [ 256 "123abc3", 257 qr#^($re2)$#, 258 "1 2 3 123abc3", 259 "1 2 3 b", 260 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 261 ], 262 [ 263 "123abc3", 264 qr#^($re3)$#, 265 "1 2 123abc3", 266 "1 2 b", 267 "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", 268 ], 269 [ 270 "123abc3", 271 qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, 272 "1 2 abc", 273 "1 2 abc", 274 "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", 275 ], 276 [ 277 "123abc3", 278 qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, 279 "1 2 abc", 280 "1 2 b", 281 "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", 282 ], 283 [ 284 "1234", 285 qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, 286 "1234 123 12 1 2 3 1234", 287 "1234 123 12 1 2 3 4", 288 "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", 289 ], 290 [ 291 "1234556", 292 qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#, 293 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56", 294 "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5", 295 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56", 296 ], 297 [ 298 "12345562", 299 qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#, 300 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62", 301 "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2", 302 "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5", 303 ], 304 ) { 305 $c++; 306 @ctl_n = (); 307 @plus = (); 308 undef $^R; 309 my $match = $test->[0] =~ $test->[1]; 310 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5)); 311 push @ctl_n, $f->($^N); 312 push @plus, $f->($+); 313 ok($match, "match $c; Bug 56194"); 314 if (not $match) { 315 # 
unset $str, @ctl_n and @plus 316 $str = ""; 317 @ctl_n = @plus = (); 318 } 319 is("@ctl_n", $test->[2], "ctl_n $c; Bug 56194"); 320 is("@plus", $test->[3], "plus $c; Bug 56194"); 321 is($str, $test->[4], "str $c; Bug 56194"); 322 } 323 324 { 325 @ctl_n = (); 326 @plus = (); 327 328 our $re4; 329 local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#; 330 undef $^R; 331 my $match = "123abc3" =~ m/^(??{$re4})$/; 332 my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R)); 333 push @ctl_n, $f->($^N); 334 push @plus, $f->($+); 335 ok($match, 'Bug 56194'); 336 if (not $match) { 337 # unset $str 338 @ctl_n = (); 339 @plus = (); 340 $str = ""; 341 } 342 is("@ctl_n", "1 2 undef", 'Bug 56194'); 343 is("@plus", "1 2 undef", 'Bug 56194'); 344 is($str, 345 "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = 3", 346 'Bug 56194 ($^R tweaked by 121070)'); 347 } 348 { 349 undef $^R; 350 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })/; 351 is("$^R", 42, 'Bug 121070 - use of (?&Char) should not clobber $^R'); 352 "abcd"=~/(?<Char>.)(?&Char)(?{ 42 })(?{ 43 })/; 353 is("$^R", 43, 'related to 121070 - use of (?&Char) should not clobber $^R'); 354 } 355 } 356 357 { 358 # re evals within \U, \Q etc shouldn't be seen by the lexer 359 local our $a = "i"; 360 local our $B = "J"; 361 ok('(?{1})' =~ /^\Q(?{1})\E$/, '\Q(?{1})\E'); 362 ok('(?{1})' =~ /^\Q(?{\E1\}\)$/, '\Q(?{\E1\}\)'); 363 eval {/^\U(??{"$a\Ea"})$/ }; norun('^\U(??{"$a\Ea"})$ norun'); 364 eval {/^\L(??{"$B\Ea"})$/ }; norun('^\L(??{"$B\Ea"})$ norun'); 365 use re 'eval'; 366 ok('Ia' =~ /^\U(??{"$a\Ea"})$/, '^\U(??{"$a\Ea"})$'); 367 ok('ja' =~ /^\L(??{"$B\Ea"})$/, '^\L(??{"$B\Ea"})$'); 368 } 369 370 { 371 # Comprehensive (hopefully) tests of closure behaviour: 372 # i.e. when do (?{}) blocks get (re)compiled, and what instances 373 # of lexical vars do they close over? 
374 375 # if the pattern string gets utf8 upgraded while concatenating, 376 # make sure a literal code block is still detected (by still 377 # compiling in the absence of use re 'eval') 378 379 { 380 my $s1 = "\x{80}"; 381 my $s2 = "\x{100}"; 382 ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade"); 383 } 384 385 my ($cr1, $cr2, $cr3, $cr4); 386 387 for my $x (qw(a b c)) { 388 my $bc = ($x ne 'a'); 389 my $c80 = chr(0x80); 390 391 # the most basic: literal code should be in same scope 392 # as the parent 393 394 ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code"); 395 ok("\x{100}$x" =~ /^\x{100}(??{$x})$/, "[$x] literal code UTF8"); 396 397 # the "don't recompile if pattern unchanged" mechanism 398 # shouldn't apply to code blocks - recompile every time 399 # to pick up new instances of variables 400 401 my $code1 = 'B(??{$x})'; 402 my $code1u = $c80 . "\x{100}" . '(??{$x})'; 403 404 eval {/^A$code1$/}; 405 norun("[$x] unvarying runtime code AA norun"); 406 eval {/^A$code1u$/}; 407 norun("[$x] unvarying runtime code AU norun"); 408 eval {/^$c80\x{100}$code1$/}; 409 norun("[$x] unvarying runtime code UA norun"); 410 eval {/^$c80\x{101}$code1u$/}; 411 norun("[$x] unvarying runtime code UU norun"); 412 413 { 414 use re 'eval'; 415 ok("AB$x" =~ /^A$code1$/, "[$x] unvarying runtime code AA"); 416 ok("A$c80\x{100}$x" =~ /^A$code1u$/, 417 "[$x] unvarying runtime code AU"); 418 ok("$c80\x{100}B$x" =~ /^$c80\x{100}$code1$/, 419 "[$x] unvarying runtime code UA"); 420 ok("$c80\x{101}$c80\x{100}$x" =~ /^$c80\x{101}$code1u$/, 421 "[$x] unvarying runtime code UU"); 422 } 423 424 # mixed literal and run-time code blocks 425 426 my $code2 = 'B(??{$x})'; 427 my $code2u = $c80 . "\x{100}" . 
'(??{$x})'; 428 429 eval {/^A(??{$x})-$code2$/}; 430 norun("[$x] literal+runtime AA norun"); 431 eval {/^A(??{$x})-$code2u$/}; 432 norun("[$x] literal+runtime AU norun"); 433 eval {/^$c80\x{100}(??{$x})-$code2$/}; 434 norun("[$x] literal+runtime UA norun"); 435 eval {/^$c80\x{101}(??{$x})-$code2u$/}; 436 norun("[$x] literal+runtime UU norun"); 437 438 { 439 use re 'eval'; 440 ok("A$x-B$x" =~ /^A(??{$x})-$code2$/, 441 "[$x] literal+runtime AA"); 442 ok("A$x-$c80\x{100}$x" =~ /^A(??{$x})-$code2u$/, 443 "[$x] literal+runtime AU"); 444 ok("$c80\x{100}$x-B$x" =~ /^$c80\x{100}(??{$x})-$code2$/, 445 "[$x] literal+runtime UA"); 446 ok("$c80\x{101}$x-$c80\x{100}$x" 447 =~ /^$c80\x{101}(??{$x})-$code2u$/, 448 "[$x] literal+runtime UU"); 449 } 450 451 # literal qr code only created once, naked 452 453 $cr1 //= qr/^A(??{$x})$/; 454 ok("Aa" =~ $cr1, "[$x] literal qr once naked"); 455 456 # literal qr code only created once, embedded with text 457 458 $cr2 //= qr/B(??{$x})$/; 459 ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text"); 460 461 # literal qr code only created once, embedded with text + lit code 462 463 $cr3 //= qr/C(??{$x})$/; 464 ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/, 465 "[$x] literal qr once embedded text + lit code"); 466 467 # literal qr code only created once, embedded with text + run code 468 469 $cr4 //= qr/C(??{$x})$/; 470 my $code3 = 'A(??{$x})'; 471 472 eval {/^$code3-B$cr4/}; 473 norun("[$x] literal qr once embedded text + run code norun"); 474 { 475 use re 'eval'; 476 ok("A$x-BCa" =~ /^$code3-B$cr4/, 477 "[$x] literal qr once embedded text + run code"); 478 } 479 480 # literal qr code, naked 481 482 my $r1 = qr/^A(??{$x})$/; 483 ok("A$x" =~ $r1, "[$x] literal qr naked"); 484 485 # literal qr code, embedded with text 486 487 my $r2 = qr/B(??{$x})$/; 488 ok("AB$x" =~ /^A$r2/, "[$x] literal qr embedded text"); 489 490 # literal qr code, embedded with text + lit code 491 492 my $r3 = qr/C(??{$x})$/; 493 ok("A$x-BC$x" =~ /^A(??{$x})-B$r3/, 494 "[$x] 
literal qr embedded text + lit code"); 495 496 # literal qr code, embedded with text + run code 497 498 my $r4 = qr/C(??{$x})$/; 499 my $code4 = '(??{$x})'; 500 501 eval {/^A$code4-B$r4/}; 502 norun("[$x] literal qr embedded text + run code"); 503 { 504 use re 'eval'; 505 ok("A$x-BC$x" =~ /^A$code4-B$r4/, 506 "[$x] literal qr embedded text + run code"); 507 } 508 509 # nested qr in different scopes 510 511 my $code5 = '(??{$x})'; 512 my $r5 = qr/C(??{$x})/; 513 514 my $r6; 515 eval {qr/$code5-C(??{$x})/}; norun("r6 norun"); 516 { 517 use re 'eval'; 518 $r6 = qr/$code5-C(??{$x})/; 519 } 520 521 my @rr5; 522 my @rr6; 523 524 for my $y (qw(d e f)) { 525 526 my $rr5 = qr/^A(??{"$x$y"})-$r5/; 527 push @rr5, $rr5; 528 ok("A$x$y-C$x" =~ $rr5, 529 "[$x-$y] literal qr + r5"); 530 531 my $rr6 = qr/^A(??{"$x$y"})-$r6/; 532 push @rr6, $rr6; 533 ok("A$x$y-$x-C$x" =~ $rr6, 534 "[$x-$y] literal qr + r6"); 535 } 536 537 for my $i (0,1,2) { 538 my $y = 'Y'; 539 my $yy = (qw(d e f))[$i]; 540 my $rr5 = $rr5[$i]; 541 ok("A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside"); 542 ok("A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})$/, 543 "[$x-$yy] literal qr + r5 + lit, outside"); 544 545 546 my $rr6 = $rr6[$i]; 547 push @rr6, $rr6; 548 ok("A$x$yy-$x-C$x" =~ $rr6, 549 "[$x-$yy] literal qr + r6, outside"); 550 ok("A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/, 551 "[$x-$yy] literal qr + r6 +lit, outside"); 552 } 553 } 554 555 # recursive subs should get lexical from the correct pad depth 556 557 sub recurse { 558 my ($n) = @_; 559 return if $n > 2; 560 ok("A$n" =~ /^A(??{$n})$/, "recurse($n)"); 561 recurse($n+1); 562 } 563 recurse(0); 564 565 # for qr// containing run-time elements but with a compile-time 566 # code block, make sure the run-time bits are executed in the same 567 # pad they were compiled in 568 { 569 my $a = 'a'; # ensure outer and inner pads don't align 570 my $b = 'b'; 571 my $c = 'c'; 572 my $d = 'd'; 573 my $r = qr/^$b(??{$c})$d$/; 574 ok("bcd" =~ $r, "qr with run-time elements and 
code block"); 575 } 576 577 # check that cascaded embedded regexes all see their own lexical 578 # environment 579 580 { 581 my ($r1, $r2, $r3, $r4); 582 my ($x1, $x2, $x3, $x4) = (5,6,7,8); 583 { my $x1 = 1; $r1 = qr/A(??{$x1})/; } 584 { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; } 585 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; } 586 { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; } 587 ok("A1234" =~ /^$r4$/, "cascaded qr"); 588 } 589 590 # and again, but in a loop, with no external references 591 # being maintained to the qr's 592 593 { 594 my $r = 'A'; 595 for my $x (1..4) { 596 $r = qr/$r(??{$x})/; 597 } 598 my $x = 5; 599 ok("A1234" =~ /^$r$/, "cascaded qr loop"); 600 } 601 602 603 # and again, but compiling the qrs in an eval so there 604 # aren't even refs to the qrs from any ops 605 606 { 607 my $r = 'A'; 608 for my $x (1..4) { 609 $r = eval q[ qr/$r(??{$x})/; ]; 610 } 611 my $x = 5; 612 ok("A1234" =~ /^$r$/, "cascaded qr loop"); 613 } 614 615 # have qrs with either literal code blocks or only embedded 616 # code blocks, but not both 617 618 { 619 my ($r1, $r2, $r3, $r4); 620 my ($x1, $x3) = (7,8); 621 { my $x1 = 1; $r1 = qr/A(??{$x1})/; } 622 { $r2 = qr/${r1}2/; } 623 { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; } 624 { $r4 = qr/${r3}4/; } 625 ok("A1234" =~ /^$r4$/, "cascaded qr mix 1"); 626 ok("A12345" =~ /^${r4}5$/, "cascaded qr mix 2"); 627 ok("A1234" =~ qr/^$r4$/ , "cascaded qr mix 3"); 628 ok("A12345" =~ qr/^${r4}5$/, "cascaded qr mix 4"); 629 } 630 631 # and make sure things are freed at the right time 632 { 633 sub Foo99::DESTROY { $Foo99::d++ } 634 $Foo99::d = 0; 635 my $r1; 636 { 637 my $x = bless [1], 'Foo99'; 638 $r1 = eval 'qr/(??{$x->[0]})/'; 639 } 640 my $r2 = eval 'qr/a$r1/'; 641 my $x = 2; 642 ok(eval '"a1" =~ qr/^$r2$/', "match while in scope"); 643 # make sure PL_reg_curpm isn't holding on to anything 644 "a" =~ /a(?{1})/; 645 is($Foo99::d, 0, "before scope exit"); 646 } 647 ::is($Foo99::d, 1, "after scope exit"); 648 649 # forward declared subs should Do The Right 
Thing with any anon CVs 650 # within them (i.e. pad_fixup_inner_anons() should work) 651 652 sub forward; 653 sub forward { 654 my $x = "a"; 655 my $A = "A"; 656 ok("Aa" =~ qr/^A(??{$x})$/, "forward qr compiletime"); 657 ok("Aa" =~ qr/^$A(??{$x})$/, "forward qr runtime"); 658 } 659 forward; 660 } 661 662 # test that run-time embedded code, when re-fed into toker, 663 # does all the right escapes 664 665 { 666 my $enc; 667 $enc = eval 'use Encode; find_encoding("ascii")' unless $::IS_EBCDIC; 668 669 my $x = 0; 670 my $y = 'bad'; 671 672 # note that most of the strings below are single-quoted, and the 673 # things within them, like '$y', *aren't* intended to interpolate 674 675 my $s1 = 676 'a\\$y(?# (??{BEGIN{$x=1} "X1"})b(?# \Ux2\E)c\'d\\\\e\\\\Uf\\\\E'; 677 678 ok(q{a$ybc'd\e\Uf\E} =~ /^$s1$/, "reparse"); 679 is($x, 0, "reparse no BEGIN"); 680 681 my $s2 = 'g\\$y# (??{{BEGIN{$x=2} "X3"}) \Ux3\E' . "\nh"; 682 683 ok(q{a$ybc'd\\e\\Uf\\Eg$yh} =~ /^$s1$s2$/x, "reparse /x"); 684 is($x, 0, "reparse /x no BEGIN"); 685 686 my $b = '\\'; 687 my $q = '\''; 688 689 # non-ascii in string as "<0xNNN>" 690 sub esc_str { 691 my $s = shift; 692 $s =~ s{(.)}{ 693 my $c = ord($1); 694 (utf8::native_to_unicode($c)< 32 695 || utf8::native_to_unicode($c) > 127) 696 ? sprintf("<0x%x>", $c) : $1; 697 }ge; 698 $s; 699 } 700 sub fmt { sprintf "hairy backslashes %s [%s] =~ /^%s/", 701 $_[0], esc_str($_[1]), esc_str($_[2]); 702 } 703 704 705 for my $u ( 706 [ '', '', 'blank ' ], 707 [ "\x{100}", '\x{100}', 'single' ], 708 [ "\x{100}", "\x{100}", 'double' ]) 709 { 710 for my $pair ( 711 [ "$b", "$b$b" ], 712 [ "$q", "$q" ], 713 [ "$b$q", "$b$b$b$q" ], 714 [ "$b$b$q", "$b$b$b$b$q" ], 715 [ "$b$b$b$q", "$b$b$b$b$b$b$q" ], 716 [ "$b$b$b$b$q","$b$b$b$b$b$b$b$b$q" ], 717 ) { 718 my ($s, $r) = @$pair; 719 $s = "9$s"; 720 my $ss = "$u->[0]$s"; 721 722 my $c = '9' . 
$r; 723 my $cc = "$u->[1]$c"; 724 725 ok($ss =~ /^$cc/, fmt("plain $u->[2]", $ss, $cc)); 726 727 no strict; 728 $nine = $nine = "bad"; 729 $ss = "$u->[0]\t${q}\x41${b}x42$s" if $::IS_ASCII; 730 $ss = "$u->[0]\t${q}\xC1${b}xC2$s" if $::IS_EBCDIC; 731 for my $use_qr ('', 'qr') { 732 $c = qq[(??{my \$z='{';] 733 . (($::IS_ASCII) 734 ? qq[$use_qr"$b${b}t$b$q$b${b}x41$b$b$b${b}x42"] 735 : qq[$use_qr"$b${b}t$b$q$b${b}xC1$b$b$b${b}xC2"]) 736 . qq[. \$nine})]; 737 # (??{ qr/str/ }) goes through one less interpolation 738 # stage than (??{ qq/str/ }) 739 $c =~ s{\\\\}{\\}g if ($use_qr eq 'qr'); 740 $c .= $r; 741 $cc = "$u->[1]$c"; 742 my $nine = 9; 743 744 eval {/^$cc/}; norun(fmt("code norun $u->[2]", $ss, $cc)); 745 { 746 use re 'eval'; 747 ok($ss =~ /^$cc/, fmt("code $u->[2]", $ss, $cc)); 748 } 749 } 750 } 751 } 752 753 my $code1u = "(??{qw(\x{100})})"; 754 eval {/^$code1u$/}; norun("reparse embedded unicode norun"); 755 { 756 use re 'eval'; 757 ok("\x{100}" =~ /^$code1u$/, "reparse embedded unicode"); 758 } 759 } 760 761 # a non-pattern literal won't get code blocks parsed at compile time; 762 # but they must get parsed later on if 'use re eval' is in scope 763 # also check that unbalanced {}'s are parsed ok 764 765 { 766 eval q["a{" =~ '^(??{"a{"})$']; 767 norun("non-pattern literal code norun"); 768 eval {/^${\'(??{"a{"})'}$/}; 769 norun("runtime code with unbalanced {} norun"); 770 771 use re 'eval'; 772 ok("a{" =~ '^a(??{"{"})$', "non-pattern literal code"); 773 ok("a{" =~ /^a${\'(??{"{"})'}$/, "runtime code with unbalanced {}"); 774 } 775 776 # make sure warnings come from the right place 777 778 { 779 use warnings; 780 my ($s, $t, $w); 781 local $SIG{__WARN__} = sub { $w .= "@_" }; 782 783 $w = ''; $s = 's'; 784 my $r = qr/(?{$t=$s+1})/; 785 "a" =~ /a$r/; 786 like($w, qr/pat_re_eval/, "warning main file"); 787 788 # do it in an eval to get predictable line numbers 789 eval q[ 790 791 $r = qr/(?{$t=$s+1})/; 792 ]; 793 $w = ''; $s = 's'; 794 "a" =~ /a$r/; 795 
like($w, qr/ at \(eval \d+\) line 3/, "warning eval A"); 796 797 $w = ''; $s = 's'; 798 eval q[ 799 use re 'eval'; 800 my $c = '(?{$t=$s+1})'; 801 "a" =~ /a$c/; 802 1; 803 ]; 804 like($w, qr/ at \(eval \d+\) line 1/, "warning eval B"); 805 } 806 807 # jumbo test for: 808 # * recursion; 809 # * mixing all the different types of blocks (literal, qr/literal/, 810 # runtime); 811 # * backtracking (the Z+ alternation ensures CURLYX and full 812 # scope popping on backtracking) 813 814 { 815 sub recurse2 { 816 my ($depth)= @_; 817 return unless $depth; 818 my $s1 = '3-LMN'; 819 my $r1 = qr/(??{"$s1-$depth"})/; 820 821 my $s2 = '4-PQR'; 822 my $c1 = '(??{"$s2-$depth"})'; 823 use re 'eval'; 824 ok( "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>" 825 . "<12345-ABC-$depth-123-LMN-$depth-1234-PQR-$depth>" 826 =~ 827 /^<(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1> 828 <(\d|Z+)+(??{"45-ABC-$depth-"})(\d|Z+)+$r1-\d+$c1>$/x, 829 "recurse2($depth)"); 830 recurse2($depth-1); 831 } 832 recurse2(5); 833 } 834 835 # nested (??{}) called from various levels of a recursive function 836 837 { 838 sub recurse3 { 839 my ($n) = @_; 840 return if $n > 3; 841 ok("A$n" =~ m{^A(??{ "0123" =~ /((??{$n}))/; $1 })$}, 842 "recurse3($n)"); 843 ok("A$n" !~ m{^A(??{ "0123" =~ /((??{$n}))/; "X" })$}, 844 "recurse3($n) nomatch"); 845 recurse3($n+1); 846 } 847 recurse3(0); 848 } 849 850 # nested (??{}) being invoked recursively via a function 851 852 { 853 my $s = ''; 854 our $recurse4; 855 my @alpha = qw(A B C D E); 856 $recurse4 = sub { 857 my ($n) = @_; 858 $s .= "(n=$n:"; 859 if ($n < 4) { 860 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~ 861 m{^([A-Z]) 862 (??{ 863 $s .= "1=$1:"; 864 "$n-0123" =~ m{^(\d)-(((??{$recurse4->($n+1)})))}; 865 $s .= "i1=$1:<=[$2]"; 866 $3; # NB - not stringified 867 }) 868 $ 869 }x; 870 $s .= "1a=$1:"; 871 $s .= $m ? 'M' : '!M'; 872 } 873 my $ret = '.*?' . 
($n-1); 874 $s .= "<=[$ret])"; 875 return $ret; 876 }; 877 $recurse4->(0); 878 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])' 879 . 'i1=3:<=[0123]1a=D:M<=[.*?2])i1=2:<=[012]1a=C:M<=[.*?1])' 880 . 'i1=1:<=[01]1a=B:M<=[.*?0])i1=0:<=[0]1a=A:M<=[.*?-1])'; 881 is($s, $exp, 'recurse4'); 882 } 883 884 # single (??{}) being invoked recursively via a function 885 886 { 887 my $s = ''; 888 our $recurse5; 889 my @alpha = qw(A B C D E); 890 $recurse5 = sub { 891 my ($n) = @_; 892 $s .= "(n=$n:"; 893 if ($n < 4) { 894 my $m = ("$alpha[$n]" . substr("0123", 0, $n+1)) =~ 895 m{^([A-Z]) 896 ((??{ 897 $s .= "1=$1:"; 898 $recurse5->($n+1); 899 })) 900 $ 901 }x; 902 $s .= "1a=$1:2=$2:"; 903 $s .= $m ? 'M' : '!M'; 904 } 905 my $ret = '.*?' . ($n-1); 906 $s .= "<=[$ret])"; 907 return $ret; 908 }; 909 $recurse5->(0); 910 my $exp = '(n=0:1=A:(n=1:1=B:(n=2:1=C:(n=3:1=D:(n=4:<=[.*?3])' 911 . '1a=D:2=0123:M<=[.*?2])1a=C:2=012:M<=[.*?1])' 912 . '1a=B:2=01:M<=[.*?0])1a=A:2=0:M<=[.*?-1])'; 913 is($s, $exp, 'recurse5'); 914 } 915 916 917 # make sure that errors during compiling run-time code get trapped 918 919 { 920 use re 'eval'; 921 922 my $code = '(?{$x=})'; 923 eval { "a" =~ /^a$code/ }; 924 like($@, qr/syntax error at \(eval \d+\) line \d+/, 'syntax error'); 925 926 $code = '(?{BEGIN{die})'; 927 eval { "a" =~ /^a$code/ }; 928 like($@, 929 qr/BEGIN failed--compilation aborted at \(eval \d+\) line \d+/, 930 'syntax error'); 931 932 use utf8; 933 $code = '(?{Foo::$bar})'; 934 eval { "a" =~ /^a$code/ }; 935 like($@, qr/Bad name after Foo:: at \(eval \d+\) line \d+/, 'UTF8 sytax error'); 936 } 937 938 # make sure that 'use re eval' is propagated into compiling the 939 # pattern returned by (??{}) 940 941 { 942 use re 'eval'; 943 my $pat = 'B(??{1})C'; 944 my $A = 'A'; 945 # compile-time outer code-block 946 ok("AB1CD" =~ /^A(??{$pat})D$/, "re eval propagated compile-time"); 947 # run-time outer code-block 948 ok("AB1CD" =~ /^$A(??{$pat})D$/, "re eval propagated run-time"); 949 
} 950 951 # returning a ref to something that had set magic but wasn't 952 # PERL_MAGIC_qr triggered a false positive assertion failure 953 # The test is not so much concerned with it not matching, 954 # as with not failing the assertion 955 956 { 957 ok("a" !~ /^(a)(??{ \$1 })/, '(??{ ref })'); 958 } 959 960 # make sure the uninit warning from returning an undef var 961 # sees the right var 962 963 { 964 my ($u1, $u2); 965 my $warn = ''; 966 local $SIG{__WARN__} = sub { $warn .= $_[0] }; 967 $u1 =~ /(??{$u2})/ or die; 968 like($warn, qr/value \$u1 in pattern match.*\n.*value at/, 'uninit'); 969 } 970 971 # test that code blocks are called in scalar context 972 973 { 974 my @a = (0); 975 ok("" =~ /^(?{@a})$/, '(?{}) in scalar context'); 976 is($^R, 1, '(?{}) in scalar context: $^R'); 977 ok("1" =~ /^(??{@a})$/, '(??{}) in scalar context'); 978 ok("foo" =~ /^(?(?{@a})foo|bar)$/, '(?(?{})|) in scalar context'); 979 } 980 981 # BEGIN in compiled blocks shouldn't mess with $1 et al 982 983 { 984 use re 'eval'; 985 my $code1 = '(B)(??{ BEGIN { "X" =~ /X/ } $1})(C)'; 986 ok("ABBCA" =~ /^(.)(??{$code1})\1$/, '(?{}) BEGIN and $1'); 987 my $code2 = '(B)(??{ BEGIN { "X" =~ /X/ } $1 =~ /(.)/ ? $1 : ""})(C)'; 988 ok("ABBCA" =~ /^(.)(??{$code2})\1$/, '(?{}) BEGIN and $1 mark 2'); 989 } 990 991 # check that the optimiser is applied to code blocks: see if aelem has 992 # been converted to aelemfast 993 994 { 995 my $out; 996 for my $prog ( 997 '/(?{$a[0]})/', 998 'q() =~ qr/(?{$a[0]})/', 999 'use re q(eval); q() =~ q{(?{$a[0]})}', 1000 'use re q(eval); $c = q{(?{$a[0]})}; /$c/', 1001 'use re q(eval); $c = q{(?{$a[0]})}; /(?{1;})$c/', 1002 ) { 1003 $out = runperl(switches => ["-Dt"], prog => $prog, stderr => 1); 1004 like($out, qr/aelemfast|Recompile perl with -DDEBUGGING/, 1005 "optimise: '$prog'"); 1006 } 1007 } 1008 1009 # [perl #115080] 1010 # Ensure that ?pat? 
matches exactly once, even when the run-time 1011 # pattern changes, and even when the presence of run-time (?{}) affects 1012 # how and when patterns are recompiled 1013 1014 { 1015 my $m; 1016 1017 $m = ''; 1018 for (qw(a a a)) { 1019 $m .= $_ if m?$_?; 1020 } 1021 is($m, 'a', '?pat? with a,a,a'); 1022 1023 $m = ''; 1024 for (qw(a b c)) { 1025 $m .= $_ if m?$_?; 1026 } 1027 is($m, 'a', '?pat? with a,b,c'); 1028 1029 use re 'eval'; 1030 1031 $m = ''; 1032 for (qw(a a a)) { 1033 my $e = qq[(??{"$_"})]; 1034 $m .= $_ if m?$e?; 1035 } 1036 is($m, 'a', '?pat? with (??{a,a,a})'); 1037 1038 $m = ''; 1039 for (qw(a b c)) { 1040 my $e = qq[(??{"$_"})]; 1041 $m .= $_ if m?$e?; 1042 } 1043 is($m, 'a', '?pat? with (??{a,b,c})'); 1044 } 1045 1046 { 1047 # this code won't actually fail, but it used to fail valgrind, 1048 # so its here just to make sure valgrind doesn't fail again 1049 # While examining the ops of the secret anon sub wrapped around 1050 # the qr//, the pad of the sub was in scope, so cSVOPo_sv 1051 # got the const from the wrong pad. By having lots of $s's 1052 # (aka gvsv(*s), this forces the targs of the consts which have 1053 # been moved to the pad, to have high indices. 
1054 1055 sub { 1056 local our $s = "abc"; 1057 my $qr = qr/^(?{1})$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s$s/; 1058 }->(); 1059 pass("cSVOPo_sv"); 1060 } 1061 1062 # [perl #115004] 1063 # code blocks in qr objects that are interpolated in arrays need 1064 # handling the same as if they were interpolated from scalar vars 1065 # (before this code would need 'use re "eval"') 1066 1067 { 1068 use Tie::Array; 1069 1070 local @global; 1071 my @array; 1072 my @refs = (0, \@array, 2); 1073 my @tied; 1074 tie @tied, 'Tie::StdArray'; 1075 { 1076 my $bb = 'B'; 1077 my $dd = 'D'; 1078 @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E'); 1079 @tied = @array; 1080 @global = @array; 1081 } 1082 my $bb = 'X'; 1083 my $dd = 'Y'; 1084 ok("A B C D E=" =~ /@array/, 'bare interpolated array match'); 1085 ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match'); 1086 ok("A B C D E=" =~ /@global/, 'bare interpolated global array match'); 1087 ok("A B C D E=" =~ qr/@global/, 1088 'qr bare interpolated global array match'); 1089 ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match'); 1090 ok("A B C D E=" =~ qr/@{$refs[1]}/, 1091 'qr bare interpolated ref array match'); 1092 ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match'); 1093 ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match'); 1094 ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match'); 1095 ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match'); 1096 ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match'); 1097 ok("aA B C D E=" =~ qr/^a@global=$/, 1098 'qr interpolated global array match'); 1099 ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match'); 1100 ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/, 1101 'qr interpolated ref array match'); 1102 ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match'); 1103 ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match'); 1104 1105 { 1106 local $" = '-'; 1107 
ok("aA-B-C-D-E=" =~ /^a@{array}=$/, 1108 'interpolated array match with local sep'); 1109 ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/, 1110 'qr interpolated array match with local sep'); 1111 ok("aA-B-C-D-E=" =~ /^a@{global}=$/, 1112 'interpolated global array match with local sep'); 1113 ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/, 1114 'qr interpolated global array match with local sep'); 1115 ok("aA-B-C-D-E=" =~ /^a@{tied}=$/, 1116 'interpolated tied array match with local sep'); 1117 ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/, 1118 'qr interpolated tied array match with local sep'); 1119 } 1120 1121 # but don't handle the array ourselves in the presence of \Q etc 1122 1123 @array = ('A', '(?{})'); 1124 @global = @array; 1125 @tied = @array; 1126 ok("aA (?{})=" =~ /^a\Q@{array}\E=$/, 1127 'interpolated array match with \Q'); 1128 ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/, 1129 'qr interpolated array match with \Q'); 1130 ok("aA (?{})=" =~ /^a\Q@{global}\E=$/, 1131 'interpolated global array match with \Q'); 1132 ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/, 1133 'qr interpolated global array match with \Q'); 1134 ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/, 1135 'interpolated ref array match with \Q'); 1136 ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/, 1137 'qr interpolated ref array match with \Q'); 1138 ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/, 1139 'interpolated tied array match with \Q'); 1140 ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/, 1141 'qr interpolated tied array match with \Q'); 1142 1143 # and check it works with an empty array 1144 1145 @array = (); 1146 @global = (); 1147 @tied = (); 1148 ok("a=" =~ /^a@array=$/, 'empty array match'); 1149 ok("a=" =~ qr/^a@array=$/, 'qr empty array match'); 1150 ok("a=" =~ /^a@global=$/, 'empty global array match'); 1151 ok("a=" =~ qr/^a@global=$/, 'qr empty global array match'); 1152 ok("a=" =~ /^a@tied=$/, 'empty tied array match'); 1153 ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match'); 1154 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); 
1155 ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); 1156 ok("a=" =~ qr/^a\Q@{global}\E=$/, 1157 'qr empty global array match with \Q'); 1158 ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q'); 1159 ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q'); 1160 1161 # NB: these below are empty patterns, so they happen to use the 1162 # successful match from the line above 1163 1164 ok("a=" =~ /@array/, 'empty array pattern'); 1165 ok("a=" =~ qr/@array/, 'qr empty array pattern'); 1166 ok("a=" =~ /@global/, 'empty global array pattern'); 1167 ok("a=" =~ qr/@global/, 'qr empty global array pattern'); 1168 ok("a=" =~ /@tied/, 'empty tied pattern'); 1169 ok("a=" =~ qr/@tied/, 'qr empty tied pattern'); 1170 ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q'); 1171 ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q'); 1172 ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q'); 1173 ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q'); 1174 ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q'); 1175 ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q'); 1176 ok("a=" =~ //, 'completely empty pattern'); 1177 ok("a=" =~ qr//, 'qr completely empty pattern'); 1178 } 1179 1180 { 1181 { package o; use overload '""'=>sub { "abc" } } 1182 my $x = bless [],"o"; 1183 my $y = \$x; 1184 (my $y_addr = "$y") =~ y/()//d; # REF(0x7fcb9c02) -> REF0x7fcb9c02 1185 # $y_addr =~ $y should be true, as should $y_addr =~ /(??{$y})/ 1186 "abc$y_addr" =~ /(??{$x})(??{$y})/; 1187 is "$&", "abc$y_addr", 1188 '(??{$x}) does not leak cached qr to (??{\$x}) (match)'; 1189 is scalar "abcabc" =~ /(??{$x})(??{$y})/, "", 1190 '(??{$x}) does not leak cached qr to (??{\$x}) (no match)'; 1191 } 1192 1193 { 1194 sub ReEvalTieTest::TIESCALAR {bless[], "ReEvalTieTest"} 1195 sub ReEvalTieTest::STORE{} 1196 sub ReEvalTieTest::FETCH { "$1" } 1197 tie my $t, "ReEvalTieTest"; 1198 $t = bless [], "o"; 1199 "aab" =~ /(a)((??{"b" =~ m|(.)|; 
$t}))/; 1200 is "[$1 $2]", "[a b]", 1201 '(??{$tied_former_overload}) sees the right $1 in FETCH'; 1202 } 1203 1204 { 1205 my @matchsticks; 1206 my $ref = bless \my $o, "o"; 1207 my $foo = sub { push @matchsticks, scalar "abc" =~ /(??{$ref})/ }; 1208 &$foo; 1209 bless \$o; 1210 () = "$ref"; # flush AMAGIC flag on main 1211 &$foo; 1212 is "@matchsticks", "1 ", 'qr magic is not cached on refs'; 1213 } 1214 1215 { 1216 my ($foo, $bar) = ("foo"x1000, "bar"x1000); 1217 "$foo$bar" =~ /(??{".*"})/; 1218 is "$&", "foo"x1000 . "bar"x1000, 1219 'padtmp swiping does not affect "$a$b" =~ /(??{})/' 1220 } 1221 1222 { 1223 # [perl #129140] 1224 # this used to cause a double-free of the code_block struct 1225 # when re-running the compilation after spotting utf8. 1226 # This test doesn't catch it, but might panic, or fail under 1227 # valgrind etc 1228 1229 my $s = ''; 1230 /$s(?{})\x{100}/ for '', ''; 1231 pass "RT #129140"; 1232 } 1233 1234 # RT #130650 code blocks could get double-freed during a pattern 1235 # compilation croak 1236 1237 { 1238 # this used to panic or give ASAN errors 1239 eval 'qr/(?{})\6/'; 1240 like $@, qr/Reference to nonexistent group/, "RT #130650"; 1241 } 1242 1243 # RT #129881 1244 # on exit from a pattern with multiple code blocks from different 1245 # CVs, PL_comppad wasn't being restored correctly 1246 1247 sub { 1248 # give first few pad slots known values 1249 my ($x1, $x2, $x3, $x4, $x5) = 101..105; 1250 # these vars are in a separate pad 1251 my $r = qr/((?{my ($y1, $y2) = 201..202; 1;})A){2}X/; 1252 # the first alt fails, causing a switch to this anon 1253 # sub's pad 1254 "AAA" =~ /$r|(?{my ($z1, $z2) = 301..302; 1;})A/; 1255 is $x1, 101, "RT #129881: x1"; 1256 is $x2, 102, "RT #129881: x2"; 1257 is $x3, 103, "RT #129881: x3"; 1258 }->(); 1259 1260 1261 # RT #126697 1262 # savestack wasn't always being unwound on EVAL failure 1263 { 1264 local our $i = 0; 1265 my $max = 0; 1266 1267 'ABC' =~ m{ 1268 \A 1269 (?: 1270 (?: AB | A | BC ) 1271 (?{ 
1272 local $i = $i + 1; 1273 $max = $i if $max < $i; 1274 }) 1275 )* 1276 \z 1277 }x; 1278 is $max, 2, "RT #126697"; 1279 } 1280 1281 # RT #132772 1282 # 1283 # Ensure that optimisation of OP_CONST into OP_MULTICONCAT doesn't 1284 # leave any freed ops in the execution path. This is associated 1285 # with rpeep() being called before optimize_optree(), which causes 1286 # gv/rv2sv to be prematurely optimised into gvsv, confusing 1287 # S_maybe_multiconcat when it tries to reorganise a concat subtree 1288 # into a multiconcat list 1289 1290 { 1291 my $a = "a"; 1292 local $b = "b"; # not lexical, so optimised to OP_GVSV 1293 local $_ = "abc"; 1294 ok /^a(??{ $b."c" })$/, "RT #132772 - compile time"; 1295 ok /^$a(??{ $b."c" })$/, "RT #132772 - run time"; 1296 my $qr = qr/^a(??{ $b."c" })$/; 1297 ok /$qr/, "RT #132772 - compile time qr//"; 1298 $qr = qr/(??{ $b."c" })$/; 1299 ok /^a$qr$/, "RT #132772 - compile time qr// compound"; 1300 $qr = qr/$a(??{ $b."c" })$/; 1301 ok /^$qr$/, "RT #132772 - run time qr//"; 1302 } 1303 1304 # RT #133687 1305 # mixing compile-time (?(?{code})) with run-time code blocks 1306 # was failing, because the second pass through the parser 1307 # (which compiles the runtime code blocks) was failing to adequately 1308 # mask the compile-time code blocks to shield them from a second 1309 # compile: /X(?{...})Y/ was being correctly masked as /X________Y/ 1310 # but /X(?(?{...}))Y/ was being incorrectly masked as 1311 # /X(?________)Y/ 1312 1313 { 1314 use re 'eval'; 1315 my $runtime_re = '(??{ "A"; })'; 1316 ok "ABC" =~ /^ $runtime_re (?(?{ 1; })BC) $/x, 'RT #133687 yes'; 1317 ok "ABC" =~ /^ $runtime_re (?(?{ 0; })xy|BC) $/x, 'RT #133687 yes|no'; 1318 } 1319 1320 # RT #134208 1321 # when the string being matched was an SvTEMP and the re_eval died, 1322 # the SV's magic was being restored after the SV was freed. 1323 # Give ASan something to play with. 
1324 1325 { 1326 my $a; 1327 no warnings 'uninitialized'; 1328 eval { "$a $1" =~ /(?{ die })/ }; 1329 pass("SvTEMP 1"); 1330 eval { sub { " " }->() =~ /(?{ die })/ }; 1331 pass("SvTEMP 2"); 1332 } 1333 1334} # End of sub run_tests 1335 13361; 1337