1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9no warnings 'experimental::vlb'; 10use 5.010; 11 12sub run_tests; 13 14$| = 1; 15 16 17BEGIN { 18 chdir 't' if -d 't'; 19 require './test.pl'; 20 set_up_inc('../lib', '.', '../ext/re'); 21 require Config; Config->import; 22 require './charset_tools.pl'; 23 require './loc_tools.pl'; 24} 25 26skip_all_without_unicode_tables(); 27 28my $has_locales = locales_enabled('LC_CTYPE'); 29my $utf8_locale = find_utf8_ctype_locale(); 30 31plan tests => 1267; # Update this when adding/deleting tests. 32 33run_tests() unless caller; 34 35# 36# Tests start here. 37# 38sub run_tests { 39 { 40 # see https://github.com/Perl/perl5/issues/12948 41 my $string="ABCDEFGHIJKL"; 42 my $pat= "(.)" x length($string); 43 my $ok= $string=~/^$pat\z/; 44 foreach my $n (1 .. length($string)) { 45 $ok= eval sprintf 'is $%d, "%s", q($%d = %s); 1', ($n, substr($string,$n-1,1))x2; 46 ok($ok, "eval for \$$n test"); 47 $ok= eval sprintf 'is ${%d}, "%s", q(${%d} = %s); 1', ($n, substr($string,$n-1,1))x2; 48 ok($ok, "eval for \${$n} test"); 49 50 $ok= eval sprintf 'is $0%d, "%s", q($0%d = %s); 1', ($n, substr($string,$n-1,1))x2; 51 ok(!$ok, "eval failed as expected for \$0$n test"); 52 $ok= eval sprintf 'is ${0%d}, "%s", q(${0%d} = %s); 1', ($n, substr($string,$n-1,1))x2; 53 ok(!$ok, "eval failed as expected for \${0$n} test"); 54 55 no strict 'refs'; 56 $ok= eval sprintf 'is ${0b%b}, "%s", q(${0b%b} = %s); 1', ($n, substr($string,$n-1,1))x2; 57 ok($ok, sprintf "eval for \${0b%b} test", $n); 58 $ok= eval sprintf 'is ${0x%x}, "%s", q(${0x%x} = %s); 1', ($n, substr($string,$n-1,1))x2; 59 ok($ok, sprintf "eval for \${0x%x} test", $n); 60 $ok= eval sprintf 'is ${0b%08b}, "%s", q(${0b%08b} = %s); 1', ($n, substr($string,$n-1,1))x2; 61 ok($ok, sprintf "eval for \${0b%b} test", $n); 62 $ok= eval sprintf 'is ${0x%04x}, "%s", q(${0x%04x} = %s); 1', ($n, substr($string,$n-1,1))x2; 63 ok($ok, sprintf "eval for \${0x%04x} test", $n); 64 } 65 } 66 67 my $sharp_s = uni_to_native("\xdf"); 68 69 { 70 my $x = "abc\ndef\n"; 71 (my $x_pretty = $x) =~ s/\n/\\n/g; 72 73 ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; 74 ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; 75 76 # used to be a test for $* 77 ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; 78 79 ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); 80 ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); 81 82 ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; 83 ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); 84 85 ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; 86 ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); 87 88 ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; 89 ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); 90 } 91 92 { 93 $_ = '123'; 94 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; 95 } 96 97 { 98 $_ = 'aaabbbccc'; 99 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', 100 qq [\$_ = '$_'; /(a*b*)(c*)/]; 101 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; 102 unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); 103 104 $_ = 'aaabccc'; 105 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; 106 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 107 108 $_ = 'aaaccc'; 109 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 110 unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); 111 112 $_ = 'abcdef'; 113 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; 114 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; 115 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; 116 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; 117 } 118 119 { 120 # used to be a test for $* 121 ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; 122 } 123 124 { 125 our %XXX = map {($_ => $_)} 123, 234, 345; 126 127 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); 128 while ($_ = shift(@XXX)) { 129 my $e = index ($_, 'not') >= 0 ? '' : 1; 130 my $r = m?(.*)?; 131 is($r, $e, "?(.*)?"); 132 /not/ && reset; 133 if (/not ok 2/) { 134 if ($^O eq 'VMS') { 135 $_ = shift(@XXX); 136 } 137 else { 138 reset 'X'; 139 } 140 } 141 } 142 143 SKIP: { 144 if ($^O eq 'VMS') { 145 skip "Reset 'X'", 1; 146 } 147 ok !keys %XXX, "%XXX is empty"; 148 } 149 150 } 151 152 { 153 my $message = "Test empty pattern"; 154 my $xyz = 'xyz'; 155 my $cde = 'cde'; 156 157 $cde =~ /[^ab]*/; 158 $xyz =~ //; 159 is($&, $xyz, $message); 160 161 my $foo = '[^ab]*'; 162 $cde =~ /$foo/; 163 $xyz =~ //; 164 is($&, $xyz, $message); 165 166 $cde =~ /$foo/; 167 my $null; 168 no warnings 'uninitialized'; 169 $xyz =~ /$null/; 170 is($&, $xyz, $message); 171 172 $null = ""; 173 $xyz =~ /$null/; 174 is($&, $xyz, $message); 175 176 # each entry: regexp, match string, $&, //o match success 177 my @tests = 178 ( 179 [ "", "xy", "x", 1 ], 180 [ "y", "yz", "y", !1 ], 181 ); 182 for my $test (@tests) { 183 my ($re, $str, $matched, $omatch) = @$test; 184 $xyz =~ /x/o; 185 ok($str =~ /$re/, "$str matches /$re/"); 186 is($&, $matched, "on $matched"); 187 $xyz =~ /x/o; 188 is($str =~ /$re/o, $omatch, "$str matches /$re/o (or not)"); 189 } 190 } 191 192 { 193 my $message = q !Check $`, $&, $'!; 194 $_ = 'abcdefghi'; 195 /def/; # optimized up to cmd 196 is("$`:$&:$'", 'abc:def:ghi', $message); 197 198 no warnings 'void'; 199 /cde/ + 0; # optimized only to spat 200 is("$`:$&:$'", 'ab:cde:fghi', $message); 201 202 /[d][e][f]/; # not optimized 203 is("$`:$&:$'", 'abc:def:ghi', $message); 204 } 205 206 { 207 $_ = 'now is the {time for all} good men to come to.'; 208 / \{([^}]*)}/; 209 is($1, 'time for all', "Match braces"); 210 } 211 212 { 213 my $message = "{N,M} quantifier"; 214 $_ = 'xxx {3,4} yyy zzz'; 215 ok(/( {3,4})/, $message); 216 is($1, ' ', $message); 217 unlike($_, qr/( {4,})/, $message); 218 ok(/( {2,3}.)/, $message); 219 is($1, ' y', $message); 220 ok(/(y{2,3}.)/, $message); 221 is($1, 'yyy ', $message); 222 unlike($_, qr/x {3,4}/, $message); 223 unlike($_, qr/^xxx {3,4}/, $message); 224 } 225 226 { 227 my $message = "Test /g"; 228 local $" = ":"; 229 $_ = "now is the time for all good men to come to."; 230 my @words = /(\w+)/g; 231 my $exp = "now:is:the:time:for:all:good:men:to:come:to"; 232 233 is("@words", $exp, $message); 234 235 @words = (); 236 while (/\w+/g) { 237 push (@words, $&); 238 } 239 is("@words", $exp, $message); 240 241 @words = (); 242 pos = 0; 243 while (/to/g) { 244 push(@words, $&); 245 } 246 is("@words", "to:to", $message); 247 248 pos $_ = 0; 249 @words = /to/g; 250 is("@words", "to:to", $message); 251 } 252 253 { 254 $_ = "abcdefghi"; 255 256 my $pat1 = 'def'; 257 my $pat2 = '^def'; 258 my $pat3 = '.def.'; 259 my $pat4 = 'abc'; 260 my $pat5 = '^abc'; 261 my $pat6 = 'abc$'; 262 my $pat7 = 'ghi'; 263 my $pat8 = '\w*ghi'; 264 my $pat9 = 'ghi$'; 265 266 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = 267 my $t6 = my $t7 = my $t8 = my $t9 = 0; 268 269 for my $iter (1 .. 5) { 270 $t1++ if /$pat1/o; 271 $t2++ if /$pat2/o; 272 $t3++ if /$pat3/o; 273 $t4++ if /$pat4/o; 274 $t5++ if /$pat5/o; 275 $t6++ if /$pat6/o; 276 $t7++ if /$pat7/o; 277 $t8++ if /$pat8/o; 278 $t9++ if /$pat9/o; 279 } 280 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; 281 is($x, '505550555', "Test /o"); 282 } 283 284 { 285 my $xyz = 'xyz'; 286 ok "abc" =~ /^abc$|$xyz/, "| after \$"; 287 288 # perl 4.009 says "unmatched ()" 289 my $message = '$ inside ()'; 290 291 my $result; 292 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; 293 is($@, "", $message); 294 is($result, "abc:bc", $message); 295 } 296 297 { 298 my $message = "Scalar /g"; 299 $_ = "abcfooabcbar"; 300 301 ok( /abc/g && $` eq "", $message); 302 ok( /abc/g && $` eq "abcfoo", $message); 303 ok(!/abc/g, $message); 304 305 $message = "Scalar /gi"; 306 pos = 0; 307 ok( /ABC/gi && $` eq "", $message); 308 ok( /ABC/gi && $` eq "abcfoo", $message); 309 ok(!/ABC/gi, $message); 310 311 $message = "Scalar /g"; 312 pos = 0; 313 ok( /abc/g && $' eq "fooabcbar", $message); 314 ok( /abc/g && $' eq "bar", $message); 315 316 $_ .= ''; 317 my @x = /abc/g; 318 is(@x, 2, "/g reset after assignment"); 319 } 320 321 { 322 my $message = '/g, \G and pos'; 323 $_ = "abdc"; 324 pos $_ = 2; 325 /\Gc/gc; 326 is(pos $_, 2, $message); 327 /\Gc/g; 328 is(pos $_, undef, $message); 329 } 330 331 { 332 my $message = '(?{ })'; 333 our $out = 1; 334 'abc' =~ m'a(?{ $out = 2 })b'; 335 is($out, 2, $message); 336 337 $out = 1; 338 'abc' =~ m'a(?{ $out = 3 })c'; 339 is($out, 1, $message); 340 } 341 342 { 343 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 344 my @out = /(?<!foo)bar./g; 345 is("@out", 'bar2 barf', "Negative lookbehind"); 346 } 347 348 { 349 my $message = "REG_INFTY tests"; 350 # Tests which depend on REG_INFTY 351 352 # Defaults assumed if this fails 353 eval { require Config; }; 354 $::reg_infty = $Config::Config{reg_infty} // ((1<<31)-1); 355 $::reg_infty_m = $::reg_infty - 1; 356 $::reg_infty_p = $::reg_infty + 1; 357 $::reg_infty_m = $::reg_infty_m; # Suppress warning. 358 359 # As well as failing if the pattern matches do unexpected things, the 360 # next three tests will fail if you should have picked up a lower-than- 361 # default value for $reg_infty from Config.pm, but have not. 362 SKIP: { 363 skip "REG_INFTY too big to test ($::reg_infty)", 7 364 if $::reg_infty > (1<<16); 365 366 is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); 367 is($@, '', $message); 368 is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); 369 is($@, '', $message); 370 isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); 371 is($@, '', $message); 372 373 # It should be 'a' x 2147483647, but that exhausts memory on 374 # reasonably sized modern machines 375 like('a' x $::reg_infty_m, qr/a{1,}/, 376 "{1,} matches more times than REG_INFTY"); 377 } 378 379 eval "'aaa' =~ /a{1,$::reg_infty}/"; 380 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 381 eval "'aaa' =~ /a{1,$::reg_infty_p}/"; 382 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 383 384 } 385 386 { 387 # Poke a couple more parse failures 388 my $context = 'x' x 256; 389 eval qq("${context}y" =~ /(?<=$context)y/); 390 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; 391 } 392 393 SKIP: 394 { # Long Monsters 395 396 my @trials = (125, 140, 250, 270, 300000, 30); 397 398 skip('limited memory', @trials * 4) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 399 400 for my $l (@trials) { # Ordered to free memory 401 my $a = 'a' x $l; 402 # we do not use like() or unlike() here as the string 403 # is very long and is not useful if the match fails, 404 # the useful part 405 ok("ba$a=" =~ m/a$a=/, sprintf 406 'Long monster: ("ba".("a" x %d)."=") =~ m/aa...a=/', $l); 407 ok("b$a=" !~ m/a$a=/, sprintf 408 'Long monster: ("b" .("a" x %d)."=") !~ m/aa...a=/', $l); 409 ok("b$a=" =~ m/ba+=/, sprintf 410 'Long monster: ("b" .("a" x %d)."=") =~ m/ba+=/', $l); 411 ok("ba$a=" =~ m/b(?:a|b)+=/, sprintf 412 'Long monster: ("ba".("a" x %d)."=") =~ m/b(?:a|b)+=/', $l); 413 } 414 } 415 416 SKIP: 417 { # 20000 nodes, each taking 3 words per string, and 1 per branch 418 419 my %ans = ( 'ax13876y25677lbc' => 1, 420 'ax13876y25677mcb' => 0, # not b. 421 'ax13876y35677nbc' => 0, # Num too big 422 'ax13876y25677y21378obc' => 1, 423 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 424 'ax13876y25677y21378y21378kbc' => 1, 425 'ax13876y25677y21378y21378kcb' => 0, # Not b. 426 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 427 ); 428 429 skip('limited memory', 2 * scalar keys %ans) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 430 431 my $long_constant_len = join '|', 12120 .. 32645; 432 my $long_var_len = join '|', 8120 .. 28645; 433 434 for (keys %ans) { 435 my $message = "20000 nodes, const-len '$_'"; 436 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; 437 438 $message = "20000 nodes, var-len '$_'"; 439 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; 440 } 441 } 442 443 { 444 my $message = "Complicated backtracking"; 445 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; 446 my $expect = "(bla()) ((l)u((e))) (l(e)e)"; 447 448 our $c; 449 sub matchit { 450 m/ 451 ( 452 \( 453 (?{ $c = 1 }) # Initialize 454 (?: 455 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 456 (?! 457 ) # Fail: will unwind one iteration back 458 ) 459 (?: 460 [^()]+ # Match a big chunk 461 (?= 462 [()] 463 ) # Do not try to match subchunks 464 | 465 \( 466 (?{ ++$c }) 467 | 468 \) 469 (?{ --$c }) 470 ) 471 )+ # This may not match with different subblocks 472 ) 473 (?(?{ $c != 0 }) 474 (?! 475 ) # Fail 476 ) # Otherwise the chunk 1 may succeed with $c>0 477 /xg; 478 } 479 480 my @ans = (); 481 my $res; 482 push @ans, $res while $res = matchit; 483 is("@ans", "1 1 1", $message); 484 485 @ans = matchit; 486 is("@ans", $expect, $message); 487 488 $message = "Recursion with (??{ })"; 489 our $matched; 490 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; 491 492 @ans = my @ans1 = (); 493 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; 494 495 is("@ans", "1 1 1", $message); 496 is("@ans1", $expect, $message); 497 498 @ans = m/$matched/g; 499 is("@ans", $expect, $message); 500 501 } 502 503 { 504 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; 505 } 506 507 { 508 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 509 is("@ans", 'a/ b', "Stack may be bad"); 510 } 511 512 { 513 my $message = "Eval-group not allowed at runtime"; 514 my $code = '{$blah = 45}'; 515 our $blah = 12; 516 eval { /(?$code)/ }; 517 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 518 519 $blah = 12; 520 my $res = eval { "xx" =~ /(?$code)/o }; 521 { 522 no warnings 'uninitialized'; 523 chomp $@; my $message = "$message '$@', '$res', '$blah'"; 524 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 525 } 526 527 $code = '=xx'; 528 $blah = 12; 529 $res = eval { "xx" =~ /(?$code)/o }; 530 { 531 no warnings 'uninitialized'; 532 my $message = "$message '$@', '$res', '$blah'"; 533 ok(!$@ && $res, $message); 534 } 535 536 $code = '{$blah = 45}'; 537 $blah = 12; 538 eval "/(?$code)/"; 539 is($blah, 45, $message); 540 541 $blah = 12; 542 /(?{$blah = 45})/; 543 is($blah, 45, $message); 544 } 545 546 { 547 my $message = "Pos checks"; 548 my $x = 'banana'; 549 $x =~ /.a/g; 550 is(pos $x, 2, $message); 551 552 $x =~ /.z/gc; 553 is(pos $x, 2, $message); 554 555 sub f { 556 my $p = $_[0]; 557 return $p; 558 } 559 560 $x =~ /.a/g; 561 is(f (pos $x), 4, $message); 562 } 563 564 { 565 my $message = 'Checking $^R'; 566 our $x = $^R = 67; 567 'foot' =~ /foo(?{$x = 12; 75})[t]/; 568 is($^R, 75, $message); 569 570 $x = $^R = 67; 571 'foot' =~ /foo(?{$x = 12; 75})[xy]/; 572 ok($^R eq '67' && $x eq '12', $message); 573 574 $x = $^R = 67; 575 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 576 ok($^R eq '79' && $x eq '12', $message); 577 } 578 579 { 580 is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); 581 is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); 582 is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); 583 is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); 584 is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); 585 is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); 586 } 587 588 { # Test that charset modifier work, and are interpolated 589 is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); 590 is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); 591 is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); 592 is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); 593 is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); 594 595 my $dual = qr/\b\v$/; 596 my $locale; 597 598 SKIP: { 599 skip 'Locales not available', 1 unless $has_locales; 600 601 use locale; 602 $locale = qr/\b\v$/; 603 is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); 604 no locale; 605 } 606 607 use feature 'unicode_strings'; 608 my $unicode = qr/\b\v$/; 609 is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); 610 is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 611 612 SKIP: { 613 skip 'Locales not available', 1 unless $has_locales; 614 615 is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); 616 } 617 618 no feature 'unicode_strings'; 619 SKIP: { 620 skip 'Locales not available', 1 unless $has_locales; 621 is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); 622 } 623 624 is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); 625 626 SKIP: { 627 skip 'Locales not available', 2 unless $has_locales; 628 629 use locale; 630 is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 631 is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); 632 } 633 } 634 635 { 636 my $message = "Look around"; 637 $_ = 'xabcx'; 638 foreach my $ans ('', 'c') { 639 ok(/(?<=(?=a)..)((?=c)|.)/g, $message); 640 is($1, $ans, $message); 641 } 642 } 643 644 { 645 my $message = "Empty clause"; 646 $_ = 'a'; 647 foreach my $ans ('', 'a', '') { 648 ok(/^|a|$/g, $message); 649 is($&, $ans, $message); 650 } 651 } 652 653 { 654 sub prefixify { 655 my $message = "Prefixify"; 656 { 657 my ($v, $a, $b, $res) = @_; 658 ok($v =~ s/\Q$a\E/$b/, $message); 659 is($v, $res, $message); 660 } 661 } 662 663 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); 664 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); 665 } 666 667 { 668 $_ = 'var="foo"'; 669 /(\")/; 670 ok $1 && /$1/, "Capture a quote"; 671 } 672 673 { 674 no warnings 'closure'; 675 my $message = '(?{ $var } refers to package vars'; 676 package aa; 677 our $c = 2; 678 $::c = 3; 679 '' =~ /(?{ $c = 4 })/; 680 main::is($c, 4, $message); 681 main::is($::c, 3, $message); 682 } 683 684 { 685 is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); 686 like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, 687 'POSIX class [: :] must have valid name'); 688 689 for my $d (qw [= .]) { 690 is(eval "/[[${d}foo${d}]]/", undef); 691 like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, 692 "POSIX syntax [[$d $d]] is an error"); 693 } 694 } 695 696 { 697 # test if failure of patterns returns empty list 698 my $message = "Failed pattern returns empty list"; 699 $_ = 'aaa'; 700 @_ = /bbb/; 701 is("@_", "", $message); 702 703 @_ = /bbb/g; 704 is("@_", "", $message); 705 706 @_ = /(bbb)/; 707 is("@_", "", $message); 708 709 @_ = /(bbb)/g; 710 is("@_", "", $message); 711 } 712 { 713 my $message = 'ACCEPT and CLOSE - '; 714 $_ = "aced"; 715 #12 3 4 5 716 /((a?(*ACCEPT)())())()/ 717 or die "Failed to match"; 718 is($1,"a",$message . "buffer 1 is defined with expected value"); 719 is($2,"a",$message . "buffer 2 is defined with expected value"); 720 ok(!defined($3),$message . "buffer 3 is not defined"); 721 ok(!defined($4),$message . "buffer 4 is not defined"); 722 ok(!defined($5),$message . "buffer 5 is not defined"); 723 ok(!defined($6),$message . "buffer 6 is not defined"); 724 $message= 'NO ACCEPT and CLOSE - '; 725 /((a?())())()/ 726 or die "Failed to match"; 727 is($1,"a",$message . "buffer 1 is defined with expected value"); 728 is($2,"a",$message . "buffer 2 is defined with expected value"); 729 is($3,"", $message . "buffer 3 is defined with expected value"); 730 is($4,"", $message . "buffer 4 is defined with expected value"); 731 is($5,"",$message . "buffer 5 is defined with expected value"); 732 ok(!defined($6),$message . "buffer 6 is not defined"); 733 #12 3 4 5 734 $message = 'ACCEPT and CLOSE - '; 735 /((a?(*ACCEPT)(c))(e))(d)/ 736 or die "Failed to match"; 737 is($1,"a",$message . "buffer 1 is defined with expected value"); 738 is($2,"a",$message . "buffer 2 is defined with expected value"); 739 ok(!defined($3),$message . "buffer 3 is not defined"); 740 ok(!defined($4),$message . "buffer 4 is not defined"); 741 ok(!defined($5),$message . "buffer 5 is not defined"); 742 ok(!defined($6),$message . "buffer 6 is not defined"); 743 $message= 'NO ACCEPT and CLOSE - '; 744 /((a?(c))(e))(d)/ 745 or die "Failed to match"; 746 is($1,"ace", $message . "buffer 1 is defined with expected value"); 747 is($2,"ac", $message . "buffer 2 is defined with expected value"); 748 is($3,"c", $message . "buffer 3 is defined with expected value"); 749 is($4,"e", $message . "buffer 4 is defined with expected value"); 750 is($5,"d", $message . "buffer 5 is defined with expected value"); 751 ok(!defined($6),$message . "buffer 6 is not defined"); 752 } 753 { 754 my $message = '@- and @+ and @{^CAPTURE} tests'; 755 756 $_= "ace"; 757 /c(?=.$)/; 758 is($#{^CAPTURE}, -1, $message); 759 is($#+, 0, $message); 760 is($#-, 0, $message); 761 is($+ [0], 2, $message); 762 is($- [0], 1, $message); 763 ok(!defined $+ [1] && !defined $- [1] && 764 !defined $+ [2] && !defined $- [2], $message); 765 766 /a(c)(e)/; 767 is($#{^CAPTURE}, 1, $message); # one less than $#- 768 is($#+, 2, $message); 769 is($#-, 2, $message); 770 is($+ [0], 3, $message); 771 is($- [0], 0, $message); 772 is(${^CAPTURE}[0], "c", $message); 773 is($+ [1], 2, $message); 774 is($- [1], 1, $message); 775 is(${^CAPTURE}[1], "e", $message); 776 is($+ [2], 3, $message); 777 is($- [2], 2, $message); 778 ok(!defined $+ [3] && !defined $- [3] && 779 !defined ${^CAPTURE}[2] && !defined ${^CAPTURE}[3] && 780 !defined $+ [4] && !defined $- [4], $message); 781 782 # Exists has a special check for @-/@+ - bug 45147 783 ok(exists $-[0], $message); 784 ok(exists $+[0], $message); 785 ok(exists ${^CAPTURE}[0], $message); 786 ok(exists ${^CAPTURE}[1], $message); 787 ok(exists $-[2], $message); 788 ok(exists $+[2], $message); 789 ok(!exists ${^CAPTURE}[2], $message); 790 ok(!exists $-[3], $message); 791 ok(!exists $+[3], $message); 792 ok(exists ${^CAPTURE}[-1], $message); 793 ok(exists ${^CAPTURE}[-2], $message); 794 ok(exists $-[-1], $message); 795 ok(exists $+[-1], $message); 796 ok(exists $-[-3], $message); 797 ok(exists $+[-3], $message); 798 ok(!exists $-[-4], $message); 799 ok(!exists $+[-4], $message); 800 ok(!exists ${^CAPTURE}[-3], $message); 801 802 803 /.(c)(b)?(e)/; 804 is($#{^CAPTURE}, 2, $message); # one less than $#- 805 is($#+, 3, $message); 806 is($#-, 3, $message); 807 is(${^CAPTURE}[0], "c", $message); 808 is(${^CAPTURE}[2], "e", $message . "[$1 $3]"); 809 is($+ [1], 2, $message); 810 is($- [1], 1, $message); 811 is($+ [3], 3, $message); 812 is($- [3], 2, $message); 813 ok(!defined $+ [2] && !defined $- [2] && 814 !defined $+ [4] && !defined $- [4] && 815 !defined ${^CAPTURE}[1], $message); 816 817 /.(c)/; 818 is($#{^CAPTURE}, 0, $message); # one less than $#- 819 is($#+, 1, $message); 820 is($#-, 1, $message); 821 is(${^CAPTURE}[0], "c", $message); 822 is($+ [0], 2, $message); 823 is($- [0], 0, $message); 824 is($+ [1], 2, $message); 825 is($- [1], 1, $message); 826 ok(!defined $+ [2] && !defined $- [2] && 827 !defined $+ [3] && !defined $- [3] && 828 !defined ${^CAPTURE}[1], $message); 829 830 /.(c)(ba*)?/; 831 is($#{^CAPTURE}, 0, $message); # one less than $#- 832 is($#+, 2, $message); 833 is($#-, 1, $message); 834 835 # Check that values don't stick 836 " "=~/()()()(.)(..)/; 837 my($m,$p,$q) = (\$-[5], \$+[5], \${^CAPTURE}[4]); 838 () = "$$_" for $m, $p, $q; # FETCH (or eqv.) 839 " " =~ /()/; 840 is $$m, undef, 'values do not stick to @- elements'; 841 is $$p, undef, 'values do not stick to @+ elements'; 842 is $$q, undef, 'values do not stick to @{^CAPTURE} elements'; 843 } 844 845 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', 846 '${^CAPTURE}[0] = 13', 847 '@- = qw (foo bar)', '$^N = 42') { 848 is(eval $_, undef); 849 like($@, qr/^Modification of a read-only value attempted/, 850 '$^N, @- and @+ are read-only'); 851 } 852 853 { 854 my $message = '\G testing'; 855 $_ = 'aaa'; 856 pos = 1; 857 my @a = /\Ga/g; 858 is("@a", "a a", $message); 859 860 my $str = 'abcde'; 861 pos $str = 2; 862 unlike($str, qr/^\G/, $message); 863 unlike($str, qr/^.\G/, $message); 864 like($str, qr/^..\G/, $message); 865 unlike($str, qr/^...\G/, $message); 866 ok($str =~ /\G../ && $& eq 'cd', $message); 867 ok($str =~ /.\G./ && $& eq 'bc', $message); 868 869 } 870 871 { 872 my $message = '\G and intuit and anchoring'; 873 $_ = "abcdef"; 874 pos = 0; 875 ok($_ =~ /\Gabc/, $message); 876 ok($_ =~ /^\Gabc/, $message); 877 878 pos = 3; 879 ok($_ =~ /\Gdef/, $message); 880 pos = 3; 881 ok($_ =~ /\Gdef$/, $message); 882 pos = 3; 883 ok($_ =~ /abc\Gdef$/, $message); 884 pos = 3; 885 ok($_ =~ /^abc\Gdef$/, $message); 886 pos = 3; 887 ok($_ =~ /c\Gd/, $message); 888 pos = 3; 889 ok($_ =~ /..\GX?def/, $message); 890 } 891 892 { 893 my $s = '123'; 894 pos($s) = 1; 895 my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 896 is("@a", "1", '\G looping'); 897 } 898 899 900 { 901 my $message = 'pos inside (?{ })'; 902 my $str = 'abcde'; 903 our ($foo, $bar); 904 like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); 905 is($foo, $str, $message); 906 is($bar, 2, $message); 907 is(pos $str, undef, $message); 908 909 undef $foo; 910 undef $bar; 911 pos $str = undef; 912 ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); 913 is($foo, $str, $message); 914 is($bar, 2, $message); 915 is(pos $str, 3, $message); 916 917 $_ = $str; 918 undef $foo; 919 undef $bar; 920 like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); 921 is($foo, $str, $message); 922 is($bar, 2, $message); 923 924 undef $foo; 925 undef $bar; 926 ok(/b(?{$foo = $_; $bar = pos})c/g, $message); 927 is($foo, $str, $message); 928 is($bar, 2, $message); 929 is(pos, 3, $message); 930 931 undef $foo; 932 undef $bar; 933 pos = undef; 934 1 while /b(?{$foo = $_; $bar = pos})c/g; 935 is($foo, $str, $message); 936 is($bar, 2, $message); 937 is(pos, undef, $message); 938 939 undef $foo; 940 undef $bar; 941 $_ = 'abcde|abcde'; 942 ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); 943 is($foo, 'abcde|abcde', $message); 944 is($bar, 8, $message); 945 is($_, 'axde|axde', $message); 946 947 # List context: 948 $_ = 'abcde|abcde'; 949 our @res; 950 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; 951 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 952 is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); 953 954 @res = (); 955 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; 956 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 957 is("@res", "'' 'ab' 'cde|abcde' " . 958 "'' 'abc' 'de|abcde' " . 959 "'abcd' 'e|' 'abcde' " . 960 "'abcde|' 'ab' 'cde' " . 961 "'abcde|' 'abc' 'de'", $message); 962 } 963 964 { 965 my $message = '\G anchor checks'; 966 my $foo = 'aabbccddeeffgg'; 967 pos ($foo) = 1; 968 969 ok($foo =~ /.\G(..)/g, $message); 970 is($1, 'ab', $message); 971 972 pos ($foo) += 1; 973 ok($foo =~ /.\G(..)/g, $message); 974 is($1, 'cc', $message); 975 976 pos ($foo) += 1; 977 ok($foo =~ /.\G(..)/g, $message); 978 is($1, 'de', $message); 979 980 ok($foo =~ /\Gef/g, $message); 981 982 undef pos $foo; 983 ok($foo =~ /\G(..)/g, $message); 984 is($1, 'aa', $message); 985 986 ok($foo =~ /\G(..)/g, $message); 987 is($1, 'bb', $message); 988 989 pos ($foo) = 5; 990 ok($foo =~ /\G(..)/g, $message); 991 is($1, 'cd', $message); 992 } 993 994 { 995 my $message = 'basic \G floating checks'; 996 my $foo = 'aabbccddeeffgg'; 997 pos ($foo) = 1; 998 999 ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); 1000 is($1, 'ab', "$message: ab"); 1001 1002 pos ($foo) += 1; 1003 ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); 1004 is($1, 'cc', "$message: cc"); 1005 1006 pos ($foo) += 1; 1007 ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); 1008 is($1, 'de', "$message: de"); 1009 1010 ok($foo =~ /\Gef/g, "$message: \\Gef"); 1011 1012 pos ($foo) = 1; 1013 1014 ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); 1015 is($1, 'aa', "$message: aa"); 1016 1017 pos ($foo) = 2; 1018 1019 ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); 1020 is($1, 'ab', "$message: ab"); 1021 1022 } 1023 1024 { 1025 $_ = '123x123'; 1026 my @res = /(\d*|x)/g; 1027 local $" = '|'; 1028 is("@res", "123||x|123|", "0 match in alternation"); 1029 } 1030 1031 { 1032 my $message = "Match against temporaries (created via pp_helem())" . 1033 " is safe"; 1034 ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); 1035 is($1, "bar", $message); 1036 } 1037 1038 { 1039 my $message = 'package $i inside (?{ }), ' . 1040 'saved substrings and changing $_'; 1041 our @a = qw [foo bar]; 1042 our @b = (); 1043 s/(\w)(?{push @b, $1})/,$1,/g for @a; 1044 is("@b", "f o o b a r", $message); 1045 is("@a", ",f,,o,,o, ,b,,a,,r,", $message); 1046 1047 $message = 'lexical $i inside (?{ }), ' . 1048 'saved substrings and changing $_'; 1049 no warnings 'closure'; 1050 my @c = qw [foo bar]; 1051 my @d = (); 1052 s/(\w)(?{push @d, $1})/,$1,/g for @c; 1053 is("@d", "f o o b a r", $message); 1054 is("@c", ",f,,o,,o, ,b,,a,,r,", $message); 1055 } 1056 1057 { 1058 my $message = 'Brackets'; 1059 our $brackets; 1060 $brackets = qr { 1061 { (?> [^{}]+ | (??{ $brackets }) )* } 1062 }x; 1063 1064 ok("{{}" =~ $brackets, $message); 1065 is($&, "{}", $message); 1066 ok("something { long { and } hairy" =~ $brackets, $message); 1067 is($&, "{ and }", $message); 1068 ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); 1069 is($&, "{ and }", $message); 1070 } 1071 1072 { 1073 $_ = "a-a\nxbb"; 1074 pos = 1; 1075 ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); 1076 } 1077 1078 { 1079 my $message = '\G anchor checks'; 1080 my $text = "aaXbXcc"; 1081 pos ($text) = 0; 1082 ok($text !~ /\GXb*X/g, $message); 1083 } 1084 1085 { 1086 $_ = "xA\n" x 500; 1087 unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); 1088 1089 my $text = "abc dbf"; 1090 my @res = ($text =~ /.*?(b).*?\b/g); 1091 is("@res", "b b", '\b is not special'); 1092 } 1093 1094 { 1095 my $message = '\S, [\S], \s, [\s]'; 1096 my @a = map chr, 0 .. 255; 1097 my @b = grep m/\S/, @a; 1098 my @c = grep m/[^\s]/, @a; 1099 is("@b", "@c", $message); 1100 1101 @b = grep /\S/, @a; 1102 @c = grep /[\S]/, @a; 1103 is("@b", "@c", $message); 1104 1105 @b = grep /\s/, @a; 1106 @c = grep /[^\S]/, @a; 1107 is("@b", "@c", $message); 1108 1109 @b = grep /\s/, @a; 1110 @c = grep /[\s]/, @a; 1111 is("@b", "@c", $message); 1112 1113 # Test an inverted posix class with a char also in the class. 1114 my $nbsp = chr utf8::unicode_to_native(0xA0); 1115 my $non_s = chr utf8::unicode_to_native(0xA1); 1116 my $pat_string = "[^\\S ]"; 1117 unlike(" ", qr/$pat_string/, "Verify ' ' !~ /$pat_string/"); 1118 like("\t", qr/$pat_string/, "Verify '\\t =~ /$pat_string/"); 1119 unlike($nbsp, qr/$pat_string/, "Verify non-utf8-NBSP !~ /$pat_string/"); 1120 utf8::upgrade($nbsp); 1121 like($nbsp, qr/$pat_string/, "Verify utf8-NBSP =~ /$pat_string/"); 1122 unlike($non_s, qr/$pat_string/, "Verify non-utf8-inverted-bang !~ /$pat_string/"); 1123 utf8::upgrade($non_s); 1124 unlike($non_s, qr/$pat_string/, "Verify utf8-inverted-bang !~ /$pat_string/"); 1125 } 1126 { 1127 my $message = '\D, [\D], \d, [\d]'; 1128 my @a = map chr, 0 .. 255; 1129 my @b = grep /\D/, @a; 1130 my @c = grep /[^\d]/, @a; 1131 is("@b", "@c", $message); 1132 1133 @b = grep /\D/, @a; 1134 @c = grep /[\D]/, @a; 1135 is("@b", "@c", $message); 1136 1137 @b = grep /\d/, @a; 1138 @c = grep /[^\D]/, @a; 1139 is("@b", "@c", $message); 1140 1141 @b = grep /\d/, @a; 1142 @c = grep /[\d]/, @a; 1143 is("@b", "@c", $message); 1144 } 1145 { 1146 my $message = '\W, [\W], \w, [\w]'; 1147 my @a = map chr, 0 .. 255; 1148 my @b = grep /\W/, @a; 1149 my @c = grep /[^\w]/, @a; 1150 is("@b", "@c", $message); 1151 1152 @b = grep /\W/, @a; 1153 @c = grep /[\W]/, @a; 1154 is("@b", "@c", $message); 1155 1156 @b = grep /\w/, @a; 1157 @c = grep /[^\W]/, @a; 1158 is("@b", "@c", $message); 1159 1160 @b = grep /\w/, @a; 1161 @c = grep /[\w]/, @a; 1162 is("@b", "@c", $message); 1163 } 1164 1165 { 1166 # see if backtracking optimization works correctly 1167 my $message = 'Backtrack optimization'; 1168 like("\n\n", qr/\n $ \n/x, $message); 1169 like("\n\n", qr/\n* $ \n/x, $message); 1170 like("\n\n", qr/\n+ $ \n/x, $message); 1171 like("\n\n", qr/\n? $ \n/x, $message); 1172 like("\n\n", qr/\n*? $ \n/x, $message); 1173 like("\n\n", qr/\n+? $ \n/x, $message); 1174 like("\n\n", qr/\n?? $ \n/x, $message); 1175 unlike("\n\n", qr/\n*+ $ \n/x, $message); 1176 unlike("\n\n", qr/\n++ $ \n/x, $message); 1177 like("\n\n", qr/\n?+ $ \n/x, $message); 1178 } 1179 1180 { 1181 package S; 1182 use overload '""' => sub {'Object S'}; 1183 sub new {bless []} 1184 1185 my $message = "Ref stringification"; 1186 ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); 1187 ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); 1188 ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); 1189 ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); 1190 ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); 1191 } 1192 1193 { 1194 my $message = "Test result of match used as match"; 1195 ok('a1b' =~ ('xyz' =~ /y/), $message); 1196 is($`, 'a', $message); 1197 ok('a1b' =~ ('xyz' =~ /t/), $message); 1198 is($`, 'a', $message); 1199 } 1200 1201 { 1202 my $message = '"1" is not \s'; 1203 warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, 1204 undef, "$message (did not warn)"); 1205 } 1206 1207 { 1208 my $message = '\s, [[:space:]] and [[:blank:]]'; 1209 my %space = (spc => " ", 1210 tab => "\t", 1211 cr => "\r", 1212 lf => "\n", 1213 ff => "\f", 1214 # There's no \v but the vertical tabulator seems miraculously 1215 # be 11 both in ASCII and EBCDIC. 1216 vt => chr(11), 1217 false => "space"); 1218 1219 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; 1220 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; 1221 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; 1222 1223 is("@space0", "cr ff lf spc tab vt", $message); 1224 is("@space1", "cr ff lf spc tab vt", $message); 1225 is("@space2", "spc tab", $message); 1226 } 1227 1228 { 1229 my $n= 50; 1230 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't 1231 # seem to be predictable. Slight changes to the test make it fail earlier or later. 1232 foreach my $i (0 .. $n) 1233 { 1234 my $str= "\n" x $i; 1235 ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; 1236 } 1237 } 1238 { 1239 # we are actually testing that we dont die when executing these patterns 1240 use utf8; 1241 my $e = "Böck"; 1242 ok(utf8::is_utf8($e),"got a unicode string - rt75680"); 1243 1244 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); 1245 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); 1246 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); 1247 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); 1248 } 1249 { 1250 # we are actually testing that we dont die when executing these patterns 1251 my $e = "B" . uni_to_native("\x{f6}") . "ck"; 1252 ok(!utf8::is_utf8($e), "got a latin string - rt75680"); 1253 1254 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); 1255 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); 1256 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); 1257 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); 1258 } 1259 1260 { 1261 # 1262 # Tests for bug 77414. 1263 # 1264 1265 my $message = '\p property after empty * match'; 1266 { 1267 like("1", qr/\s*\pN/, $message); 1268 like("-", qr/\s*\p{Dash}/, $message); 1269 like(" ", qr/\w*\p{Blank}/, $message); 1270 } 1271 1272 like("1", qr/\s*\pN+/, $message); 1273 like("-", qr/\s*\p{Dash}{1}/, $message); 1274 like(" ", qr/\w*\p{Blank}{1,4}/, $message); 1275 1276 } 1277 1278 { # Some constructs with Latin1 characters cause a utf8 string not 1279 # to match itself in non-utf8 1280 my $c = uni_to_native("\xc0"); 1281 my $pattern = my $utf8_pattern = qr/(($c)+,?)/; 1282 utf8::upgrade($utf8_pattern); 1283 ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; 1284 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; 1285 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; 1286 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; 1287 utf8::upgrade($c); 1288 ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; 1289 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; 1290 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; 1291 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; 1292 } 1293 1294 { # Make sure can override the formatting 1295 use feature 'unicode_strings'; 1296 ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; 1297 ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; 1298 } 1299 1300 { 1301 my $str= "\x{100}"; 1302 chop $str; 1303 my $qr= qr/$str/; 1304 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); 1305 $str= ""; 1306 $qr= qr/$str/; 1307 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); 1308 1309 } 1310 1311 { 1312 local $::TODO = "[perl #38133]"; 1313 1314 "A" =~ /(((?:A))?)+/; 1315 my $first = $2; 1316 1317 "A" =~ /(((A))?)+/; 1318 my $second = $2; 1319 1320 is($first, $second); 1321 } 1322 1323 { 1324 # RT #3516: \G in a m//g expression causes problems 1325 my $count = 0; 1326 while ("abc" =~ m/(\G[ac])?/g) { 1327 last if $count++ > 10; 1328 } 1329 ok($count < 10, 'RT #3516 A'); 1330 1331 $count = 0; 1332 while ("abc" =~ m/(\G|.)[ac]/g) { 1333 last if $count++ > 10; 1334 } 1335 ok($count < 10, 'RT #3516 B'); 1336 1337 $count = 0; 1338 while ("abc" =~ m/(\G?[ac])?/g) { 1339 last if $count++ > 10; 1340 } 1341 ok($count < 10, 'RT #3516 C'); 1342 } 1343 { 1344 # RT #84294: Is this a bug in the simple Perl regex? 1345 # : Nested buffers and (?{...}) dont play nicely on partial matches 1346 our @got= (); 1347 ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); 1348 my $want= "'ab', 'a', 'b'"; 1349 my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); 1350 is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); 1351 } 1352 1353 { 1354 # Suppress warnings, as the non-unicode one comes out even if turn off 1355 # warnings here (because the execution is done in another scope). 1356 local $SIG{__WARN__} = sub {}; 1357 my $str = "\x{110000}"; 1358 1359 unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); 1360 like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); 1361 like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); 1362 unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); 1363 } 1364 1365 { 1366 # Test that IDstart works, but because the author (khw) knows 1367 # regexes much better than the rest of the core, it is being done here 1368 # in the context of a regex which relies on buffer names beginng with 1369 # IDStarts. 1370 use utf8; 1371 my $str = "abc"; 1372 like($str, qr/(?<a>abc)/, "'a' is legal IDStart"); 1373 like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); 1374 like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); 1375 like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); 1376 1377 # This test works on Unicode 6.0 in which U+2118 and U+212E are legal 1378 # IDStarts there, but are not Word characters, and therefore Perl 1379 # doesn't allow them to be IDStarts. But there is no guarantee that 1380 # Unicode won't change things around in the future so that at some 1381 # future Unicode revision these tests would need to be revised. 1382 foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { 1383 my $prog = <<"EOP"; 1384use utf8;; 1385"abc" =~ qr/(?<$char>abc)/; 1386EOP 1387 utf8::encode($prog); 1388 fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, 1389 sprintf("'U+%04X not legal IDFirst'", ord($char))); 1390 } 1391 } 1392 1393 { # [perl #101710] 1394 my $pat = "b"; 1395 utf8::upgrade($pat); 1396 like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); 1397 } 1398 1399 { # Crash with @a =~ // warning 1400 local $SIG{__WARN__} = sub { 1401 pass 'no crash for @a =~ // warning' 1402 }; 1403 eval ' sub { my @a =~ // } '; 1404 } 1405 1406 { # Concat overloading and qr// thingies 1407 my @refs; 1408 my $qr = qr//; 1409 package Cat { 1410 require overload; 1411 overload->import( 1412 '""' => sub { ${$_[0]} }, 1413 '.' => sub { 1414 push @refs, ref $_[1] if ref $_[1]; 1415 bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" 1416 } 1417 ); 1418 } 1419 my $s = "foo"; 1420 my $o = bless \$s, Cat::; 1421 /$o$qr/; 1422 is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; 1423 } 1424 1425 { 1426 my $count=0; 1427 my $str="\n"; 1428 $count++ while $str=~/.*/g; 1429 is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; 1430 my $class_count= 0; 1431 $class_count++ while $str=~/[^\n]*/g; 1432 is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; 1433 my $anch_count= 0; 1434 $anch_count++ while $str=~/^.*/mg; 1435 is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; 1436 } 1437 1438 { # [perl #111174] 1439 use re '/u'; 1440 my $A_grave = uni_to_native("\xc0"); 1441 like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u"; 1442 use re '/a'; 1443 unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; 1444 use re '/aa'; 1445 unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; 1446 unlike 'k', qr'(?i:\N{KELVIN SIGN})', "(?i: shouldn't lose the passed in /aa"; 1447 } 1448 1449 { 1450 # the test for whether the pattern should be re-compiled should 1451 # consider the UTF8ness of the previous and current pattern 1452 # string, as well as the physical bytes of the pattern string 1453 1454 for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") { 1455 ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); 1456 } 1457 } 1458 1459 # #113682 more overloading and qr// 1460 # when doing /foo$overloaded/, if $overloaded returns 1461 # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' 1462 # shouldn't be required. Via '.', it still is. 1463 { 1464 package Qr0; 1465 use overload 'qr' => sub { qr/(??{50})/ }; 1466 1467 package Qr1; 1468 use overload '""' => sub { qr/(??{51})/ }; 1469 1470 package Qr2; 1471 use overload '.' => sub { $_[1] . qr/(??{52})/ }; 1472 1473 package Qr3; 1474 use overload '""' => sub { qr/(??{7})/ }, 1475 '.' => sub { $_[1] . qr/(??{53})/ }; 1476 1477 package Qr_indirect; 1478 use overload '""' => sub { $_[0][0] }; 1479 1480 package main; 1481 1482 for my $i (0..3) { 1483 my $o = bless [], "Qr$i"; 1484 if ((0,0,1,1)[$i]) { 1485 eval { "A5$i" =~ /^A$o$/ }; 1486 like($@, qr/Eval-group not allowed/, "Qr$i"); 1487 eval { "5$i" =~ /$o/ }; 1488 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1489 "Qr$i bare"); 1490 { 1491 use re 'eval'; 1492 ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); 1493 eval { "5$i" =~ /$o/ }; 1494 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1495 "Qr$i bare - with use re eval"); 1496 } 1497 } 1498 else { 1499 ok("A5$i" =~ /^A$o$/, "Qr$i"); 1500 ok("5$i" =~ /$o/, "Qr$i bare"); 1501 } 1502 } 1503 1504 my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; 1505 ok("A51" =~ /^A$o/, "Qr_indirect"); 1506 ok("51" =~ /$o/, "Qr_indirect bare"); 1507 } 1508 1509 { # Various flags weren't being set when a [] is optimized into an 1510 # EXACTish node 1511 ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization"); 1512 } 1513 1514 { # Test that it avoids splitting a multi-char fold across nodes. 1515 # These all fold to things that are like 'ss', which, if split across 1516 # nodes could fail to match a single character that folds to the 1517 # combination. 1F0 byte expands when folded; 1518 for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") { 1519 my $length = 260; # Long enough to overflow an EXACTFish regnode 1520 my $p = $char x $length; 1521 my $s = ($char eq $sharp_s) ? 'ss' 1522 : $char eq "\x{1F0}" 1523 ? "j\x{30c}" 1524 : 'ff'; 1525 $s = $s x $length; 1526 for my $charset (qw(u d l aa)) { 1527 for my $utf8 (0..1) { 1528 for my $locale ('C', $utf8_locale) { 1529 SKIP: 1530 { 1531 skip "test skipped for non-C locales", 2 1532 if $charset ne 'l' 1533 && (! defined $locale || $locale ne 'C'); 1534 if ($charset eq 'l') { 1535 skip 'Locales not available', 2 1536 unless $has_locales; 1537 if (! defined $locale) { 1538 skip "No UTF-8 locale", 2; 1539 } 1540 skip "Can't test in miniperl",2 1541 if is_miniperl(); 1542 1543 require POSIX; 1544 POSIX::setlocale(&LC_CTYPE, $locale); 1545 } 1546 1547 my $pat = $p; 1548 utf8::upgrade($pat) if $utf8; 1549 my $should_pass = 1550 ( $charset eq 'u' 1551 || ($charset eq 'd' && $utf8) 1552 || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/ 1553 || ord $char > 255)) 1554 || ($charset eq 'aa' && $char =~ /[[:ascii:]]/) 1555 || ($charset eq 'l' && $locale ne 'C') 1556 || ($charset eq 'l' && $char =~ /[[:ascii:]]/) 1557 ); 1558 my $name = "(?i$charset), utf8=$utf8, locale=$locale," 1559 . " char=" . sprintf "%x", ord $char; 1560 no warnings 'locale'; 1561 is (eval " '$s' =~ qr/(?i$charset)$pat/;", 1562 $should_pass, $name); 1563 fail "$name: $@" if $@; 1564 is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;", 1565 $should_pass, "extra a, $name"); 1566 fail "$name: $@" if $@; 1567 } 1568 } 1569 } 1570 } 1571 } 1572 } 1573 1574 SKIP: 1575 { 1576 skip "no re debug", 5 if is_miniperl; 1577 my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus 1578 # small change 1579 my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)"; 1580 my $pattern = "$pattern_prefix; qr/$s/;"; 1581 my $result = fresh_perl($pattern); 1582 if ($? != 0) { # Re-run so as to display STDERR. 1583 fail($pattern); 1584 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1585 } 1586 like($result, qr/Final program[^X]*\bLEXACT\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1587 "Check that LEXACT nodes are generated"); 1588 like($s, qr/$s/, "Check that LEXACT nodes match"); 1589 like("a$s", qr/a$s/, "Previous test preceded by an 'a'"); 1590 substr($s, 260000, 1) = "\x{100}"; 1591 $pattern = "$pattern_prefix; qr/$s/;"; 1592 $result = fresh_perl($pattern, { 'wide_chars' => 1 } ); 1593 if ($? != 0) { # Re-run so as to display STDERR. 1594 fail($pattern); 1595 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1596 } 1597 like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1598 "Check that an LEXACT_ONLY node is generated with a \\x{100}"); 1599 like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match"); 1600 } 1601 1602 { 1603 for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") { 1604 my $utf8_char = $char; 1605 utf8::upgrade($utf8_char); 1606 my $display = $char; 1607 $display = display($display); 1608 my $utf8_display = "utf8::upgrade(\"$display\")"; 1609 1610 like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); 1611 like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); 1612 like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); 1613 like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); 1614 } 1615 } 1616 1617 { 1618 # #116148: Pattern utf8ness sticks around globally 1619 # the utf8 in the first match was sticking around for the second 1620 # match 1621 1622 use feature 'unicode_strings'; 1623 1624 my $x = "\x{263a}"; 1625 $x =~ /$x/; 1626 1627 my $text = "Perl"; 1628 ok("Perl" =~ /P.*$/i, '#116148'); 1629 } 1630 1631 { # 118297: Mixing up- and down-graded strings in regex 1632 utf8::upgrade(my $u = "\x{e5}"); 1633 utf8::downgrade(my $d = "\x{e5}"); 1634 my $warned; 1635 local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; 1636 my $re = qr/$u$d/; 1637 ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); 1638 my $c = "\x{e5}\x{e5}"; 1639 utf8::downgrade($c); 1640 like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); 1641 utf8::upgrade($c); 1642 like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); 1643 } 1644 1645 { 1646 # if we have 87 capture buffers defined then \87 should refer to the 87th. 1647 # test that this is true for 1..100 1648 # Note that this test causes the engine to recurse at runtime, and 1649 # hence use a lot of C stack. 1650 1651 # Compiling for all 100 nested captures blows the stack under 1652 # clang and ASan; reduce. 1653 my $max_captures = $Config{ccflags} =~ /sanitize/ ? 20 : 100; 1654 1655 for my $i (1..100) { 1656 if ($i > $max_captures) { 1657 pass("skipping $i buffers under ASan aa"); 1658 pass("skipping $i buffers under ASan aba"); 1659 next; 1660 } 1661 my $capture= "a"; 1662 $capture= "($capture)" for 1 .. $i; 1663 for my $mid ("","b") { 1664 my $str= "a${mid}a"; 1665 my $backref= "\\$i"; 1666 eval { 1667 ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); 1668 1; 1669 } or do { 1670 is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); 1671 }; 1672 } 1673 } 1674 } 1675 1676 # this mixture of readonly (not COWable) and COWable strings 1677 # messed up the capture buffers under COW. The actual test results 1678 # are incidental; the issue is was an AddressSanitizer failure 1679 { 1680 my $c ='AB'; 1681 my $res = ''; 1682 for ($c, 'C', $c, 'DE') { 1683 ok(/(.)/, "COWable match"); 1684 $res .= $1; 1685 } 1686 is($res, "ACAD"); 1687 } 1688 1689 1690 { 1691 # RT #45667 1692 # /[#$x]/x didn't interpolate the var $x. 1693 my $b = 'cd'; 1694 my $s = 'abcd$%#&'; 1695 $s =~ s/[a#$b%]/X/g; 1696 is ($s, 'XbXX$XX&', 'RT #45667 without /x'); 1697 $s = 'abcd$%#&'; 1698 $s =~ s/[a#$b%]/X/gx; 1699 is ($s, 'XbXX$XX&', 'RT #45667 with /x'); 1700 } 1701 1702 { 1703 no warnings "uninitialized"; 1704 my @a; 1705 $a[1]++; 1706 /@a/; 1707 pass('no crash with /@a/ when array has nonexistent elems'); 1708 } 1709 1710 { 1711 is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), 1712 "ok\n", 1713 'deleting *^R does not result in crashes'; 1714 no warnings 'once'; 1715 *^R = *caretRglobwithnoscalar; 1716 "" =~ /(?{42})/; 1717 is $^R, 42, 'assigning to *^R does not result in a crash'; 1718 is runperl( 1719 stderr => 1, 1720 prog => 'eval q|' 1721 .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' 1722 .' print qq-$^R\n-' 1723 .'|' 1724 ), 1725 "42\n", 1726 'undefining *^R within (??{}) does not result in a crash'; 1727 } 1728 1729 SKIP: { # Test literal range end point special handling 1730 unless ($::IS_EBCDIC) { 1731 skip "Valid only for EBCDIC", 24; 1732 } 1733 1734 like("\x89", qr/[i-j]/, '"\x89" should match [i-j]'); 1735 unlike("\x8A", qr/[i-j]/, '"\x8A" shouldnt match [i-j]'); 1736 unlike("\x90", qr/[i-j]/, '"\x90" shouldnt match [i-j]'); 1737 like("\x91", qr/[i-j]/, '"\x91" should match [i-j]'); 1738 1739 like("\x89", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); 1740 unlike("\x8A", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1741 unlike("\x90", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1742 like("\x91", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); 1743 1744 like("\x89", qr/[i-\N{U+6A}]/, '"\x89" should match [i-\N{U+6A}]'); 1745 unlike("\x8A", qr/[i-\N{U+6A}]/, '"\x8A" shouldnt match [i-\N{U+6A}]'); 1746 unlike("\x90", qr/[i-\N{U+6A}]/, '"\x90" shouldnt match [i-\N{U+6A}]'); 1747 like("\x91", qr/[i-\N{U+6A}]/, '"\x91" should match [i-\N{U+6A}]'); 1748 1749 like("\x89", qr/[\N{U+69}-\N{U+6A}]/, '"\x89" should match [\N{U+69}-\N{U+6A}]'); 1750 unlike("\x8A", qr/[\N{U+69}-\N{U+6A}]/, '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); 1751 unlike("\x90", qr/[\N{U+69}-\N{U+6A}]/, '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); 1752 like("\x91", qr/[\N{U+69}-\N{U+6A}]/, '"\x91" should match [\N{U+69}-\N{U+6A}]'); 1753 1754 like("\x89", qr/[i-\x{91}]/, '"\x89" should match [i-\x{91}]'); 1755 like("\x8A", qr/[i-\x{91}]/, '"\x8A" should match [i-\x{91}]'); 1756 like("\x90", qr/[i-\x{91}]/, '"\x90" should match [i-\x{91}]'); 1757 like("\x91", qr/[i-\x{91}]/, '"\x91" should match [i-\x{91}]'); 1758 1759 # Need to use eval, because tries to compile on ASCII platforms even 1760 # though the tests are skipped, and fails because 0x89-j is an illegal 1761 # range there. 1762 like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]'); 1763 like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]'); 1764 like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]'); 1765 like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]'); 1766 } 1767 1768 # These are based on looking at the code in regcomp.c 1769 # We don't look for specific code, just the existence of an SSC 1770 foreach my $re (qw( qr/a?c/ 1771 qr/a?c/i 1772 qr/[ab]?c/ 1773 qr/\R?c/ 1774 qr/\d?c/d 1775 qr/\w?c/l 1776 qr/\s?c/a 1777 qr/[[:lower:]]?c/u 1778 )) { 1779 SKIP: { 1780 skip "no re-debug under miniperl" if is_miniperl; 1781 my $prog = <<"EOP"; 1782use re qw(Debug COMPILE); 1783$re; 1784EOP 1785 fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); 1786 } 1787 } 1788 1789 { 1790 like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; 1791 } 1792 1793 SKIP: { 1794 skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC; 1795 1796 # Verify that the very last Latin-1 U+00FF 1797 # (LATIN SMALL LETTER Y WITH DIAERESIS) 1798 # and its UPPER counterpart (U+0178 which is pure Unicode), 1799 # and likewise for the very first pure Unicode 1800 # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly, 1801 # and there are no off-by-one logic errors in the transition zone. 1802 1803 ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l"); 1804 ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u"); 1805 ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l"); 1806 ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u"); 1807 1808 # U+00FF with U+05D0 (non-casing Hebrew letter). 1809 ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l"); 1810 ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u"); 1811 ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l"); 1812 ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u"); 1813 1814 # U+0100. 1815 ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u"); 1816 ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l"); 1817 ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u"); 1818 ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l"); 1819 } 1820 1821 { 1822 use utf8; 1823 ok("abc" =~ /a b c/x, "NEL is white-space under /x"); 1824 } 1825 1826 { 1827 ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)"); 1828 ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]"); 1829 ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta 1830 "'\\{' is a literal in qr{...}, where it could be a quantifier"); 1831 1832 # This one is for completeness 1833 ok('a<b>c' =~ qr<a\<b\>c>, "'\\<' is a literal in qr<...>)"); 1834 } 1835 1836 { # Was getting optimized into EXACT (non-folding node) 1837 my $x = qr/[x]/i; 1838 utf8::upgrade($x); 1839 like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); 1840 } 1841 1842 { # Special handling of literal-ended ranges in [...] was breaking this 1843 use utf8; 1844 like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]"); 1845 } 1846 1847 { # [perl #123539] 1848 like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1849 like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1850 } 1851 1852 { # [perl #123604] 1853 my($s, $x, @x) = ('abc', 'a', 'd'); 1854 my $long = 'b' x 2000; 1855 my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0}; 1856 $eval =~ s{bbb}{$long}; 1857 my $match = eval $eval; 1858 ok(1, "did not crash"); 1859 ok($match, "[bbb...] resolved as character class, not subscript"); 1860 } 1861 1862 { # [perl #123755] 1863 for my $pat ('(??', '(?P', '(?i-') { 1864 eval qq{ qr/$pat/ }; 1865 ok(1, "qr/$pat/ did not crash"); 1866 eval qq{ qr/${pat}\x{123}/ }; 1867 my $e = $@; 1868 like($e, qr{\x{123}}, 1869 "qr/${pat}x/ shows x in error even if it's a wide character"); 1870 } 1871 } 1872 1873 { 1874 # Expect one of these sizes to cause overflow and wrap to negative 1875 for my $bits (32, 64) { 1876 my $wrapneg = 2 ** ($bits - 2) * 3; 1877 for my $sign ('', '-') { 1878 my $pat = sprintf "qr/(?%s%u)/", $sign, $wrapneg; 1879 eval $pat; 1880 ok(1, "big backref $pat did not crash"); 1881 } 1882 } 1883 } 1884 { 1885 # Test that we handle qr/\8888888/ and variants without an infinite loop, 1886 # we use a test within a test so we can todo it, and make sure we don't 1887 # infinite loop our tests. 1888 # NOTE - Do not put quotes in the code! 1889 # NOTE - We have to triple escape the backref in the pattern below. 1890 my $code=' 1891 BEGIN{require q(./test.pl);} 1892 watchdog(3); 1893 for my $len (1 .. 20) { 1894 my $eights= q(8) x $len; 1895 eval qq{ qr/\\\\$eights/ }; 1896 } 1897 print q(No infinite loop here!); 1898 '; 1899 fresh_perl_is($code, "No infinite loop here!", {}, 1900 "test that we handle things like m/\\888888888/ without infinite loops" ); 1901 } 1902 1903 SKIP: 1904 { # Test that we handle some malformed UTF-8 without looping [perl 1905 # #123562] 1906 skip "no Encode", 1 if is_miniperl; 1907 my $code=' 1908 BEGIN{require q(./test.pl);} 1909 use Encode qw(_utf8_on); 1910 # \x80 and \x41 are continuation bytes in their respective 1911 # character sets 1912 my $malformed = (ord("A") == 65) ? "a\x80\n" : "a\x41\n"; 1913 utf8::downgrade($malformed); 1914 _utf8_on($malformed); 1915 watchdog(3); 1916 $malformed =~ /(\n\r|\r)$/; 1917 print q(No infinite loop here!); 1918 '; 1919 fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, 1920 "test that we handle some UTF-8 malformations without looping" ); 1921 } 1922 1923 { 1924 # [perl #123843] hits SEGV trying to compile this pattern 1925 my $match; 1926 eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) }; 1927 ok(1, "compiled GOSUB in CURLYM ok"); 1928 is($match, 'xxyxxyx', "matched GOSUB in CURLYM"); 1929 } 1930 1931 { 1932 # [perl #123852] doesn't avoid all the capture-related work with 1933 # //n, leading to possible memory corruption 1934 eval q{ qr{()(?1)}n }; 1935 my $error = $@; 1936 ok(1, "qr{()(?1)}n didn't crash"); 1937 like($error, qr{Reference to nonexistent group}, 1938 'gave appropriate error for qr{()(?1)}n'); 1939 } 1940 1941 { 1942 # [perl #126406] panic with unmatchable quantifier 1943 my $code=' 1944 no warnings "regexp"; 1945 "" =~ m/(.0\N{6,0}0\N{6,0}000000000000000000000000000000000)/; 1946 '; 1947 fresh_perl_is($code, "", {}, 1948 "perl [#126406] panic"); 1949 } 1950 { 1951 my $bug="[perl #126182]"; # test for infinite pattern recursion 1952 for my $tuple ( 1953 [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ], 1954 [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ], 1955 [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/', 1956 "died", "inter-cyclic optional left recursion dies" ], 1957 [ 'q(abc) =~ /a((?1)?)c/', "died", "optional left recursion dies" ], 1958 [ 'q(abc) =~ /a((?1)??)c/', "died", "min mod left recursion dies" ], 1959 [ 'q(abc) =~ /a((?1)*)c/', "died", "* left recursion dies" ], 1960 [ 'q(abc) =~ /a((?1)+)c/', "died", "+ left recursion dies" ], 1961 [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ], 1962 1963 [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ], 1964 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched", 1965 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1966 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched", 1967 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1968 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched", 1969 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1970 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched", 1971 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1972 ) { 1973 my ($expr, $expect, $test_name, $cap1)= @$tuple; 1974 # avoid quotes in this code! 1975 my $code=' 1976 BEGIN{require q(./test.pl);} 1977 watchdog(3); 1978 my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' . 1979 ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') . 1980 ' q(matched) }) 1981 || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) ); 1982 print $status; 1983 '; 1984 fresh_perl_is($code, $expect, {}, "$bug - $test_name" ); 1985 } 1986 } 1987 { 1988 my $is_cygwin = $^O eq "cygwin"; 1989 local $::TODO = "this flaps on github cygwin vm, but not on cygwin iron #18129" 1990 if $is_cygwin; 1991 my $expected = "Timeout"; 1992 my $code = ' 1993 BEGIN{require q(test.pl);} 1994 watchdog(3); 1995 $SIG{ALRM} = sub {print "'.$expected.'\n"; exit(1)}; 1996 alarm 1; 1997 $_ = "a" x 1000 . "b" x 1000 . "c" x 1000; 1998 /.*a.*b.*c.*[de]/; 1999 print "increase the multipliers in the regex above to run the regex longer"; 2000 '; 2001 # this flaps on github cygwin vm, but not on cygwin iron #18129 2002 # so on cygwin it's run for 50 seconds to see if it fails eventually 2003 my $max = $is_cygwin ? 50 : 1; 2004 my ($iter, $result, $status); 2005 for my $i (1..$max) { 2006 $iter = $i; 2007 $result = fresh_perl($code,{}); 2008 $status = $?; 2009 last if $result ne $expected; 2010 } 2011 is($result, $expected, "Test Perl 73464") 2012 or diag "PROG:", $code, "STATUS:", $status, "failed on iteration: $iter"; 2013 } 2014 2015 { # [perl #128686], crashed the the interpreter 2016 my $AE = chr utf8::unicode_to_native(0xC6); 2017 my $ae = chr utf8::unicode_to_native(0xE6); 2018 my $re = qr/[$ae\s]/i; 2019 ok($AE !~ $re, '/[\xE6\s]/i doesn\'t match \xC6 when not in UTF-8'); 2020 utf8::upgrade $AE; 2021 ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8'); 2022 } 2023 2024 { 2025 is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|); 2026 } 2027 2028 { 2029 my $str = "a\xB6"; 2030 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - latin1 case" ); 2031 utf8::upgrade($str); 2032 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - utf8 case" ); 2033 } 2034 { 2035 my $got= run_perl( switches => [ '-l' ], prog => <<'EOF_CODE' ); 2036 my $died= !eval { 2037 $_=qq(ab); 2038 print; 2039 my $p=qr/(?{ s!!x! })/; 2040 /$p/; 2041 print; 2042 /a/; 2043 /$p/; 2044 print; 2045 /b/; 2046 /$p/; 2047 print; 2048 //; 2049 1; 2050 }; 2051 $error = $died ? ($@ || qq(Zombie)) : qq(none); 2052 print $died ? qq(died) : qq(lived); 2053 print qq(Error: $@); 2054EOF_CODE 2055 my @got= split /\n/, $got; 2056 is($got[0],"ab","empty pattern in regex codeblock: got expected start string"); 2057 is($got[1],"xab", 2058 "empty pattern in regex codeblock: first subst with no last-match worked right"); 2059 is($got[2],"xxb","empty pattern in regex codeblock: second subst worked right"); 2060 is($got[3],"xxx","empty pattern in regex codeblock: third subst worked right"); 2061 is($got[4],"died","empty pattern in regex codeblock: died as expected"); 2062 like($got[5],qr/Error: Infinite recursion via empty pattern/, 2063 "empty pattern in regex codeblock: produced the right exception message" ); 2064 } 2065 2066 # This test is based on the one directly above, which happened to 2067 # leak. Repeat the test, but stripped down to the bare essentials 2068 # of the leak, which is to die while executing a regex which is 2069 # already the current regex, thus causing the saved outer set of 2070 # capture offsets to leak. The test itself doesn't do anything 2071 # except sit around hoping not to be triggered by ASan 2072 { 2073 eval { 2074 my $s = "abcd"; 2075 $s =~ m{([abcd]) (?{ die if $1 eq 'd'; })}gx; 2076 $s =~ //g; 2077 $s =~ //g; 2078 $s =~ //g; 2079 }; 2080 pass("call to current regex doesn't leak"); 2081 } 2082 2083 2084 2085 { 2086 # [perl #130495] /x comment skipping stopped a byte short, leading 2087 # to assertion failure or 'malformed utf-8 character" warning 2088 fresh_perl_is( 2089 "use utf8; m{a#\x{124}}x", '', {wide_chars => 1}, 2090 '[perl #130495] utf-8 character at end of /x comment should not misparse', 2091 ); 2092 } 2093 { 2094 # [perl #130522] causes out-of-bounds read detected by clang with 2095 # address=sanitized when length of the STCLASS string is greater than 2096 # length of target string. 2097 my $re = qr{(?=\0z)\0?z?$}i; 2098 my($yes, $no) = (1, ""); 2099 for my $test ( 2100 [ $no, undef, '<undef>' ], 2101 [ $no, '', '' ], 2102 [ $no, "\0", '\0' ], 2103 [ $yes, "\0z", '\0z' ], 2104 [ $no, "\0z\0", '\0z\0' ], 2105 [ $yes, "\0z\n", '\0z\n' ], 2106 ) { 2107 my($result, $target, $disp) = @$test; 2108 no warnings qw/uninitialized/; 2109 is($target =~ $re, $result, "[perl #130522] with target '$disp'"); 2110 } 2111 } 2112 { 2113 # [perl #129377] backref to an unmatched capture should not cause 2114 # reading before start of string. 2115 SKIP: { 2116 skip "no re-debug under miniperl" if is_miniperl; 2117 my $prog = <<'EOP'; 2118use re qw(Debug EXECUTE); 2119"x" =~ m{ () y | () \1 }x; 2120EOP 2121 fresh_perl_like($prog, qr{ 2122 \A (?! .* ^ \s+ - ) 2123 }msx, { stderr => 1 }, "Offsets in debug output are not negative"); 2124 } 2125 } 2126 { 2127 # buffer overflow 2128 2129 # This test also used to leak - fixed by the commit which added 2130 # this line. 2131 2132 fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", 2133 "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", 2134 {}, "buffer overflow for regexp component"); 2135 } 2136 { 2137 # [perl #129281] buffer write overflow, detected by ASAN, valgrind 2138 fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much"); 2139 } 2140 { 2141 # RT #131893 - fails with ASAN -fsanitize=undefined 2142 fresh_perl_is('qr/0(0?(0||00*))|/', '', {}, "integer overflow during compilation"); 2143 } 2144 2145 { 2146 # RT #131575 intuit skipping back from the end to find the highest 2147 # possible start point, was potentially hopping back beyond pos() 2148 # and crashing by calling fbm_instr with a negative length 2149 2150 my $text = "=t=\x{5000}"; 2151 pos($text) = 3; 2152 ok(scalar($text !~ m{(~*=[a-z]=)}g), "RT #131575"); 2153 } 2154 { 2155 fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly"); 2156 } 2157 { 2158 fresh_perl_is('$_="0\x{1000000}";/^000?\0000/','',{},"dont throw assert errors trying to fbm past end of string"); 2159 } 2160 { # [perl $132227] 2161 fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2162 2163 # A variation, but as far as khw knows not part of 132227 2164 fresh_perl_is("'0bssa' =~ m/0B" . $sharp_s . "\\N{U+41}" . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2165 } 2166 { # [perl $132164] 2167 fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer"); 2168 } 2169 { # [perl #133642] 2170 fresh_perl_is('no warnings "experimental::vlb"; 2171 m/((?<=(0?)))/', "",{},"Was getting 'Double free'"); 2172 } 2173 { # [perl #133782] 2174 # this would panic on DEBUGGING builds 2175 fresh_perl_is(<<'CODE', "ok\nok\n",{}, 'Bad length magic was left on $^R'); 2176while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) { 2177 print "ok\n" if length($^R)==length("$^R"); 2178} 2179CODE 2180 } 2181 { # [perl #133871], ASAN/valgrind out-of-bounds access 2182 fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]"); 2183 } 2184 { # [perl #133871], ASAN/valgrind out-of-bounds access 2185 fresh_perl_like('qr/\p{nv:NAnq}/', qr/Can't find Unicode property definition/, {}, "GH #17367"); 2186 } 2187 { # GH #17370, ASAN/valgrind out-of-bounds access 2188 fresh_perl_like('qr/\p{nv:qnan}/', qr/Can't find Unicode property definition/, {}, "GH #17370"); 2189 } 2190 { # GH #17371, segfault 2191 fresh_perl_like('qr/\p{nv=\\\\\}(?0)|\337ss|\337ss//', qr/Unicode property wildcard not terminated/, {}, "GH #17371"); 2192 } 2193 { # GH #17384, ASAN/valgrind out-of-bounds access 2194 fresh_perl_like('"q0" =~ /\p{__::Is0}/', qr/Unknown user-defined property name \\p\{__::Is0}/, {}, "GH #17384"); 2195 } 2196 2197 SKIP: 2198 { # [perl #133921], segfault 2199 skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC; 2200 2201 fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\xÿÿÿþù\Q`\Qx`{0c!}e;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ Fn0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2202 fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \xÿÿÿÿù\Q`\Qx`{0d ;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 0000000Fm0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2203 2204fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2205 2206 2207 2208 2209 ç 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225x{0c!}\;\;çÿ q0/i0/!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ u00000F000t0p d? ù ç!00000000000000000000000m/0000000000000000000000000000000m/0\} )|i', "", {}, "[perl #133921]"); 2226 2227 fresh_perl_is('a aúúv sWtrt\ó||ß+Wüefù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each111\jx` \xòÿÿÿù\Qx`\Q 2228 2229 2230 2231 2232 2233 ç 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249x{1c!}\;\;îçÿp qr/elsif/!eF/;îçÿù\QxÿÿÿÿùHQx`Lx{1c!}e;ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx\xÿÿÿÿ>=\Qx`\Qx`ù\òx`ÿ>=\Qx`\Qx`ù\òx`\Qx`\x{1c!}8;\;îçÿp unshifteFnormat0cmp d?not ùp ç!0000000000000000000000000m/00000000000000000000000000000000m/0R\} )|\aï||K??p¿ÿÿfúd{\{gri{\x{1x/} ð¹NuntiÀh', "", {}, "[perl #133921]"); 2250 2251 fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0g0c 000n0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2252 2253 2254 2255 2256 2257 ç 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273x{0c!}\;\;îçÿ /0f/!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 000t0F000t0p d?n ù ç!00000000000000000000000m/0000000000000000000000000000000m/\} )|i', "", {}, "[perl #133933]"); 2274 } 2275 2276 { # perl #133998] 2277 fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {}, 2278 '/[\S\s]/l works'); 2279 } 2280 2281 { # perl #133995] 2282 use utf8; 2283 fresh_perl_is('"έδωσαν ελληνικήვე" =~ m/[^0](?=0)0?/', "", 2284 {wide_chars => 1}, 2285 '[^0] doesnt crash on UTF-8 target string'); 2286 } 2287 2288 { # [perl #133992] This is a tokenizer bug of parsing a pattern 2289 fresh_perl_is(q:$z = do { 2290 use utf8; 2291 "q!ÑеÑÑ! =~ m'" 2292 }; 2293 $z .= 'è(?#'; 2294 $z .= "'"; 2295 eval $z;:, "", {}, 'foo'); 2296 } 2297 2298 { # [perl #134325] 2299 my $quote="\\Q"; 2300 my $back="\\\\"; 2301 my $ff="\xff"; 2302 my $s = sprintf "/\\1|(|%s)%s%s /i", 2303 $quote x 8 . $back x 69, 2304 $quote x 5 . $back x 4, 2305 $ff x 48; 2306 like(fresh_perl("$s", { stderr => 1, }), qr/Unmatched \(/); 2307 } 2308 2309 { # GitHub #17196, caused assertion failure 2310 fresh_perl_like('("0" x 258) =~ /(?l)0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/', 2311 qr/^$/, 2312 {}, 2313 "Assertion failure with /l exact string longer than a single node"); 2314 } 2315 2316SKIP: 2317 { # [perl #134334], Assertion failure 2318 skip "no UTF-8 locale available" unless $utf8_locale; 2319 fresh_perl_like("use POSIX; POSIX::setlocale(&LC_CTYPE, '$utf8_locale'); 'ssss' =~ /\xDF+?sX/il;", 2320 qr/^$/, 2321 {}, 2322 "Assertion failure matching /il on single char folding to multi"); 2323 } 2324 2325 { # Test ANYOFHs 2326 my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/; 2327 unlike("\x{4000000}", $pat, "4000000 isn't in pattern"); 2328 like("\x{4000001}", $pat, "4000001 is in pattern"); 2329 unlike("\x{4000002}", $pat, "4000002 isn't in pattern"); 2330 like("\x{4000003}", $pat, "4000003 is in pattern"); 2331 unlike("\x{4000004}", $pat, "4000004 isn't in pattern"); 2332 like("\x{4000005}", $pat, "4000005 is in pattern"); 2333 unlike("\x{4000006}", $pat, "4000006 isn't in pattern"); 2334 2335 # gh #17319 2336 $pat = qr/[\N{U+200D}\N{U+2000}]()/; 2337 unlike("\x{1FFF}", $pat, "1FFF isn't in pattern"); 2338 like("\x{2000}", $pat, "2000 is in pattern"); 2339 unlike("\x{2001}", $pat, "2001 isn't in pattern"); 2340 unlike("\x{200C}", $pat, "200C isn't in pattern"); 2341 like("\x{200D}", $pat, "200 is in pattern"); 2342 unlike("\x{200E}", $pat, "200E isn't in pattern"); 2343 } 2344 2345 # gh17490: test recursion check 2346 { 2347 my $eval = '(?{1})'; 2348 my $re = sprintf '(?&FOO)(?(DEFINE)(?<FOO>%sfoo))', $eval x 20; 2349 my $result = eval qq{"foo" =~ /$re/}; 2350 is($@ // '', '', "many evals did not die"); 2351 ok($result, "regexp correctly matched"); 2352 } 2353 2354 # gh16947: test regexp corruption (GOSUB) 2355 { 2356 fresh_perl_is(q{ 2357 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok' 2358 }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)'); 2359 } 2360 # gh16947: test fix doesn't break SUSPEND 2361 { 2362 fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' }, 2363 'ok', {}, "gh16947: test fix doesn't break SUSPEND"); 2364 } 2365 2366 # gh17730: should not crash 2367 { 2368 fresh_perl_is(q{ 2369 "q00" =~ m{(((*ACCEPT)0)*00)?0(?1)}; print "ok" 2370 }, 'ok', {}, 'gh17730: should not crash'); 2371 } 2372 2373 # gh17743: more regexp corruption via GOSUB 2374 { 2375 fresh_perl_is(q{ 2376 "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" 2377 }, 'ok', {}, 'gh17743: test regexp corruption (1)'); 2378 2379 fresh_perl_is(q{ 2380 "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; 2381 print "ok" 2382 }, 'ok', {}, 'gh17743: test regexp corruption (2)'); 2383 } 2384 2385 { 2386 # Test branch reset (?|...|...) in list context. This was reported 2387 # in GH Issue #20710, in relation to breaking App::pl. See 2388 # https://github.com/Perl/perl5/issues/20710#issuecomment-1404549785 2389 my $ok = 0; 2390 my ($w,$x,$y,$z); 2391 $ok = ($x,$y) = "ab"=~/(?|(p)(q)|(x)(y)|(a)(b))/; 2392 ok($ok,"Branch reset pattern 1 matched as expected"); 2393 is($x,"a","Branch reset in list context check 1 (a)"); 2394 is($y,"b","Branch reset in list context check 2 (b)"); 2395 2396 $ok = ($x,$y,$z) = "xyz"=~/(?|(p)(q)|(x)(y)|(a)(b))(z)/; 2397 ok($ok,"Branch reset pattern 2 matched as expected"); 2398 is($x,"x","Branch reset in list context check 3 (x)"); 2399 is($y,"y","Branch reset in list context check 4 (y)"); 2400 is($z,"z","Branch reset in list context check 5 (z)"); 2401 2402 $ok = ($w,$x,$y) = "wpq"=~/(w)(?|(p)(q)|(x)(y)|(a)(b))/; 2403 ok($ok,"Branch reset pattern 3 matched as expected"); 2404 is($w,"w","Branch reset in list context check 6 (w)"); 2405 is($x,"p","Branch reset in list context check 7 (p)"); 2406 is($y,"q","Branch reset in list context check 8 (q)"); 2407 2408 $ok = ($w,$x,$y,$z) = "wabz"=~/(w)(?|(p)(q)|(x)(y)|(a)(b))(z)/; 2409 ok($ok,"Branch reset pattern 4 matched as expected"); 2410 is($w,"w","Branch reset in list context check 9 (w)"); 2411 is($x,"a","Branch reset in list context check 10 (a)"); 2412 is($y,"b","Branch reset in list context check 11 (b)"); 2413 is($z,"z","Branch reset in list context check 12 (z)"); 2414 } 2415 { 2416 # Test for GH Issue #20826. Save stack overflow introduced in 2417 # 92373dea9d7bcc0a017f20cb37192c1d8400767f PR #20530. 2418 # Note this test depends on an assert so it will only fail 2419 # under DEBUGGING. 2420 fresh_perl_is(q{ 2421 $_ = "x" x 1000; 2422 my $pat = '(.)' x 200; 2423 $pat = qr/($pat)+/; 2424 m/$pat/; 2425 print "ok"; 2426 }, 'ok', {}, 'gh20826: test regex save stack overflow'); 2427 } 2428 { 2429 my ($x, $y); 2430 ok( "aaa" =~ /(?:(a)?\1)+/, 2431 "GH Issue #18865 'aaa' - pattern matches"); 2432 $x = "($-[0],$+[0])"; 2433 ok( "aaa" =~ /(?:((?{})a)?\1)+/, 2434 "GH Issue #18865 'aaa' - deoptimized pattern matches"); 2435 $y = "($-[0],$+[0])"; 2436 { 2437 local $::TODO = "Not Yet Implemented"; 2438 is( $y, $x, 2439 "GH Issue #18865 'aaa' - test optimization"); 2440 } 2441 ok( "ababab" =~ /(?:(?:(ab))?\1)+/, 2442 "GH Issue #18865 'ababab' - pattern matches"); 2443 $x = "($-[0],$+[0])"; 2444 ok( "ababab" =~ /(?:(?:((?{})ab))?\1)+/, 2445 "GH Issue #18865 'ababab' - deoptimized pattern matches"); 2446 $y = "($-[0],$+[0])"; 2447 { 2448 local $::TODO = "Not Yet Implemented"; 2449 is( $y, $x, 2450 "GH Issue #18865 'ababab' - test optimization"); 2451 } 2452 ok( "XaaXbbXb" =~ /(?:X([ab])?\1)+/, 2453 "GH Issue #18865 'XaaXbbXb' - pattern matches"); 2454 $x = "($-[0],$+[0])"; 2455 ok( "XaaXbbXb" =~ /(?:X((?{})[ab])?\1)+/, 2456 "GH Issue #18865 'XaaXbbXb' - deoptimized pattern matches"); 2457 $y = "($-[0],$+[0])"; 2458 { 2459 local $::TODO = "Not Yet Implemented"; 2460 is( $y, $x, 2461 "GH Issue #18865 'XaaXbbXb' - test optimization"); 2462 } 2463 } 2464 { 2465 # Test that ${^LAST_SUCCESSFUL_PATTERN} works as expected. 2466 # It should match like the empty pattern does, and it should be dynamic 2467 # in the same was as $1 is dynamic. 2468 my ($str,$pat); 2469 $str = "ABCD"; 2470 $str =~/(D)/; 2471 is("$1", "D", '$1 is "D"'); 2472 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2473 is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2474 { 2475 if ($str=~/BX/ || $str=~/(BC)/) { 2476 is("$1", "BC",'$1 is now "BC"'); 2477 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2478 ok($str =~ s//ZZ/, "Empty pattern matched as expected"); 2479 is($str, "AZZD", "Empty pattern in s/// has result we expected"); 2480 } 2481 } 2482 is("$1", "D", '$1 should now be "D" again'); 2483 is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2484 ok($str=~s//Q/, 'Empty pattern to "Q" was successful'); 2485 is($str, "AZZQ", "Empty pattern in s/// has result we expected (try2)"); 2486 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2487 is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} restored to its previous value as expected'); 2488 2489 $str = "ABCD"; 2490 { 2491 if ($str=~/BX/ || $str=~/(BC)/) { 2492 is("$1", "BC",'$1 is now "BC"'); 2493 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2494 ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/ZZ/, '${^LAST_SUCCESSFUL_PATTERN} matched as expected'); 2495 is($str, "AZZD", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected'); 2496 } 2497 } 2498 is("$1", "D", '$1 should now be "D" again'); 2499 is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2500 is($str, "AZZD", 'Using ${^LAST_SUCCESSFUL_PATTERN} as a pattern has same result as empty pattern'); 2501 ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/Q/, '${^LAST_SUCCESSFUL_PATTERN} to "Q" was successful'); 2502 is($str, "AZZQ", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected'); 2503 ok($str=~/ZQ/, "/ZQ/ matched as expected"); 2504 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2505 is($pat, "(?^:ZQ)", '${^LAST_SUCCESSFUL_PATTERN} changed as expected'); 2506 2507 $str = "foobarfoo"; 2508 ok($str =~ s/foo//, "matched foo"); 2509 my $copy= ${^LAST_SUCCESSFUL_PATTERN}; 2510 ok(defined($copy), '$copy is defined'); 2511 ok($str =~ s/bar//,"matched bar"); 2512 ok($str =~ s/$copy/PQR/, 'replaced $copy with PQR'); 2513 is($str, "PQR", 'final string should be PQR'); 2514 } 2515 2516 { 2517 # github #21661 2518 fresh_perl_is(<<'PROG', <<'EXPECT', {}, "double-free on fatal warn with existing error"); 2519use warnings FATAL => qw(all); 2520/() {}/X; 2521PROG 2522Unknown regexp modifier "/X" at - line 2, at end of line 2523Unescaped left brace in regex is passed through in regex; marked by <-- HERE in m/() { <-- HERE }/ at - line 2. 2524Execution of - aborted due to compilation errors. 2525EXPECT 2526 fresh_perl_is(<<'PROG', "", {}, "leak if __WARN__ handler dies"); 2527use warnings; 2528local $SIG{__WARN__} = sub { die; }; 2529eval "qr/()x{/;" for 1..10; 2530PROG 2531 } 2532} # End of sub run_tests 2533 25341; 2535 2536# 2537# ex: set ts=8 sts=4 sw=4 et: 2538# 2539