1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9no warnings 'experimental::vlb'; 10use 5.010; 11 12sub run_tests; 13 14$| = 1; 15 16 17BEGIN { 18 chdir 't' if -d 't'; 19 require './test.pl'; 20 set_up_inc('../lib', '.', '../ext/re'); 21 require Config; import Config; 22 require './charset_tools.pl'; 23 require './loc_tools.pl'; 24} 25 26skip_all_without_unicode_tables(); 27 28my $has_locales = locales_enabled('LC_CTYPE'); 29 30plan tests => 1265; # Update this when adding/deleting tests. 31 32run_tests() unless caller; 33 34# 35# Tests start here. 36# 37sub run_tests { 38 { 39 # see https://github.com/Perl/perl5/issues/12948 40 my $string="ABCDEFGHIJKL"; 41 my $pat= "(.)" x length($string); 42 my $ok= $string=~/^$pat\z/; 43 foreach my $n (1 .. length($string)) { 44 $ok= eval sprintf 'is $%d, "%s", q($%d = %s); 1', ($n, substr($string,$n-1,1))x2; 45 ok($ok, "eval for \$$n test"); 46 $ok= eval sprintf 'is ${%d}, "%s", q(${%d} = %s); 1', ($n, substr($string,$n-1,1))x2; 47 ok($ok, "eval for \${$n} test"); 48 49 $ok= eval sprintf 'is $0%d, "%s", q($0%d = %s); 1', ($n, substr($string,$n-1,1))x2; 50 ok(!$ok, "eval failed as expected for \$0$n test"); 51 $ok= eval sprintf 'is ${0%d}, "%s", q(${0%d} = %s); 1', ($n, substr($string,$n-1,1))x2; 52 ok(!$ok, "eval failed as expected for \${0$n} test"); 53 54 no strict 'refs'; 55 $ok= eval sprintf 'is ${0b%b}, "%s", q(${0b%b} = %s); 1', ($n, substr($string,$n-1,1))x2; 56 ok($ok, sprintf "eval for \${0b%b} test", $n); 57 $ok= eval sprintf 'is ${0x%x}, "%s", q(${0x%x} = %s); 1', ($n, substr($string,$n-1,1))x2; 58 ok($ok, sprintf "eval for \${0x%x} test", $n); 59 $ok= eval sprintf 'is ${0b%08b}, "%s", q(${0b%08b} = %s); 1', ($n, substr($string,$n-1,1))x2; 60 ok($ok, sprintf "eval for \${0b%b} test", $n); 61 $ok= eval sprintf 'is ${0x%04x}, "%s", q(${0x%04x} = %s); 1', ($n, substr($string,$n-1,1))x2; 62 ok($ok, sprintf "eval for \${0x%04x} test", $n); 63 } 64 } 65 66 my $sharp_s = uni_to_native("\xdf"); 67 68 { 69 my $x = "abc\ndef\n"; 70 (my $x_pretty = $x) =~ s/\n/\\n/g; 71 72 ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; 73 ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; 74 75 # used to be a test for $* 76 ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; 77 78 ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); 79 ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); 80 81 ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; 82 ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); 83 84 ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; 85 ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); 86 87 ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; 88 ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); 89 } 90 91 { 92 $_ = '123'; 93 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; 94 } 95 96 { 97 $_ = 'aaabbbccc'; 98 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', 99 qq [\$_ = '$_'; /(a*b*)(c*)/]; 100 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; 101 unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); 102 103 $_ = 'aaabccc'; 104 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; 105 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 106 107 $_ = 'aaaccc'; 108 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 109 unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); 110 111 $_ = 'abcdef'; 112 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; 113 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; 114 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; 115 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; 116 } 117 118 { 119 # used to be a test for $* 120 ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; 121 } 122 123 { 124 our %XXX = map {($_ => $_)} 123, 234, 345; 125 126 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); 127 while ($_ = shift(@XXX)) { 128 my $e = index ($_, 'not') >= 0 ? '' : 1; 129 my $r = m?(.*)?; 130 is($r, $e, "?(.*)?"); 131 /not/ && reset; 132 if (/not ok 2/) { 133 if ($^O eq 'VMS') { 134 $_ = shift(@XXX); 135 } 136 else { 137 reset 'X'; 138 } 139 } 140 } 141 142 SKIP: { 143 if ($^O eq 'VMS') { 144 skip "Reset 'X'", 1; 145 } 146 ok !keys %XXX, "%XXX is empty"; 147 } 148 149 } 150 151 { 152 my $message = "Test empty pattern"; 153 my $xyz = 'xyz'; 154 my $cde = 'cde'; 155 156 $cde =~ /[^ab]*/; 157 $xyz =~ //; 158 is($&, $xyz, $message); 159 160 my $foo = '[^ab]*'; 161 $cde =~ /$foo/; 162 $xyz =~ //; 163 is($&, $xyz, $message); 164 165 $cde =~ /$foo/; 166 my $null; 167 no warnings 'uninitialized'; 168 $xyz =~ /$null/; 169 is($&, $xyz, $message); 170 171 $null = ""; 172 $xyz =~ /$null/; 173 is($&, $xyz, $message); 174 175 # each entry: regexp, match string, $&, //o match success 176 my @tests = 177 ( 178 [ "", "xy", "x", 1 ], 179 [ "y", "yz", "y", !1 ], 180 ); 181 for my $test (@tests) { 182 my ($re, $str, $matched, $omatch) = @$test; 183 $xyz =~ /x/o; 184 ok($str =~ /$re/, "$str matches /$re/"); 185 is($&, $matched, "on $matched"); 186 $xyz =~ /x/o; 187 is($str =~ /$re/o, $omatch, "$str matches /$re/o (or not)"); 188 } 189 } 190 191 { 192 my $message = q !Check $`, $&, $'!; 193 $_ = 'abcdefghi'; 194 /def/; # optimized up to cmd 195 is("$`:$&:$'", 'abc:def:ghi', $message); 196 197 no warnings 'void'; 198 /cde/ + 0; # optimized only to spat 199 is("$`:$&:$'", 'ab:cde:fghi', $message); 200 201 /[d][e][f]/; # not optimized 202 is("$`:$&:$'", 'abc:def:ghi', $message); 203 } 204 205 { 206 $_ = 'now is the {time for all} good men to come to.'; 207 / \{([^}]*)}/; 208 is($1, 'time for all', "Match braces"); 209 } 210 211 { 212 my $message = "{N,M} quantifier"; 213 $_ = 'xxx {3,4} yyy zzz'; 214 ok(/( {3,4})/, $message); 215 is($1, ' ', $message); 216 unlike($_, qr/( {4,})/, $message); 217 ok(/( {2,3}.)/, $message); 218 is($1, ' y', $message); 219 ok(/(y{2,3}.)/, $message); 220 is($1, 'yyy ', $message); 221 unlike($_, qr/x {3,4}/, $message); 222 unlike($_, qr/^xxx {3,4}/, $message); 223 } 224 225 { 226 my $message = "Test /g"; 227 local $" = ":"; 228 $_ = "now is the time for all good men to come to."; 229 my @words = /(\w+)/g; 230 my $exp = "now:is:the:time:for:all:good:men:to:come:to"; 231 232 is("@words", $exp, $message); 233 234 @words = (); 235 while (/\w+/g) { 236 push (@words, $&); 237 } 238 is("@words", $exp, $message); 239 240 @words = (); 241 pos = 0; 242 while (/to/g) { 243 push(@words, $&); 244 } 245 is("@words", "to:to", $message); 246 247 pos $_ = 0; 248 @words = /to/g; 249 is("@words", "to:to", $message); 250 } 251 252 { 253 $_ = "abcdefghi"; 254 255 my $pat1 = 'def'; 256 my $pat2 = '^def'; 257 my $pat3 = '.def.'; 258 my $pat4 = 'abc'; 259 my $pat5 = '^abc'; 260 my $pat6 = 'abc$'; 261 my $pat7 = 'ghi'; 262 my $pat8 = '\w*ghi'; 263 my $pat9 = 'ghi$'; 264 265 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = 266 my $t6 = my $t7 = my $t8 = my $t9 = 0; 267 268 for my $iter (1 .. 5) { 269 $t1++ if /$pat1/o; 270 $t2++ if /$pat2/o; 271 $t3++ if /$pat3/o; 272 $t4++ if /$pat4/o; 273 $t5++ if /$pat5/o; 274 $t6++ if /$pat6/o; 275 $t7++ if /$pat7/o; 276 $t8++ if /$pat8/o; 277 $t9++ if /$pat9/o; 278 } 279 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; 280 is($x, '505550555', "Test /o"); 281 } 282 283 { 284 my $xyz = 'xyz'; 285 ok "abc" =~ /^abc$|$xyz/, "| after \$"; 286 287 # perl 4.009 says "unmatched ()" 288 my $message = '$ inside ()'; 289 290 my $result; 291 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; 292 is($@, "", $message); 293 is($result, "abc:bc", $message); 294 } 295 296 { 297 my $message = "Scalar /g"; 298 $_ = "abcfooabcbar"; 299 300 ok( /abc/g && $` eq "", $message); 301 ok( /abc/g && $` eq "abcfoo", $message); 302 ok(!/abc/g, $message); 303 304 $message = "Scalar /gi"; 305 pos = 0; 306 ok( /ABC/gi && $` eq "", $message); 307 ok( /ABC/gi && $` eq "abcfoo", $message); 308 ok(!/ABC/gi, $message); 309 310 $message = "Scalar /g"; 311 pos = 0; 312 ok( /abc/g && $' eq "fooabcbar", $message); 313 ok( /abc/g && $' eq "bar", $message); 314 315 $_ .= ''; 316 my @x = /abc/g; 317 is(@x, 2, "/g reset after assignment"); 318 } 319 320 { 321 my $message = '/g, \G and pos'; 322 $_ = "abdc"; 323 pos $_ = 2; 324 /\Gc/gc; 325 is(pos $_, 2, $message); 326 /\Gc/g; 327 is(pos $_, undef, $message); 328 } 329 330 { 331 my $message = '(?{ })'; 332 our $out = 1; 333 'abc' =~ m'a(?{ $out = 2 })b'; 334 is($out, 2, $message); 335 336 $out = 1; 337 'abc' =~ m'a(?{ $out = 3 })c'; 338 is($out, 1, $message); 339 } 340 341 { 342 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 343 my @out = /(?<!foo)bar./g; 344 is("@out", 'bar2 barf', "Negative lookbehind"); 345 } 346 347 { 348 my $message = "REG_INFTY tests"; 349 # Tests which depend on REG_INFTY 350 351 # Defaults assumed if this fails 352 eval { require Config; }; 353 $::reg_infty = $Config::Config{reg_infty} // ((1<<31)-1); 354 $::reg_infty_m = $::reg_infty - 1; 355 $::reg_infty_p = $::reg_infty + 1; 356 $::reg_infty_m = $::reg_infty_m; # Suppress warning. 357 358 # As well as failing if the pattern matches do unexpected things, the 359 # next three tests will fail if you should have picked up a lower-than- 360 # default value for $reg_infty from Config.pm, but have not. 361 SKIP: { 362 skip "REG_INFTY too big to test ($::reg_infty)", 7 363 if $::reg_infty > (1<<16); 364 365 is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); 366 is($@, '', $message); 367 is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); 368 is($@, '', $message); 369 isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); 370 is($@, '', $message); 371 372 # It should be 'a' x 2147483647, but that exhausts memory on 373 # reasonably sized modern machines 374 like('a' x $::reg_infty_m, qr/a{1,}/, 375 "{1,} matches more times than REG_INFTY"); 376 } 377 378 eval "'aaa' =~ /a{1,$::reg_infty}/"; 379 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 380 eval "'aaa' =~ /a{1,$::reg_infty_p}/"; 381 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 382 383 } 384 385 { 386 # Poke a couple more parse failures 387 my $context = 'x' x 256; 388 eval qq("${context}y" =~ /(?<=$context)y/); 389 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; 390 } 391 392 SKIP: 393 { # Long Monsters 394 395 my @trials = (125, 140, 250, 270, 300000, 30); 396 397 skip('limited memory', @trials * 4) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 398 399 for my $l (@trials) { # Ordered to free memory 400 my $a = 'a' x $l; 401 # we do not use like() or unlike() here as the string 402 # is very long and is not useful if the match fails, 403 # the useful part 404 ok("ba$a=" =~ m/a$a=/, sprintf 405 'Long monster: ("ba".("a" x %d)."=") =~ m/aa...a=/', $l); 406 ok("b$a=" !~ m/a$a=/, sprintf 407 'Long monster: ("b" .("a" x %d)."=") !~ m/aa...a=/', $l); 408 ok("b$a=" =~ m/ba+=/, sprintf 409 'Long monster: ("b" .("a" x %d)."=") =~ m/ba+=/', $l); 410 ok("ba$a=" =~ m/b(?:a|b)+=/, sprintf 411 'Long monster: ("ba".("a" x %d)."=") =~ m/b(?:a|b)+=/', $l); 412 } 413 } 414 415 SKIP: 416 { # 20000 nodes, each taking 3 words per string, and 1 per branch 417 418 my %ans = ( 'ax13876y25677lbc' => 1, 419 'ax13876y25677mcb' => 0, # not b. 420 'ax13876y35677nbc' => 0, # Num too big 421 'ax13876y25677y21378obc' => 1, 422 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 423 'ax13876y25677y21378y21378kbc' => 1, 424 'ax13876y25677y21378y21378kcb' => 0, # Not b. 425 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 426 ); 427 428 skip('limited memory', 2 * scalar keys %ans) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 429 430 my $long_constant_len = join '|', 12120 .. 32645; 431 my $long_var_len = join '|', 8120 .. 28645; 432 433 for (keys %ans) { 434 my $message = "20000 nodes, const-len '$_'"; 435 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; 436 437 $message = "20000 nodes, var-len '$_'"; 438 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; 439 } 440 } 441 442 { 443 my $message = "Complicated backtracking"; 444 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; 445 my $expect = "(bla()) ((l)u((e))) (l(e)e)"; 446 447 our $c; 448 sub matchit { 449 m/ 450 ( 451 \( 452 (?{ $c = 1 }) # Initialize 453 (?: 454 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 455 (?! 456 ) # Fail: will unwind one iteration back 457 ) 458 (?: 459 [^()]+ # Match a big chunk 460 (?= 461 [()] 462 ) # Do not try to match subchunks 463 | 464 \( 465 (?{ ++$c }) 466 | 467 \) 468 (?{ --$c }) 469 ) 470 )+ # This may not match with different subblocks 471 ) 472 (?(?{ $c != 0 }) 473 (?! 474 ) # Fail 475 ) # Otherwise the chunk 1 may succeed with $c>0 476 /xg; 477 } 478 479 my @ans = (); 480 my $res; 481 push @ans, $res while $res = matchit; 482 is("@ans", "1 1 1", $message); 483 484 @ans = matchit; 485 is("@ans", $expect, $message); 486 487 $message = "Recursion with (??{ })"; 488 our $matched; 489 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; 490 491 @ans = my @ans1 = (); 492 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; 493 494 is("@ans", "1 1 1", $message); 495 is("@ans1", $expect, $message); 496 497 @ans = m/$matched/g; 498 is("@ans", $expect, $message); 499 500 } 501 502 { 503 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; 504 } 505 506 { 507 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 508 is("@ans", 'a/ b', "Stack may be bad"); 509 } 510 511 { 512 my $message = "Eval-group not allowed at runtime"; 513 my $code = '{$blah = 45}'; 514 our $blah = 12; 515 eval { /(?$code)/ }; 516 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 517 518 $blah = 12; 519 my $res = eval { "xx" =~ /(?$code)/o }; 520 { 521 no warnings 'uninitialized'; 522 chomp $@; my $message = "$message '$@', '$res', '$blah'"; 523 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 524 } 525 526 $code = '=xx'; 527 $blah = 12; 528 $res = eval { "xx" =~ /(?$code)/o }; 529 { 530 no warnings 'uninitialized'; 531 my $message = "$message '$@', '$res', '$blah'"; 532 ok(!$@ && $res, $message); 533 } 534 535 $code = '{$blah = 45}'; 536 $blah = 12; 537 eval "/(?$code)/"; 538 is($blah, 45, $message); 539 540 $blah = 12; 541 /(?{$blah = 45})/; 542 is($blah, 45, $message); 543 } 544 545 { 546 my $message = "Pos checks"; 547 my $x = 'banana'; 548 $x =~ /.a/g; 549 is(pos $x, 2, $message); 550 551 $x =~ /.z/gc; 552 is(pos $x, 2, $message); 553 554 sub f { 555 my $p = $_[0]; 556 return $p; 557 } 558 559 $x =~ /.a/g; 560 is(f (pos $x), 4, $message); 561 } 562 563 { 564 my $message = 'Checking $^R'; 565 our $x = $^R = 67; 566 'foot' =~ /foo(?{$x = 12; 75})[t]/; 567 is($^R, 75, $message); 568 569 $x = $^R = 67; 570 'foot' =~ /foo(?{$x = 12; 75})[xy]/; 571 ok($^R eq '67' && $x eq '12', $message); 572 573 $x = $^R = 67; 574 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 575 ok($^R eq '79' && $x eq '12', $message); 576 } 577 578 { 579 is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); 580 is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); 581 is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); 582 is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); 583 is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); 584 is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); 585 } 586 587 { # Test that charset modifier work, and are interpolated 588 is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); 589 is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); 590 is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); 591 is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); 592 is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); 593 594 my $dual = qr/\b\v$/; 595 my $locale; 596 597 SKIP: { 598 skip 'Locales not available', 1 unless $has_locales; 599 600 use locale; 601 $locale = qr/\b\v$/; 602 is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); 603 no locale; 604 } 605 606 use feature 'unicode_strings'; 607 my $unicode = qr/\b\v$/; 608 is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); 609 is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 610 611 SKIP: { 612 skip 'Locales not available', 1 unless $has_locales; 613 614 is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); 615 } 616 617 no feature 'unicode_strings'; 618 SKIP: { 619 skip 'Locales not available', 1 unless $has_locales; 620 is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); 621 } 622 623 is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); 624 625 SKIP: { 626 skip 'Locales not available', 2 unless $has_locales; 627 628 use locale; 629 is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 630 is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); 631 } 632 } 633 634 { 635 my $message = "Look around"; 636 $_ = 'xabcx'; 637 foreach my $ans ('', 'c') { 638 ok(/(?<=(?=a)..)((?=c)|.)/g, $message); 639 is($1, $ans, $message); 640 } 641 } 642 643 { 644 my $message = "Empty clause"; 645 $_ = 'a'; 646 foreach my $ans ('', 'a', '') { 647 ok(/^|a|$/g, $message); 648 is($&, $ans, $message); 649 } 650 } 651 652 { 653 sub prefixify { 654 my $message = "Prefixify"; 655 { 656 my ($v, $a, $b, $res) = @_; 657 ok($v =~ s/\Q$a\E/$b/, $message); 658 is($v, $res, $message); 659 } 660 } 661 662 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); 663 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); 664 } 665 666 { 667 $_ = 'var="foo"'; 668 /(\")/; 669 ok $1 && /$1/, "Capture a quote"; 670 } 671 672 { 673 no warnings 'closure'; 674 my $message = '(?{ $var } refers to package vars'; 675 package aa; 676 our $c = 2; 677 $::c = 3; 678 '' =~ /(?{ $c = 4 })/; 679 main::is($c, 4, $message); 680 main::is($::c, 3, $message); 681 } 682 683 { 684 is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); 685 like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, 686 'POSIX class [: :] must have valid name'); 687 688 for my $d (qw [= .]) { 689 is(eval "/[[${d}foo${d}]]/", undef); 690 like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, 691 "POSIX syntax [[$d $d]] is an error"); 692 } 693 } 694 695 { 696 # test if failure of patterns returns empty list 697 my $message = "Failed pattern returns empty list"; 698 $_ = 'aaa'; 699 @_ = /bbb/; 700 is("@_", "", $message); 701 702 @_ = /bbb/g; 703 is("@_", "", $message); 704 705 @_ = /(bbb)/; 706 is("@_", "", $message); 707 708 @_ = /(bbb)/g; 709 is("@_", "", $message); 710 } 711 { 712 my $message = 'ACCEPT and CLOSE - '; 713 $_ = "aced"; 714 #12 3 4 5 715 /((a?(*ACCEPT)())())()/ 716 or die "Failed to match"; 717 is($1,"a",$message . "buffer 1 is defined with expected value"); 718 is($2,"a",$message . "buffer 2 is defined with expected value"); 719 ok(!defined($3),$message . "buffer 3 is not defined"); 720 ok(!defined($4),$message . "buffer 4 is not defined"); 721 ok(!defined($5),$message . "buffer 5 is not defined"); 722 ok(!defined($6),$message . "buffer 6 is not defined"); 723 $message= 'NO ACCEPT and CLOSE - '; 724 /((a?())())()/ 725 or die "Failed to match"; 726 is($1,"a",$message . "buffer 1 is defined with expected value"); 727 is($2,"a",$message . "buffer 2 is defined with expected value"); 728 is($3,"", $message . "buffer 3 is defined with expected value"); 729 is($4,"", $message . "buffer 4 is defined with expected value"); 730 is($5,"",$message . "buffer 5 is defined with expected value"); 731 ok(!defined($6),$message . "buffer 6 is not defined"); 732 #12 3 4 5 733 $message = 'ACCEPT and CLOSE - '; 734 /((a?(*ACCEPT)(c))(e))(d)/ 735 or die "Failed to match"; 736 is($1,"a",$message . "buffer 1 is defined with expected value"); 737 is($2,"a",$message . "buffer 2 is defined with expected value"); 738 ok(!defined($3),$message . "buffer 3 is not defined"); 739 ok(!defined($4),$message . "buffer 4 is not defined"); 740 ok(!defined($5),$message . "buffer 5 is not defined"); 741 ok(!defined($6),$message . "buffer 6 is not defined"); 742 $message= 'NO ACCEPT and CLOSE - '; 743 /((a?(c))(e))(d)/ 744 or die "Failed to match"; 745 is($1,"ace", $message . "buffer 1 is defined with expected value"); 746 is($2,"ac", $message . "buffer 2 is defined with expected value"); 747 is($3,"c", $message . "buffer 3 is defined with expected value"); 748 is($4,"e", $message . "buffer 4 is defined with expected value"); 749 is($5,"d", $message . "buffer 5 is defined with expected value"); 750 ok(!defined($6),$message . "buffer 6 is not defined"); 751 } 752 { 753 my $message = '@- and @+ and @{^CAPTURE} tests'; 754 755 $_= "ace"; 756 /c(?=.$)/; 757 is($#{^CAPTURE}, -1, $message); 758 is($#+, 0, $message); 759 is($#-, 0, $message); 760 is($+ [0], 2, $message); 761 is($- [0], 1, $message); 762 ok(!defined $+ [1] && !defined $- [1] && 763 !defined $+ [2] && !defined $- [2], $message); 764 765 /a(c)(e)/; 766 is($#{^CAPTURE}, 1, $message); # one less than $#- 767 is($#+, 2, $message); 768 is($#-, 2, $message); 769 is($+ [0], 3, $message); 770 is($- [0], 0, $message); 771 is(${^CAPTURE}[0], "c", $message); 772 is($+ [1], 2, $message); 773 is($- [1], 1, $message); 774 is(${^CAPTURE}[1], "e", $message); 775 is($+ [2], 3, $message); 776 is($- [2], 2, $message); 777 ok(!defined $+ [3] && !defined $- [3] && 778 !defined ${^CAPTURE}[2] && !defined ${^CAPTURE}[3] && 779 !defined $+ [4] && !defined $- [4], $message); 780 781 # Exists has a special check for @-/@+ - bug 45147 782 ok(exists $-[0], $message); 783 ok(exists $+[0], $message); 784 ok(exists ${^CAPTURE}[0], $message); 785 ok(exists ${^CAPTURE}[1], $message); 786 ok(exists $-[2], $message); 787 ok(exists $+[2], $message); 788 ok(!exists ${^CAPTURE}[2], $message); 789 ok(!exists $-[3], $message); 790 ok(!exists $+[3], $message); 791 ok(exists ${^CAPTURE}[-1], $message); 792 ok(exists ${^CAPTURE}[-2], $message); 793 ok(exists $-[-1], $message); 794 ok(exists $+[-1], $message); 795 ok(exists $-[-3], $message); 796 ok(exists $+[-3], $message); 797 ok(!exists $-[-4], $message); 798 ok(!exists $+[-4], $message); 799 ok(!exists ${^CAPTURE}[-3], $message); 800 801 802 /.(c)(b)?(e)/; 803 is($#{^CAPTURE}, 2, $message); # one less than $#- 804 is($#+, 3, $message); 805 is($#-, 3, $message); 806 is(${^CAPTURE}[0], "c", $message); 807 is(${^CAPTURE}[2], "e", $message . "[$1 $3]"); 808 is($+ [1], 2, $message); 809 is($- [1], 1, $message); 810 is($+ [3], 3, $message); 811 is($- [3], 2, $message); 812 ok(!defined $+ [2] && !defined $- [2] && 813 !defined $+ [4] && !defined $- [4] && 814 !defined ${^CAPTURE}[1], $message); 815 816 /.(c)/; 817 is($#{^CAPTURE}, 0, $message); # one less than $#- 818 is($#+, 1, $message); 819 is($#-, 1, $message); 820 is(${^CAPTURE}[0], "c", $message); 821 is($+ [0], 2, $message); 822 is($- [0], 0, $message); 823 is($+ [1], 2, $message); 824 is($- [1], 1, $message); 825 ok(!defined $+ [2] && !defined $- [2] && 826 !defined $+ [3] && !defined $- [3] && 827 !defined ${^CAPTURE}[1], $message); 828 829 /.(c)(ba*)?/; 830 is($#{^CAPTURE}, 0, $message); # one less than $#- 831 is($#+, 2, $message); 832 is($#-, 1, $message); 833 834 # Check that values don't stick 835 " "=~/()()()(.)(..)/; 836 my($m,$p,$q) = (\$-[5], \$+[5], \${^CAPTURE}[4]); 837 () = "$$_" for $m, $p, $q; # FETCH (or eqv.) 838 " " =~ /()/; 839 is $$m, undef, 'values do not stick to @- elements'; 840 is $$p, undef, 'values do not stick to @+ elements'; 841 is $$q, undef, 'values do not stick to @{^CAPTURE} elements'; 842 } 843 844 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', 845 '${^CAPTURE}[0] = 13', 846 '@- = qw (foo bar)', '$^N = 42') { 847 is(eval $_, undef); 848 like($@, qr/^Modification of a read-only value attempted/, 849 '$^N, @- and @+ are read-only'); 850 } 851 852 { 853 my $message = '\G testing'; 854 $_ = 'aaa'; 855 pos = 1; 856 my @a = /\Ga/g; 857 is("@a", "a a", $message); 858 859 my $str = 'abcde'; 860 pos $str = 2; 861 unlike($str, qr/^\G/, $message); 862 unlike($str, qr/^.\G/, $message); 863 like($str, qr/^..\G/, $message); 864 unlike($str, qr/^...\G/, $message); 865 ok($str =~ /\G../ && $& eq 'cd', $message); 866 ok($str =~ /.\G./ && $& eq 'bc', $message); 867 868 } 869 870 { 871 my $message = '\G and intuit and anchoring'; 872 $_ = "abcdef"; 873 pos = 0; 874 ok($_ =~ /\Gabc/, $message); 875 ok($_ =~ /^\Gabc/, $message); 876 877 pos = 3; 878 ok($_ =~ /\Gdef/, $message); 879 pos = 3; 880 ok($_ =~ /\Gdef$/, $message); 881 pos = 3; 882 ok($_ =~ /abc\Gdef$/, $message); 883 pos = 3; 884 ok($_ =~ /^abc\Gdef$/, $message); 885 pos = 3; 886 ok($_ =~ /c\Gd/, $message); 887 pos = 3; 888 ok($_ =~ /..\GX?def/, $message); 889 } 890 891 { 892 my $s = '123'; 893 pos($s) = 1; 894 my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 895 is("@a", "1", '\G looping'); 896 } 897 898 899 { 900 my $message = 'pos inside (?{ })'; 901 my $str = 'abcde'; 902 our ($foo, $bar); 903 like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); 904 is($foo, $str, $message); 905 is($bar, 2, $message); 906 is(pos $str, undef, $message); 907 908 undef $foo; 909 undef $bar; 910 pos $str = undef; 911 ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); 912 is($foo, $str, $message); 913 is($bar, 2, $message); 914 is(pos $str, 3, $message); 915 916 $_ = $str; 917 undef $foo; 918 undef $bar; 919 like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); 920 is($foo, $str, $message); 921 is($bar, 2, $message); 922 923 undef $foo; 924 undef $bar; 925 ok(/b(?{$foo = $_; $bar = pos})c/g, $message); 926 is($foo, $str, $message); 927 is($bar, 2, $message); 928 is(pos, 3, $message); 929 930 undef $foo; 931 undef $bar; 932 pos = undef; 933 1 while /b(?{$foo = $_; $bar = pos})c/g; 934 is($foo, $str, $message); 935 is($bar, 2, $message); 936 is(pos, undef, $message); 937 938 undef $foo; 939 undef $bar; 940 $_ = 'abcde|abcde'; 941 ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); 942 is($foo, 'abcde|abcde', $message); 943 is($bar, 8, $message); 944 is($_, 'axde|axde', $message); 945 946 # List context: 947 $_ = 'abcde|abcde'; 948 our @res; 949 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; 950 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 951 is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); 952 953 @res = (); 954 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; 955 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 956 is("@res", "'' 'ab' 'cde|abcde' " . 957 "'' 'abc' 'de|abcde' " . 958 "'abcd' 'e|' 'abcde' " . 959 "'abcde|' 'ab' 'cde' " . 960 "'abcde|' 'abc' 'de'", $message); 961 } 962 963 { 964 my $message = '\G anchor checks'; 965 my $foo = 'aabbccddeeffgg'; 966 pos ($foo) = 1; 967 968 ok($foo =~ /.\G(..)/g, $message); 969 is($1, 'ab', $message); 970 971 pos ($foo) += 1; 972 ok($foo =~ /.\G(..)/g, $message); 973 is($1, 'cc', $message); 974 975 pos ($foo) += 1; 976 ok($foo =~ /.\G(..)/g, $message); 977 is($1, 'de', $message); 978 979 ok($foo =~ /\Gef/g, $message); 980 981 undef pos $foo; 982 ok($foo =~ /\G(..)/g, $message); 983 is($1, 'aa', $message); 984 985 ok($foo =~ /\G(..)/g, $message); 986 is($1, 'bb', $message); 987 988 pos ($foo) = 5; 989 ok($foo =~ /\G(..)/g, $message); 990 is($1, 'cd', $message); 991 } 992 993 { 994 my $message = 'basic \G floating checks'; 995 my $foo = 'aabbccddeeffgg'; 996 pos ($foo) = 1; 997 998 ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); 999 is($1, 'ab', "$message: ab"); 1000 1001 pos ($foo) += 1; 1002 ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); 1003 is($1, 'cc', "$message: cc"); 1004 1005 pos ($foo) += 1; 1006 ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); 1007 is($1, 'de', "$message: de"); 1008 1009 ok($foo =~ /\Gef/g, "$message: \\Gef"); 1010 1011 pos ($foo) = 1; 1012 1013 ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); 1014 is($1, 'aa', "$message: aa"); 1015 1016 pos ($foo) = 2; 1017 1018 ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); 1019 is($1, 'ab', "$message: ab"); 1020 1021 } 1022 1023 { 1024 $_ = '123x123'; 1025 my @res = /(\d*|x)/g; 1026 local $" = '|'; 1027 is("@res", "123||x|123|", "0 match in alternation"); 1028 } 1029 1030 { 1031 my $message = "Match against temporaries (created via pp_helem())" . 1032 " is safe"; 1033 ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); 1034 is($1, "bar", $message); 1035 } 1036 1037 { 1038 my $message = 'package $i inside (?{ }), ' . 1039 'saved substrings and changing $_'; 1040 our @a = qw [foo bar]; 1041 our @b = (); 1042 s/(\w)(?{push @b, $1})/,$1,/g for @a; 1043 is("@b", "f o o b a r", $message); 1044 is("@a", ",f,,o,,o, ,b,,a,,r,", $message); 1045 1046 $message = 'lexical $i inside (?{ }), ' . 1047 'saved substrings and changing $_'; 1048 no warnings 'closure'; 1049 my @c = qw [foo bar]; 1050 my @d = (); 1051 s/(\w)(?{push @d, $1})/,$1,/g for @c; 1052 is("@d", "f o o b a r", $message); 1053 is("@c", ",f,,o,,o, ,b,,a,,r,", $message); 1054 } 1055 1056 { 1057 my $message = 'Brackets'; 1058 our $brackets; 1059 $brackets = qr { 1060 { (?> [^{}]+ | (??{ $brackets }) )* } 1061 }x; 1062 1063 ok("{{}" =~ $brackets, $message); 1064 is($&, "{}", $message); 1065 ok("something { long { and } hairy" =~ $brackets, $message); 1066 is($&, "{ and }", $message); 1067 ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); 1068 is($&, "{ and }", $message); 1069 } 1070 1071 { 1072 $_ = "a-a\nxbb"; 1073 pos = 1; 1074 ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); 1075 } 1076 1077 { 1078 my $message = '\G anchor checks'; 1079 my $text = "aaXbXcc"; 1080 pos ($text) = 0; 1081 ok($text !~ /\GXb*X/g, $message); 1082 } 1083 1084 { 1085 $_ = "xA\n" x 500; 1086 unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); 1087 1088 my $text = "abc dbf"; 1089 my @res = ($text =~ /.*?(b).*?\b/g); 1090 is("@res", "b b", '\b is not special'); 1091 } 1092 1093 { 1094 my $message = '\S, [\S], \s, [\s]'; 1095 my @a = map chr, 0 .. 255; 1096 my @b = grep m/\S/, @a; 1097 my @c = grep m/[^\s]/, @a; 1098 is("@b", "@c", $message); 1099 1100 @b = grep /\S/, @a; 1101 @c = grep /[\S]/, @a; 1102 is("@b", "@c", $message); 1103 1104 @b = grep /\s/, @a; 1105 @c = grep /[^\S]/, @a; 1106 is("@b", "@c", $message); 1107 1108 @b = grep /\s/, @a; 1109 @c = grep /[\s]/, @a; 1110 is("@b", "@c", $message); 1111 1112 # Test an inverted posix class with a char also in the class. 1113 my $nbsp = chr utf8::unicode_to_native(0xA0); 1114 my $non_s = chr utf8::unicode_to_native(0xA1); 1115 my $pat_string = "[^\\S ]"; 1116 unlike(" ", qr/$pat_string/, "Verify ' ' !~ /$pat_string/"); 1117 like("\t", qr/$pat_string/, "Verify '\\t =~ /$pat_string/"); 1118 unlike($nbsp, qr/$pat_string/, "Verify non-utf8-NBSP !~ /$pat_string/"); 1119 utf8::upgrade($nbsp); 1120 like($nbsp, qr/$pat_string/, "Verify utf8-NBSP =~ /$pat_string/"); 1121 unlike($non_s, qr/$pat_string/, "Verify non-utf8-inverted-bang !~ /$pat_string/"); 1122 utf8::upgrade($non_s); 1123 unlike($non_s, qr/$pat_string/, "Verify utf8-inverted-bang !~ /$pat_string/"); 1124 } 1125 { 1126 my $message = '\D, [\D], \d, [\d]'; 1127 my @a = map chr, 0 .. 255; 1128 my @b = grep /\D/, @a; 1129 my @c = grep /[^\d]/, @a; 1130 is("@b", "@c", $message); 1131 1132 @b = grep /\D/, @a; 1133 @c = grep /[\D]/, @a; 1134 is("@b", "@c", $message); 1135 1136 @b = grep /\d/, @a; 1137 @c = grep /[^\D]/, @a; 1138 is("@b", "@c", $message); 1139 1140 @b = grep /\d/, @a; 1141 @c = grep /[\d]/, @a; 1142 is("@b", "@c", $message); 1143 } 1144 { 1145 my $message = '\W, [\W], \w, [\w]'; 1146 my @a = map chr, 0 .. 255; 1147 my @b = grep /\W/, @a; 1148 my @c = grep /[^\w]/, @a; 1149 is("@b", "@c", $message); 1150 1151 @b = grep /\W/, @a; 1152 @c = grep /[\W]/, @a; 1153 is("@b", "@c", $message); 1154 1155 @b = grep /\w/, @a; 1156 @c = grep /[^\W]/, @a; 1157 is("@b", "@c", $message); 1158 1159 @b = grep /\w/, @a; 1160 @c = grep /[\w]/, @a; 1161 is("@b", "@c", $message); 1162 } 1163 1164 { 1165 # see if backtracking optimization works correctly 1166 my $message = 'Backtrack optimization'; 1167 like("\n\n", qr/\n $ \n/x, $message); 1168 like("\n\n", qr/\n* $ \n/x, $message); 1169 like("\n\n", qr/\n+ $ \n/x, $message); 1170 like("\n\n", qr/\n? $ \n/x, $message); 1171 like("\n\n", qr/\n*? $ \n/x, $message); 1172 like("\n\n", qr/\n+? $ \n/x, $message); 1173 like("\n\n", qr/\n?? $ \n/x, $message); 1174 unlike("\n\n", qr/\n*+ $ \n/x, $message); 1175 unlike("\n\n", qr/\n++ $ \n/x, $message); 1176 like("\n\n", qr/\n?+ $ \n/x, $message); 1177 } 1178 1179 { 1180 package S; 1181 use overload '""' => sub {'Object S'}; 1182 sub new {bless []} 1183 1184 my $message = "Ref stringification"; 1185 ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); 1186 ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); 1187 ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); 1188 ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); 1189 ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); 1190 } 1191 1192 { 1193 my $message = "Test result of match used as match"; 1194 ok('a1b' =~ ('xyz' =~ /y/), $message); 1195 is($`, 'a', $message); 1196 ok('a1b' =~ ('xyz' =~ /t/), $message); 1197 is($`, 'a', $message); 1198 } 1199 1200 { 1201 my $message = '"1" is not \s'; 1202 warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, 1203 undef, "$message (did not warn)"); 1204 } 1205 1206 { 1207 my $message = '\s, [[:space:]] and [[:blank:]]'; 1208 my %space = (spc => " ", 1209 tab => "\t", 1210 cr => "\r", 1211 lf => "\n", 1212 ff => "\f", 1213 # There's no \v but the vertical tabulator seems miraculously 1214 # be 11 both in ASCII and EBCDIC. 1215 vt => chr(11), 1216 false => "space"); 1217 1218 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; 1219 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; 1220 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; 1221 1222 is("@space0", "cr ff lf spc tab vt", $message); 1223 is("@space1", "cr ff lf spc tab vt", $message); 1224 is("@space2", "spc tab", $message); 1225 } 1226 1227 { 1228 my $n= 50; 1229 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't 1230 # seem to be predictable. Slight changes to the test make it fail earlier or later. 1231 foreach my $i (0 .. $n) 1232 { 1233 my $str= "\n" x $i; 1234 ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; 1235 } 1236 } 1237 { 1238 # we are actually testing that we dont die when executing these patterns 1239 use utf8; 1240 my $e = "Böck"; 1241 ok(utf8::is_utf8($e),"got a unicode string - rt75680"); 1242 1243 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); 1244 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); 1245 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); 1246 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); 1247 } 1248 { 1249 # we are actually testing that we dont die when executing these patterns 1250 my $e = "B" . uni_to_native("\x{f6}") . "ck"; 1251 ok(!utf8::is_utf8($e), "got a latin string - rt75680"); 1252 1253 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); 1254 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); 1255 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); 1256 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); 1257 } 1258 1259 { 1260 # 1261 # Tests for bug 77414. 1262 # 1263 1264 my $message = '\p property after empty * match'; 1265 { 1266 like("1", qr/\s*\pN/, $message); 1267 like("-", qr/\s*\p{Dash}/, $message); 1268 like(" ", qr/\w*\p{Blank}/, $message); 1269 } 1270 1271 like("1", qr/\s*\pN+/, $message); 1272 like("-", qr/\s*\p{Dash}{1}/, $message); 1273 like(" ", qr/\w*\p{Blank}{1,4}/, $message); 1274 1275 } 1276 1277 { # Some constructs with Latin1 characters cause a utf8 string not 1278 # to match itself in non-utf8 1279 my $c = uni_to_native("\xc0"); 1280 my $pattern = my $utf8_pattern = qr/(($c)+,?)/; 1281 utf8::upgrade($utf8_pattern); 1282 ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; 1283 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; 1284 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; 1285 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; 1286 utf8::upgrade($c); 1287 ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; 1288 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; 1289 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; 1290 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; 1291 } 1292 1293 { # Make sure can override the formatting 1294 use feature 'unicode_strings'; 1295 ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; 1296 ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; 1297 } 1298 1299 { 1300 my $str= "\x{100}"; 1301 chop $str; 1302 my $qr= qr/$str/; 1303 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); 1304 $str= ""; 1305 $qr= qr/$str/; 1306 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); 1307 1308 } 1309 1310 { 1311 local $::TODO = "[perl #38133]"; 1312 1313 "A" =~ /(((?:A))?)+/; 1314 my $first = $2; 1315 1316 "A" =~ /(((A))?)+/; 1317 my $second = $2; 1318 1319 is($first, $second); 1320 } 1321 1322 { 1323 # RT #3516: \G in a m//g expression causes problems 1324 my $count = 0; 1325 while ("abc" =~ m/(\G[ac])?/g) { 1326 last if $count++ > 10; 1327 } 1328 ok($count < 10, 'RT #3516 A'); 1329 1330 $count = 0; 1331 while ("abc" =~ m/(\G|.)[ac]/g) { 1332 last if $count++ > 10; 1333 } 1334 ok($count < 10, 'RT #3516 B'); 1335 1336 $count = 0; 1337 while ("abc" =~ m/(\G?[ac])?/g) { 1338 last if $count++ > 10; 1339 } 1340 ok($count < 10, 'RT #3516 C'); 1341 } 1342 { 1343 # RT #84294: Is this a bug in the simple Perl regex? 1344 # : Nested buffers and (?{...}) dont play nicely on partial matches 1345 our @got= (); 1346 ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); 1347 my $want= "'ab', 'a', 'b'"; 1348 my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); 1349 is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); 1350 } 1351 1352 { 1353 # Suppress warnings, as the non-unicode one comes out even if turn off 1354 # warnings here (because the execution is done in another scope). 1355 local $SIG{__WARN__} = sub {}; 1356 my $str = "\x{110000}"; 1357 1358 unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); 1359 like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); 1360 like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); 1361 unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); 1362 } 1363 1364 { 1365 # Test that IDstart works, but because the author (khw) knows 1366 # regexes much better than the rest of the core, it is being done here 1367 # in the context of a regex which relies on buffer names beginng with 1368 # IDStarts. 1369 use utf8; 1370 my $str = "abc"; 1371 like($str, qr/(?<a>abc)/, "'a' is legal IDStart"); 1372 like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); 1373 like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); 1374 like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); 1375 1376 # This test works on Unicode 6.0 in which U+2118 and U+212E are legal 1377 # IDStarts there, but are not Word characters, and therefore Perl 1378 # doesn't allow them to be IDStarts. But there is no guarantee that 1379 # Unicode won't change things around in the future so that at some 1380 # future Unicode revision these tests would need to be revised. 1381 foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { 1382 my $prog = <<"EOP"; 1383use utf8;; 1384"abc" =~ qr/(?<$char>abc)/; 1385EOP 1386 utf8::encode($prog); 1387 fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, 1388 sprintf("'U+%04X not legal IDFirst'", ord($char))); 1389 } 1390 } 1391 1392 { # [perl #101710] 1393 my $pat = "b"; 1394 utf8::upgrade($pat); 1395 like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); 1396 } 1397 1398 { # Crash with @a =~ // warning 1399 local $SIG{__WARN__} = sub { 1400 pass 'no crash for @a =~ // warning' 1401 }; 1402 eval ' sub { my @a =~ // } '; 1403 } 1404 1405 { # Concat overloading and qr// thingies 1406 my @refs; 1407 my $qr = qr//; 1408 package Cat { 1409 require overload; 1410 overload->import( 1411 '""' => sub { ${$_[0]} }, 1412 '.' => sub { 1413 push @refs, ref $_[1] if ref $_[1]; 1414 bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" 1415 } 1416 ); 1417 } 1418 my $s = "foo"; 1419 my $o = bless \$s, Cat::; 1420 /$o$qr/; 1421 is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; 1422 } 1423 1424 { 1425 my $count=0; 1426 my $str="\n"; 1427 $count++ while $str=~/.*/g; 1428 is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; 1429 my $class_count= 0; 1430 $class_count++ while $str=~/[^\n]*/g; 1431 is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; 1432 my $anch_count= 0; 1433 $anch_count++ while $str=~/^.*/mg; 1434 is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; 1435 } 1436 1437 { # [perl #111174] 1438 use re '/u'; 1439 my $A_grave = uni_to_native("\xc0"); 1440 like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u"; 1441 use re '/a'; 1442 unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; 1443 use re '/aa'; 1444 unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; 1445 unlike 'k', qr'(?i:\N{KELVIN SIGN})', "(?i: shouldn't lose the passed in /aa"; 1446 } 1447 1448 { 1449 # the test for whether the pattern should be re-compiled should 1450 # consider the UTF8ness of the previous and current pattern 1451 # string, as well as the physical bytes of the pattern string 1452 1453 for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") { 1454 ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); 1455 } 1456 } 1457 1458 # #113682 more overloading and qr// 1459 # when doing /foo$overloaded/, if $overloaded returns 1460 # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' 1461 # shouldn't be required. Via '.', it still is. 1462 { 1463 package Qr0; 1464 use overload 'qr' => sub { qr/(??{50})/ }; 1465 1466 package Qr1; 1467 use overload '""' => sub { qr/(??{51})/ }; 1468 1469 package Qr2; 1470 use overload '.' => sub { $_[1] . qr/(??{52})/ }; 1471 1472 package Qr3; 1473 use overload '""' => sub { qr/(??{7})/ }, 1474 '.' => sub { $_[1] . qr/(??{53})/ }; 1475 1476 package Qr_indirect; 1477 use overload '""' => sub { $_[0][0] }; 1478 1479 package main; 1480 1481 for my $i (0..3) { 1482 my $o = bless [], "Qr$i"; 1483 if ((0,0,1,1)[$i]) { 1484 eval { "A5$i" =~ /^A$o$/ }; 1485 like($@, qr/Eval-group not allowed/, "Qr$i"); 1486 eval { "5$i" =~ /$o/ }; 1487 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1488 "Qr$i bare"); 1489 { 1490 use re 'eval'; 1491 ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); 1492 eval { "5$i" =~ /$o/ }; 1493 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1494 "Qr$i bare - with use re eval"); 1495 } 1496 } 1497 else { 1498 ok("A5$i" =~ /^A$o$/, "Qr$i"); 1499 ok("5$i" =~ /$o/, "Qr$i bare"); 1500 } 1501 } 1502 1503 my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; 1504 ok("A51" =~ /^A$o/, "Qr_indirect"); 1505 ok("51" =~ /$o/, "Qr_indirect bare"); 1506 } 1507 1508 { # Various flags weren't being set when a [] is optimized into an 1509 # EXACTish node 1510 ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization"); 1511 } 1512 1513 { # Test that it avoids spllitting a multi-char fold across nodes. 1514 # These all fold to things that are like 'ss', which, if split across 1515 # nodes could fail to match a single character that folds to the 1516 # combination. 1F0 byte expands when folded; 1517 my $utf8_locale = find_utf8_ctype_locale(); 1518 for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") { 1519 my $length = 260; # Long enough to overflow an EXACTFish regnode 1520 my $p = $char x $length; 1521 my $s = ($char eq $sharp_s) ? 'ss' 1522 : $char eq "\x{1F0}" 1523 ? "j\x{30c}" 1524 : 'ff'; 1525 $s = $s x $length; 1526 for my $charset (qw(u d l aa)) { 1527 for my $utf8 (0..1) { 1528 for my $locale ('C', $utf8_locale) { 1529 SKIP: 1530 { 1531 skip "test skipped for non-C locales", 2 1532 if $charset ne 'l' 1533 && (! defined $locale || $locale ne 'C'); 1534 if ($charset eq 'l') { 1535 skip 'Locales not available', 2 1536 unless $has_locales; 1537 if (! defined $locale) { 1538 skip "No UTF-8 locale", 2; 1539 } 1540 skip "Can't test in miniperl",2 1541 if is_miniperl(); 1542 1543 require POSIX; 1544 POSIX::setlocale(&LC_CTYPE, $locale); 1545 } 1546 1547 my $pat = $p; 1548 utf8::upgrade($pat) if $utf8; 1549 my $should_pass = 1550 ( $charset eq 'u' 1551 || ($charset eq 'd' && $utf8) 1552 || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/ 1553 || ord $char > 255)) 1554 || ($charset eq 'aa' && $char =~ /[[:ascii:]]/) 1555 || ($charset eq 'l' && $locale ne 'C') 1556 || ($charset eq 'l' && $char =~ /[[:ascii:]]/) 1557 ); 1558 my $name = "(?i$charset), utf8=$utf8, locale=$locale," 1559 . " char=" . sprintf "%x", ord $char; 1560 no warnings 'locale'; 1561 is (eval " '$s' =~ qr/(?i$charset)$pat/;", 1562 $should_pass, $name); 1563 fail "$name: $@" if $@; 1564 is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;", 1565 $should_pass, "extra a, $name"); 1566 fail "$name: $@" if $@; 1567 } 1568 } 1569 } 1570 } 1571 } 1572 } 1573 1574 SKIP: 1575 { 1576 skip "no re debug", 5 if is_miniperl; 1577 my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus 1578 # small change 1579 my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)"; 1580 my $pattern = "$pattern_prefix; qr/$s/;"; 1581 my $result = fresh_perl($pattern); 1582 if ($? != 0) { # Re-run so as to display STDERR. 1583 fail($pattern); 1584 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1585 } 1586 like($result, qr/Final program[^X]*\bLEXACT\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1587 "Check that LEXACT nodes are generated"); 1588 like($s, qr/$s/, "Check that LEXACT nodes match"); 1589 like("a$s", qr/a$s/, "Previous test preceded by an 'a'"); 1590 substr($s, 260000, 1) = "\x{100}"; 1591 $pattern = "$pattern_prefix; qr/$s/;"; 1592 $result = fresh_perl($pattern, { 'wide_chars' => 1 } ); 1593 if ($? != 0) { # Re-run so as to display STDERR. 1594 fail($pattern); 1595 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1596 } 1597 like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1598 "Check that an LEXACT_ONLY node is generated with a \\x{100}"); 1599 like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match"); 1600 } 1601 1602 { 1603 for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") { 1604 my $utf8_char = $char; 1605 utf8::upgrade($utf8_char); 1606 my $display = $char; 1607 $display = display($display); 1608 my $utf8_display = "utf8::upgrade(\"$display\")"; 1609 1610 like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); 1611 like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); 1612 like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); 1613 like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); 1614 } 1615 } 1616 1617 { 1618 # #116148: Pattern utf8ness sticks around globally 1619 # the utf8 in the first match was sticking around for the second 1620 # match 1621 1622 use feature 'unicode_strings'; 1623 1624 my $x = "\x{263a}"; 1625 $x =~ /$x/; 1626 1627 my $text = "Perl"; 1628 ok("Perl" =~ /P.*$/i, '#116148'); 1629 } 1630 1631 { # 118297: Mixing up- and down-graded strings in regex 1632 utf8::upgrade(my $u = "\x{e5}"); 1633 utf8::downgrade(my $d = "\x{e5}"); 1634 my $warned; 1635 local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; 1636 my $re = qr/$u$d/; 1637 ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); 1638 my $c = "\x{e5}\x{e5}"; 1639 utf8::downgrade($c); 1640 like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); 1641 utf8::upgrade($c); 1642 like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); 1643 } 1644 1645 { 1646 # if we have 87 capture buffers defined then \87 should refer to the 87th. 1647 # test that this is true for 1..100 1648 # Note that this test causes the engine to recurse at runtime, and 1649 # hence use a lot of C stack. 1650 1651 # Compiling for all 100 nested captures blows the stack under 1652 # clang and ASan; reduce. 1653 my $max_captures = $Config{ccflags} =~ /sanitize/ ? 20 : 100; 1654 1655 for my $i (1..100) { 1656 if ($i > $max_captures) { 1657 pass("skipping $i buffers under ASan aa"); 1658 pass("skipping $i buffers under ASan aba"); 1659 next; 1660 } 1661 my $capture= "a"; 1662 $capture= "($capture)" for 1 .. $i; 1663 for my $mid ("","b") { 1664 my $str= "a${mid}a"; 1665 my $backref= "\\$i"; 1666 eval { 1667 ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); 1668 1; 1669 } or do { 1670 is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); 1671 }; 1672 } 1673 } 1674 } 1675 1676 # this mixture of readonly (not COWable) and COWable strings 1677 # messed up the capture buffers under COW. The actual test results 1678 # are incidental; the issue is was an AddressSanitizer failure 1679 { 1680 my $c ='AB'; 1681 my $res = ''; 1682 for ($c, 'C', $c, 'DE') { 1683 ok(/(.)/, "COWable match"); 1684 $res .= $1; 1685 } 1686 is($res, "ACAD"); 1687 } 1688 1689 1690 { 1691 # RT #45667 1692 # /[#$x]/x didn't interpolate the var $x. 1693 my $b = 'cd'; 1694 my $s = 'abcd$%#&'; 1695 $s =~ s/[a#$b%]/X/g; 1696 is ($s, 'XbXX$XX&', 'RT #45667 without /x'); 1697 $s = 'abcd$%#&'; 1698 $s =~ s/[a#$b%]/X/gx; 1699 is ($s, 'XbXX$XX&', 'RT #45667 with /x'); 1700 } 1701 1702 { 1703 no warnings "uninitialized"; 1704 my @a; 1705 $a[1]++; 1706 /@a/; 1707 pass('no crash with /@a/ when array has nonexistent elems'); 1708 } 1709 1710 { 1711 is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), 1712 "ok\n", 1713 'deleting *^R does not result in crashes'; 1714 no warnings 'once'; 1715 *^R = *caretRglobwithnoscalar; 1716 "" =~ /(?{42})/; 1717 is $^R, 42, 'assigning to *^R does not result in a crash'; 1718 is runperl( 1719 stderr => 1, 1720 prog => 'eval q|' 1721 .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' 1722 .' print qq-$^R\n-' 1723 .'|' 1724 ), 1725 "42\n", 1726 'undefining *^R within (??{}) does not result in a crash'; 1727 } 1728 1729 SKIP: { # Test literal range end point special handling 1730 unless ($::IS_EBCDIC) { 1731 skip "Valid only for EBCDIC", 24; 1732 } 1733 1734 like("\x89", qr/[i-j]/, '"\x89" should match [i-j]'); 1735 unlike("\x8A", qr/[i-j]/, '"\x8A" shouldnt match [i-j]'); 1736 unlike("\x90", qr/[i-j]/, '"\x90" shouldnt match [i-j]'); 1737 like("\x91", qr/[i-j]/, '"\x91" should match [i-j]'); 1738 1739 like("\x89", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); 1740 unlike("\x8A", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1741 unlike("\x90", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1742 like("\x91", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); 1743 1744 like("\x89", qr/[i-\N{U+6A}]/, '"\x89" should match [i-\N{U+6A}]'); 1745 unlike("\x8A", qr/[i-\N{U+6A}]/, '"\x8A" shouldnt match [i-\N{U+6A}]'); 1746 unlike("\x90", qr/[i-\N{U+6A}]/, '"\x90" shouldnt match [i-\N{U+6A}]'); 1747 like("\x91", qr/[i-\N{U+6A}]/, '"\x91" should match [i-\N{U+6A}]'); 1748 1749 like("\x89", qr/[\N{U+69}-\N{U+6A}]/, '"\x89" should match [\N{U+69}-\N{U+6A}]'); 1750 unlike("\x8A", qr/[\N{U+69}-\N{U+6A}]/, '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); 1751 unlike("\x90", qr/[\N{U+69}-\N{U+6A}]/, '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); 1752 like("\x91", qr/[\N{U+69}-\N{U+6A}]/, '"\x91" should match [\N{U+69}-\N{U+6A}]'); 1753 1754 like("\x89", qr/[i-\x{91}]/, '"\x89" should match [i-\x{91}]'); 1755 like("\x8A", qr/[i-\x{91}]/, '"\x8A" should match [i-\x{91}]'); 1756 like("\x90", qr/[i-\x{91}]/, '"\x90" should match [i-\x{91}]'); 1757 like("\x91", qr/[i-\x{91}]/, '"\x91" should match [i-\x{91}]'); 1758 1759 # Need to use eval, because tries to compile on ASCII platforms even 1760 # though the tests are skipped, and fails because 0x89-j is an illegal 1761 # range there. 1762 like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]'); 1763 like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]'); 1764 like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]'); 1765 like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]'); 1766 } 1767 1768 # These are based on looking at the code in regcomp.c 1769 # We don't look for specific code, just the existence of an SSC 1770 foreach my $re (qw( qr/a?c/ 1771 qr/a?c/i 1772 qr/[ab]?c/ 1773 qr/\R?c/ 1774 qr/\d?c/d 1775 qr/\w?c/l 1776 qr/\s?c/a 1777 qr/[[:lower:]]?c/u 1778 )) { 1779 SKIP: { 1780 skip "no re-debug under miniperl" if is_miniperl; 1781 my $prog = <<"EOP"; 1782use re qw(Debug COMPILE); 1783$re; 1784EOP 1785 fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); 1786 } 1787 } 1788 1789 { 1790 like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; 1791 } 1792 1793 SKIP: { 1794 skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC; 1795 1796 # Verify that the very last Latin-1 U+00FF 1797 # (LATIN SMALL LETTER Y WITH DIAERESIS) 1798 # and its UPPER counterpart (U+0178 which is pure Unicode), 1799 # and likewise for the very first pure Unicode 1800 # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly, 1801 # and there are no off-by-one logic errors in the transition zone. 1802 1803 ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l"); 1804 ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u"); 1805 ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l"); 1806 ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u"); 1807 1808 # U+00FF with U+05D0 (non-casing Hebrew letter). 1809 ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l"); 1810 ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u"); 1811 ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l"); 1812 ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u"); 1813 1814 # U+0100. 1815 ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u"); 1816 ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l"); 1817 ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u"); 1818 ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l"); 1819 } 1820 1821 { 1822 use utf8; 1823 ok("abc" =~ /a b c/x, "NEL is white-space under /x"); 1824 } 1825 1826 { 1827 ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)"); 1828 ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]"); 1829 ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta 1830 "'\\{' is a literal in qr{...}, where it could be a quantifier"); 1831 1832 # This one is for completeness 1833 ok('a<b>c' =~ qr<a\<b\>c>, "'\\<' is a literal in qr<...>)"); 1834 } 1835 1836 { # Was getting optimized into EXACT (non-folding node) 1837 my $x = qr/[x]/i; 1838 utf8::upgrade($x); 1839 like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); 1840 } 1841 1842 { # Special handling of literal-ended ranges in [...] was breaking this 1843 use utf8; 1844 like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]"); 1845 } 1846 1847 { # [perl #123539] 1848 like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1849 like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1850 } 1851 1852 { # [perl #123604] 1853 my($s, $x, @x) = ('abc', 'a', 'd'); 1854 my $long = 'b' x 2000; 1855 my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0}; 1856 $eval =~ s{bbb}{$long}; 1857 my $match = eval $eval; 1858 ok(1, "did not crash"); 1859 ok($match, "[bbb...] resolved as character class, not subscript"); 1860 } 1861 1862 { # [perl #123755] 1863 for my $pat ('(??', '(?P', '(?i-') { 1864 eval qq{ qr/$pat/ }; 1865 ok(1, "qr/$pat/ did not crash"); 1866 eval qq{ qr/${pat}\x{123}/ }; 1867 my $e = $@; 1868 like($e, qr{\x{123}}, 1869 "qr/${pat}x/ shows x in error even if it's a wide character"); 1870 } 1871 } 1872 1873 { 1874 # Expect one of these sizes to cause overflow and wrap to negative 1875 for my $bits (32, 64) { 1876 my $wrapneg = 2 ** ($bits - 2) * 3; 1877 for my $sign ('', '-') { 1878 my $pat = sprintf "qr/(?%s%u)/", $sign, $wrapneg; 1879 eval $pat; 1880 ok(1, "big backref $pat did not crash"); 1881 } 1882 } 1883 } 1884 { 1885 # Test that we handle qr/\8888888/ and variants without an infinite loop, 1886 # we use a test within a test so we can todo it, and make sure we don't 1887 # infinite loop our tests. 1888 # NOTE - Do not put quotes in the code! 1889 # NOTE - We have to triple escape the backref in the pattern below. 1890 my $code=' 1891 BEGIN{require q(./test.pl);} 1892 watchdog(3); 1893 for my $len (1 .. 20) { 1894 my $eights= q(8) x $len; 1895 eval qq{ qr/\\\\$eights/ }; 1896 } 1897 print q(No infinite loop here!); 1898 '; 1899 fresh_perl_is($code, "No infinite loop here!", {}, 1900 "test that we handle things like m/\\888888888/ without infinite loops" ); 1901 } 1902 1903 SKIP: 1904 { # Test that we handle some malformed UTF-8 without looping [perl 1905 # #123562] 1906 skip "no Encode", 1 if is_miniperl; 1907 my $code=' 1908 BEGIN{require q(./test.pl);} 1909 use Encode qw(_utf8_on); 1910 # \x80 and \x41 are continuation bytes in their respective 1911 # character sets 1912 my $malformed = (ord("A") == 65) ? "a\x80\n" : "a\x41\n"; 1913 utf8::downgrade($malformed); 1914 _utf8_on($malformed); 1915 watchdog(3); 1916 $malformed =~ /(\n\r|\r)$/; 1917 print q(No infinite loop here!); 1918 '; 1919 fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, 1920 "test that we handle some UTF-8 malformations without looping" ); 1921 } 1922 1923 { 1924 # [perl #123843] hits SEGV trying to compile this pattern 1925 my $match; 1926 eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) }; 1927 ok(1, "compiled GOSUB in CURLYM ok"); 1928 is($match, 'xxyxxyx', "matched GOSUB in CURLYM"); 1929 } 1930 1931 { 1932 # [perl #123852] doesn't avoid all the capture-related work with 1933 # //n, leading to possible memory corruption 1934 eval q{ qr{()(?1)}n }; 1935 my $error = $@; 1936 ok(1, "qr{()(?1)}n didn't crash"); 1937 like($error, qr{Reference to nonexistent group}, 1938 'gave appropriate error for qr{()(?1)}n'); 1939 } 1940 1941 { 1942 # [perl #126406] panic with unmatchable quantifier 1943 my $code=' 1944 no warnings "regexp"; 1945 "" =~ m/(.0\N{6,0}0\N{6,0}000000000000000000000000000000000)/; 1946 '; 1947 fresh_perl_is($code, "", {}, 1948 "perl [#126406] panic"); 1949 } 1950 { 1951 my $bug="[perl #126182]"; # test for infinite pattern recursion 1952 for my $tuple ( 1953 [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ], 1954 [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ], 1955 [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/', 1956 "died", "inter-cyclic optional left recursion dies" ], 1957 [ 'q(abc) =~ /a((?1)?)c/', "died", "optional left recursion dies" ], 1958 [ 'q(abc) =~ /a((?1)??)c/', "died", "min mod left recursion dies" ], 1959 [ 'q(abc) =~ /a((?1)*)c/', "died", "* left recursion dies" ], 1960 [ 'q(abc) =~ /a((?1)+)c/', "died", "+ left recursion dies" ], 1961 [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ], 1962 1963 [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ], 1964 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched", 1965 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1966 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched", 1967 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1968 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched", 1969 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1970 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched", 1971 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1972 ) { 1973 my ($expr, $expect, $test_name, $cap1)= @$tuple; 1974 # avoid quotes in this code! 1975 my $code=' 1976 BEGIN{require q(./test.pl);} 1977 watchdog(3); 1978 my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' . 1979 ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') . 1980 ' q(matched) }) 1981 || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) ); 1982 print $status; 1983 '; 1984 fresh_perl_is($code, $expect, {}, "$bug - $test_name" ); 1985 } 1986 } 1987 { 1988 my $is_cygwin = $^O eq "cygwin"; 1989 local $::TODO = "this flaps on github cygwin vm, but not on cygwin iron #18129" 1990 if $is_cygwin; 1991 my $expected = "Timeout"; 1992 my $code = ' 1993 BEGIN{require q(test.pl);} 1994 watchdog(3); 1995 $SIG{ALRM} = sub {print "'.$expected.'\n"; exit(1)}; 1996 alarm 1; 1997 $_ = "a" x 1000 . "b" x 1000 . "c" x 1000; 1998 /.*a.*b.*c.*[de]/; 1999 print "increase the multipliers in the regex above to run the regex longer"; 2000 '; 2001 # this flaps on github cygwin vm, but not on cygwin iron #18129 2002 # so on cygwin it's run for 50 seconds to see if it fails eventually 2003 my $max = $is_cygwin ? 50 : 1; 2004 my ($iter, $result, $status); 2005 for my $i (1..$max) { 2006 $iter = $i; 2007 $result = fresh_perl($code,{}); 2008 $status = $?; 2009 last if $result ne $expected; 2010 } 2011 is($result, $expected, "Test Perl 73464") 2012 or diag "PROG:", $code, "STATUS:", $status, "failed on iteration: $iter"; 2013 } 2014 2015 { # [perl #128686], crashed the the interpreter 2016 my $AE = chr utf8::unicode_to_native(0xC6); 2017 my $ae = chr utf8::unicode_to_native(0xE6); 2018 my $re = qr/[$ae\s]/i; 2019 ok($AE !~ $re, '/[\xE6\s]/i doesn\'t match \xC6 when not in UTF-8'); 2020 utf8::upgrade $AE; 2021 ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8'); 2022 } 2023 2024 { 2025 is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|); 2026 } 2027 2028 { 2029 my $str = "a\xB6"; 2030 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - latin1 case" ); 2031 utf8::upgrade($str); 2032 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - utf8 case" ); 2033 } 2034 { 2035 my $got= run_perl( switches => [ '-l' ], prog => <<'EOF_CODE' ); 2036 my $died= !eval { 2037 $_=qq(ab); 2038 print; 2039 my $p=qr/(?{ s!!x! })/; 2040 /$p/; 2041 print; 2042 /a/; 2043 /$p/; 2044 print; 2045 /b/; 2046 /$p/; 2047 print; 2048 //; 2049 1; 2050 }; 2051 $error = $died ? ($@ || qq(Zombie)) : qq(none); 2052 print $died ? qq(died) : qq(lived); 2053 print qq(Error: $@); 2054EOF_CODE 2055 my @got= split /\n/, $got; 2056 is($got[0],"ab","empty pattern in regex codeblock: got expected start string"); 2057 is($got[1],"xab", 2058 "empty pattern in regex codeblock: first subst with no last-match worked right"); 2059 is($got[2],"xxb","empty pattern in regex codeblock: second subst worked right"); 2060 is($got[3],"xxx","empty pattern in regex codeblock: third subst worked right"); 2061 is($got[4],"died","empty pattern in regex codeblock: died as expected"); 2062 like($got[5],qr/Error: Infinite recursion via empty pattern/, 2063 "empty pattern in regex codeblock: produced the right exception message" ); 2064 } 2065 2066 # This test is based on the one directly above, which happened to 2067 # leak. Repeat the test, but stripped down to the bare essentials 2068 # of the leak, which is to die while executing a regex which is 2069 # already the current regex, thus causing the saved outer set of 2070 # capture offsets to leak. The test itself doesn't do anything 2071 # except sit around hoping not to be triggered by ASan 2072 { 2073 eval { 2074 my $s = "abcd"; 2075 $s =~ m{([abcd]) (?{ die if $1 eq 'd'; })}gx; 2076 $s =~ //g; 2077 $s =~ //g; 2078 $s =~ //g; 2079 }; 2080 pass("call to current regex doesn't leak"); 2081 } 2082 2083 2084 2085 { 2086 # [perl #130495] /x comment skipping stopped a byte short, leading 2087 # to assertion failure or 'malformed utf-8 character" warning 2088 fresh_perl_is( 2089 "use utf8; m{a#\x{124}}x", '', {wide_chars => 1}, 2090 '[perl #130495] utf-8 character at end of /x comment should not misparse', 2091 ); 2092 } 2093 { 2094 # [perl #130522] causes out-of-bounds read detected by clang with 2095 # address=sanitized when length of the STCLASS string is greater than 2096 # length of target string. 2097 my $re = qr{(?=\0z)\0?z?$}i; 2098 my($yes, $no) = (1, ""); 2099 for my $test ( 2100 [ $no, undef, '<undef>' ], 2101 [ $no, '', '' ], 2102 [ $no, "\0", '\0' ], 2103 [ $yes, "\0z", '\0z' ], 2104 [ $no, "\0z\0", '\0z\0' ], 2105 [ $yes, "\0z\n", '\0z\n' ], 2106 ) { 2107 my($result, $target, $disp) = @$test; 2108 no warnings qw/uninitialized/; 2109 is($target =~ $re, $result, "[perl #130522] with target '$disp'"); 2110 } 2111 } 2112 { 2113 # [perl #129377] backref to an unmatched capture should not cause 2114 # reading before start of string. 2115 SKIP: { 2116 skip "no re-debug under miniperl" if is_miniperl; 2117 my $prog = <<'EOP'; 2118use re qw(Debug EXECUTE); 2119"x" =~ m{ () y | () \1 }x; 2120EOP 2121 fresh_perl_like($prog, qr{ 2122 \A (?! .* ^ \s+ - ) 2123 }msx, { stderr => 1 }, "Offsets in debug output are not negative"); 2124 } 2125 } 2126 { 2127 # buffer overflow 2128 2129 # This test also used to leak - fixed by the commit which added 2130 # this line. 2131 2132 fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", 2133 "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", 2134 {}, "buffer overflow for regexp component"); 2135 } 2136 { 2137 # [perl #129281] buffer write overflow, detected by ASAN, valgrind 2138 fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much"); 2139 } 2140 { 2141 # RT #131893 - fails with ASAN -fsanitize=undefined 2142 fresh_perl_is('qr/0(0?(0||00*))|/', '', {}, "integer overflow during compilation"); 2143 } 2144 2145 { 2146 # RT #131575 intuit skipping back from the end to find the highest 2147 # possible start point, was potentially hopping back beyond pos() 2148 # and crashing by calling fbm_instr with a negative length 2149 2150 my $text = "=t=\x{5000}"; 2151 pos($text) = 3; 2152 ok(scalar($text !~ m{(~*=[a-z]=)}g), "RT #131575"); 2153 } 2154 { 2155 fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly"); 2156 } 2157 { 2158 fresh_perl_is('$_="0\x{1000000}";/^000?\0000/','',{},"dont throw assert errors trying to fbm past end of string"); 2159 } 2160 { # [perl $132227] 2161 fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2162 2163 # A variation, but as far as khw knows not part of 132227 2164 fresh_perl_is("'0bssa' =~ m/0B" . $sharp_s . "\\N{U+41}" . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2165 } 2166 { # [perl $132164] 2167 fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer"); 2168 } 2169 { # [perl #133642] 2170 fresh_perl_is('no warnings "experimental::vlb"; 2171 m/((?<=(0?)))/', "",{},"Was getting 'Double free'"); 2172 } 2173 { # [perl #133782] 2174 # this would panic on DEBUGGING builds 2175 fresh_perl_is(<<'CODE', "ok\nok\n",{}, 'Bad length magic was left on $^R'); 2176while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) { 2177 print "ok\n" if length($^R)==length("$^R"); 2178} 2179CODE 2180 } 2181 { # [perl #133871], ASAN/valgrind out-of-bounds access 2182 fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]"); 2183 } 2184 { # [perl #133871], ASAN/valgrind out-of-bounds access 2185 fresh_perl_like('qr/\p{nv:NAnq}/', qr/Can't find Unicode property definition/, {}, "GH #17367"); 2186 } 2187 { # GH #17370, ASAN/valgrind out-of-bounds access 2188 fresh_perl_like('qr/\p{nv:qnan}/', qr/Can't find Unicode property definition/, {}, "GH #17370"); 2189 } 2190 { # GH #17371, segfault 2191 fresh_perl_like('qr/\p{nv=\\\\\}(?0)|\337ss|\337ss//', qr/Unicode property wildcard not terminated/, {}, "GH #17371"); 2192 } 2193 { # GH #17384, ASAN/valgrind out-of-bounds access 2194 fresh_perl_like('"q0" =~ /\p{__::Is0}/', qr/Unknown user-defined property name \\p\{__::Is0}/, {}, "GH #17384"); 2195 } 2196 2197 SKIP: 2198 { # [perl #133921], segfault 2199 skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC; 2200 2201 fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\xÿÿÿþù\Q`\Qx`{0c!}e;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ Fn0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2202 fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \xÿÿÿÿù\Q`\Qx`{0d ;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 0000000Fm0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2203 2204fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2205 2206 2207 2208 2209 ç 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225x{0c!}\;\;çÿ q0/i0/!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ u00000F000t0p d? ù ç!00000000000000000000000m/0000000000000000000000000000000m/0\} )|i', "", {}, "[perl #133921]"); 2226 2227 fresh_perl_is('a aúúv sWtrt\ó||ß+Wüefù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each111\jx` \xòÿÿÿù\Qx`\Q 2228 2229 2230 2231 2232 2233 ç 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249x{1c!}\;\;îçÿp qr/elsif/!eF/;îçÿù\QxÿÿÿÿùHQx`Lx{1c!}e;ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx\xÿÿÿÿ>=\Qx`\Qx`ù\òx`ÿ>=\Qx`\Qx`ù\òx`\Qx`\x{1c!}8;\;îçÿp unshifteFnormat0cmp d?not ùp ç!0000000000000000000000000m/00000000000000000000000000000000m/0R\} )|\aï||K??p¿ÿÿfúd{\{gri{\x{1x/} ð¹NuntiÀh', "", {}, "[perl #133921]"); 2250 2251 fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0g0c 000n0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2252 2253 2254 2255 2256 2257 ç 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273x{0c!}\;\;îçÿ /0f/!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 000t0F000t0p d?n ù ç!00000000000000000000000m/0000000000000000000000000000000m/\} )|i', "", {}, "[perl #133933]"); 2274 } 2275 2276 { # perl #133998] 2277 fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {}, 2278 '/[\S\s]/l works'); 2279 } 2280 2281 { # perl #133995] 2282 use utf8; 2283 fresh_perl_is('"έδωσαν ελληνικήვე" =~ m/[^0](?=0)0?/', "", 2284 {wide_chars => 1}, 2285 '[^0] doesnt crash on UTF-8 target string'); 2286 } 2287 2288 { # [perl #133992] This is a tokenizer bug of parsing a pattern 2289 fresh_perl_is(q:$z = do { 2290 use utf8; 2291 "q!ÑеÑÑ! =~ m'" 2292 }; 2293 $z .= 'è(?#'; 2294 $z .= "'"; 2295 eval $z;:, "", {}, 'foo'); 2296 } 2297 2298 { # [perl #134325] 2299 my $quote="\\Q"; 2300 my $back="\\\\"; 2301 my $ff="\xff"; 2302 my $s = sprintf "/\\1|(|%s)%s%s /i", 2303 $quote x 8 . $back x 69, 2304 $quote x 5 . $back x 4, 2305 $ff x 48; 2306 like(fresh_perl("$s", { stderr => 1, }), qr/Unmatched \(/); 2307 } 2308 2309 { # GitHub #17196, caused assertion failure 2310 fresh_perl_like('("0" x 258) =~ /(?l)0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/', 2311 qr/^$/, 2312 {}, 2313 "Assertion failure with /l exact string longer than a single node"); 2314 } 2315 2316SKIP: 2317 { # [perl #134334], Assertion failure 2318 my $utf8_locale = find_utf8_ctype_locale(); 2319 skip "no UTF-8 locale available" unless $utf8_locale; 2320 fresh_perl_like("use POSIX; POSIX::setlocale(&LC_CTYPE, '$utf8_locale'); 'ssss' =~ /\xDF+?sX/il;", 2321 qr/^$/, 2322 {}, 2323 "Assertion failure matching /il on single char folding to multi"); 2324 } 2325 2326 { # Test ANYOFHs 2327 my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/; 2328 unlike("\x{4000000}", $pat, "4000000 isn't in pattern"); 2329 like("\x{4000001}", $pat, "4000001 is in pattern"); 2330 unlike("\x{4000002}", $pat, "4000002 isn't in pattern"); 2331 like("\x{4000003}", $pat, "4000003 is in pattern"); 2332 unlike("\x{4000004}", $pat, "4000004 isn't in pattern"); 2333 like("\x{4000005}", $pat, "4000005 is in pattern"); 2334 unlike("\x{4000006}", $pat, "4000006 isn't in pattern"); 2335 2336 # gh #17319 2337 $pat = qr/[\N{U+200D}\N{U+2000}]()/; 2338 unlike("\x{1FFF}", $pat, "1FFF isn't in pattern"); 2339 like("\x{2000}", $pat, "2000 is in pattern"); 2340 unlike("\x{2001}", $pat, "2001 isn't in pattern"); 2341 unlike("\x{200C}", $pat, "200C isn't in pattern"); 2342 like("\x{200D}", $pat, "200 is in pattern"); 2343 unlike("\x{200E}", $pat, "200E isn't in pattern"); 2344 } 2345 2346 # gh17490: test recursion check 2347 { 2348 my $eval = '(?{1})'; 2349 my $re = sprintf '(?&FOO)(?(DEFINE)(?<FOO>%sfoo))', $eval x 20; 2350 my $result = eval qq{"foo" =~ /$re/}; 2351 is($@ // '', '', "many evals did not die"); 2352 ok($result, "regexp correctly matched"); 2353 } 2354 2355 # gh16947: test regexp corruption (GOSUB) 2356 { 2357 fresh_perl_is(q{ 2358 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok' 2359 }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)'); 2360 } 2361 # gh16947: test fix doesn't break SUSPEND 2362 { 2363 fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' }, 2364 'ok', {}, "gh16947: test fix doesn't break SUSPEND"); 2365 } 2366 2367 # gh17730: should not crash 2368 { 2369 fresh_perl_is(q{ 2370 "q00" =~ m{(((*ACCEPT)0)*00)?0(?1)}; print "ok" 2371 }, 'ok', {}, 'gh17730: should not crash'); 2372 } 2373 2374 # gh17743: more regexp corruption via GOSUB 2375 { 2376 fresh_perl_is(q{ 2377 "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" 2378 }, 'ok', {}, 'gh17743: test regexp corruption (1)'); 2379 2380 fresh_perl_is(q{ 2381 "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; 2382 print "ok" 2383 }, 'ok', {}, 'gh17743: test regexp corruption (2)'); 2384 } 2385 2386 { 2387 # Test branch reset (?|...|...) in list context. This was reported 2388 # in GH Issue #20710, in relation to breaking App::pl. See 2389 # https://github.com/Perl/perl5/issues/20710#issuecomment-1404549785 2390 my $ok = 0; 2391 my ($w,$x,$y,$z); 2392 $ok = ($x,$y) = "ab"=~/(?|(p)(q)|(x)(y)|(a)(b))/; 2393 ok($ok,"Branch reset pattern 1 matched as expected"); 2394 is($x,"a","Branch reset in list context check 1 (a)"); 2395 is($y,"b","Branch reset in list context check 2 (b)"); 2396 2397 $ok = ($x,$y,$z) = "xyz"=~/(?|(p)(q)|(x)(y)|(a)(b))(z)/; 2398 ok($ok,"Branch reset pattern 2 matched as expected"); 2399 is($x,"x","Branch reset in list context check 3 (x)"); 2400 is($y,"y","Branch reset in list context check 4 (y)"); 2401 is($z,"z","Branch reset in list context check 5 (z)"); 2402 2403 $ok = ($w,$x,$y) = "wpq"=~/(w)(?|(p)(q)|(x)(y)|(a)(b))/; 2404 ok($ok,"Branch reset pattern 3 matched as expected"); 2405 is($w,"w","Branch reset in list context check 6 (w)"); 2406 is($x,"p","Branch reset in list context check 7 (p)"); 2407 is($y,"q","Branch reset in list context check 8 (q)"); 2408 2409 $ok = ($w,$x,$y,$z) = "wabz"=~/(w)(?|(p)(q)|(x)(y)|(a)(b))(z)/; 2410 ok($ok,"Branch reset pattern 4 matched as expected"); 2411 is($w,"w","Branch reset in list context check 9 (w)"); 2412 is($x,"a","Branch reset in list context check 10 (a)"); 2413 is($y,"b","Branch reset in list context check 11 (b)"); 2414 is($z,"z","Branch reset in list context check 12 (z)"); 2415 } 2416 { 2417 # Test for GH Issue #20826. Save stack overflow introduced in 2418 # 92373dea9d7bcc0a017f20cb37192c1d8400767f PR #20530. 2419 # Note this test depends on an assert so it will only fail 2420 # under DEBUGGING. 2421 fresh_perl_is(q{ 2422 $_ = "x" x 1000; 2423 my $pat = '(.)' x 200; 2424 $pat = qr/($pat)+/; 2425 m/$pat/; 2426 print "ok"; 2427 }, 'ok', {}, 'gh20826: test regex save stack overflow'); 2428 } 2429 { 2430 my ($x, $y); 2431 ok( "aaa" =~ /(?:(a)?\1)+/, 2432 "GH Issue #18865 'aaa' - pattern matches"); 2433 $x = "($-[0],$+[0])"; 2434 ok( "aaa" =~ /(?:((?{})a)?\1)+/, 2435 "GH Issue #18865 'aaa' - deoptimized pattern matches"); 2436 $y = "($-[0],$+[0])"; 2437 { 2438 local $::TODO = "Not Yet Implemented"; 2439 is( $y, $x, 2440 "GH Issue #18865 'aaa' - test optimization"); 2441 } 2442 ok( "ababab" =~ /(?:(?:(ab))?\1)+/, 2443 "GH Issue #18865 'ababab' - pattern matches"); 2444 $x = "($-[0],$+[0])"; 2445 ok( "ababab" =~ /(?:(?:((?{})ab))?\1)+/, 2446 "GH Issue #18865 'ababab' - deoptimized pattern matches"); 2447 $y = "($-[0],$+[0])"; 2448 { 2449 local $::TODO = "Not Yet Implemented"; 2450 is( $y, $x, 2451 "GH Issue #18865 'ababab' - test optimization"); 2452 } 2453 ok( "XaaXbbXb" =~ /(?:X([ab])?\1)+/, 2454 "GH Issue #18865 'XaaXbbXb' - pattern matches"); 2455 $x = "($-[0],$+[0])"; 2456 ok( "XaaXbbXb" =~ /(?:X((?{})[ab])?\1)+/, 2457 "GH Issue #18865 'XaaXbbXb' - deoptimized pattern matches"); 2458 $y = "($-[0],$+[0])"; 2459 { 2460 local $::TODO = "Not Yet Implemented"; 2461 is( $y, $x, 2462 "GH Issue #18865 'XaaXbbXb' - test optimization"); 2463 } 2464 } 2465 { 2466 # Test that ${^LAST_SUCCESSFUL_PATTERN} works as expected. 2467 # It should match like the empty pattern does, and it should be dynamic 2468 # in the same was as $1 is dynamic. 2469 my ($str,$pat); 2470 $str = "ABCD"; 2471 $str =~/(D)/; 2472 is("$1", "D", '$1 is "D"'); 2473 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2474 is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2475 { 2476 if ($str=~/BX/ || $str=~/(BC)/) { 2477 is("$1", "BC",'$1 is now "BC"'); 2478 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2479 ok($str =~ s//ZZ/, "Empty pattern matched as expected"); 2480 is($str, "AZZD", "Empty pattern in s/// has result we expected"); 2481 } 2482 } 2483 is("$1", "D", '$1 should now be "D" again'); 2484 is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2485 ok($str=~s//Q/, 'Empty pattern to "Q" was successful'); 2486 is($str, "AZZQ", "Empty pattern in s/// has result we expected (try2)"); 2487 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2488 is($pat, "(?^:(D))", 'Outer ${^LAST_SUCCESSFUL_PATTERN} restored to its previous value as expected'); 2489 2490 $str = "ABCD"; 2491 { 2492 if ($str=~/BX/ || $str=~/(BC)/) { 2493 is("$1", "BC",'$1 is now "BC"'); 2494 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2495 ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/ZZ/, '${^LAST_SUCCESSFUL_PATTERN} matched as expected'); 2496 is($str, "AZZD", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected'); 2497 } 2498 } 2499 is("$1", "D", '$1 should now be "D" again'); 2500 is($pat, "(?^:(BC))", 'inner ${^LAST_SUCCESSFUL_PATTERN} is as expected'); 2501 is($str, "AZZD", 'Using ${^LAST_SUCCESSFUL_PATTERN} as a pattern has same result as empty pattern'); 2502 ok($str=~s/${^LAST_SUCCESSFUL_PATTERN}/Q/, '${^LAST_SUCCESSFUL_PATTERN} to "Q" was successful'); 2503 is($str, "AZZQ", '${^LAST_SUCCESSFUL_PATTERN} in s/// has result we expected'); 2504 ok($str=~/ZQ/, "/ZQ/ matched as expected"); 2505 $pat = "${^LAST_SUCCESSFUL_PATTERN}"; 2506 is($pat, "(?^:ZQ)", '${^LAST_SUCCESSFUL_PATTERN} changed as expected'); 2507 2508 $str = "foobarfoo"; 2509 ok($str =~ s/foo//, "matched foo"); 2510 my $copy= ${^LAST_SUCCESSFUL_PATTERN}; 2511 ok(defined($copy), '$copy is defined'); 2512 ok($str =~ s/bar//,"matched bar"); 2513 ok($str =~ s/$copy/PQR/, 'replaced $copy with PQR'); 2514 is($str, "PQR", 'final string should be PQR'); 2515 } 2516} # End of sub run_tests 2517 25181; 2519 2520# 2521# ex: set ts=8 sts=4 sw=4 et: 2522# 2523