1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9no warnings 'experimental::vlb'; 10use 5.010; 11 12sub run_tests; 13 14$| = 1; 15 16 17BEGIN { 18 chdir 't' if -d 't'; 19 require './test.pl'; 20 set_up_inc('../lib', '.', '../ext/re'); 21 require Config; import Config; 22 require './charset_tools.pl'; 23 require './loc_tools.pl'; 24} 25 26skip_all_without_unicode_tables(); 27 28my $has_locales = locales_enabled('LC_CTYPE'); 29 30plan tests => 1046; # Update this when adding/deleting tests. 31 32run_tests() unless caller; 33 34# 35# Tests start here. 36# 37sub run_tests { 38 my $sharp_s = uni_to_native("\xdf"); 39 40 { 41 my $x = "abc\ndef\n"; 42 (my $x_pretty = $x) =~ s/\n/\\n/g; 43 44 ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; 45 ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; 46 47 # used to be a test for $* 48 ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; 49 50 ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); 51 ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); 52 53 ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; 54 ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); 55 56 ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; 57 ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); 58 59 ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; 60 ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); 61 } 62 63 { 64 $_ = '123'; 65 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; 66 } 67 68 { 69 $_ = 'aaabbbccc'; 70 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', 71 qq [\$_ = '$_'; /(a*b*)(c*)/]; 72 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; 73 unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); 74 75 $_ = 'aaabccc'; 76 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; 77 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 78 79 $_ = 'aaaccc'; 80 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 81 unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); 82 83 $_ = 'abcdef'; 84 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; 85 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; 86 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; 87 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; 88 } 89 90 { 91 # used to be a test for $* 92 ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; 93 } 94 95 { 96 our %XXX = map {($_ => $_)} 123, 234, 345; 97 98 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); 99 while ($_ = shift(@XXX)) { 100 my $e = index ($_, 'not') >= 0 ? '' : 1; 101 my $r = m?(.*)?; 102 is($r, $e, "?(.*)?"); 103 /not/ && reset; 104 if (/not ok 2/) { 105 if ($^O eq 'VMS') { 106 $_ = shift(@XXX); 107 } 108 else { 109 reset 'X'; 110 } 111 } 112 } 113 114 SKIP: { 115 if ($^O eq 'VMS') { 116 skip "Reset 'X'", 1; 117 } 118 ok !keys %XXX, "%XXX is empty"; 119 } 120 121 } 122 123 { 124 my $message = "Test empty pattern"; 125 my $xyz = 'xyz'; 126 my $cde = 'cde'; 127 128 $cde =~ /[^ab]*/; 129 $xyz =~ //; 130 is($&, $xyz, $message); 131 132 my $foo = '[^ab]*'; 133 $cde =~ /$foo/; 134 $xyz =~ //; 135 is($&, $xyz, $message); 136 137 $cde =~ /$foo/; 138 my $null; 139 no warnings 'uninitialized'; 140 $xyz =~ /$null/; 141 is($&, $xyz, $message); 142 143 $null = ""; 144 $xyz =~ /$null/; 145 is($&, $xyz, $message); 146 147 # each entry: regexp, match string, $&, //o match success 148 my @tests = 149 ( 150 [ "", "xy", "x", 1 ], 151 [ "y", "yz", "y", !1 ], 152 ); 153 for my $test (@tests) { 154 my ($re, $str, $matched, $omatch) = @$test; 155 $xyz =~ /x/o; 156 ok($str =~ /$re/, "$str matches /$re/"); 157 is($&, $matched, "on $matched"); 158 $xyz =~ /x/o; 159 is($str =~ /$re/o, $omatch, "$str matches /$re/o (or not)"); 160 } 161 } 162 163 { 164 my $message = q !Check $`, $&, $'!; 165 $_ = 'abcdefghi'; 166 /def/; # optimized up to cmd 167 is("$`:$&:$'", 'abc:def:ghi', $message); 168 169 no warnings 'void'; 170 /cde/ + 0; # optimized only to spat 171 is("$`:$&:$'", 'ab:cde:fghi', $message); 172 173 /[d][e][f]/; # not optimized 174 is("$`:$&:$'", 'abc:def:ghi', $message); 175 } 176 177 { 178 $_ = 'now is the {time for all} good men to come to.'; 179 / \{([^}]*)}/; 180 is($1, 'time for all', "Match braces"); 181 } 182 183 { 184 my $message = "{N,M} quantifier"; 185 $_ = 'xxx {3,4} yyy zzz'; 186 ok(/( {3,4})/, $message); 187 is($1, ' ', $message); 188 unlike($_, qr/( {4,})/, $message); 189 ok(/( {2,3}.)/, $message); 190 is($1, ' y', $message); 191 ok(/(y{2,3}.)/, $message); 192 is($1, 'yyy ', $message); 193 unlike($_, qr/x {3,4}/, $message); 194 unlike($_, qr/^xxx {3,4}/, $message); 195 } 196 197 { 198 my $message = "Test /g"; 199 local $" = ":"; 200 $_ = "now is the time for all good men to come to."; 201 my @words = /(\w+)/g; 202 my $exp = "now:is:the:time:for:all:good:men:to:come:to"; 203 204 is("@words", $exp, $message); 205 206 @words = (); 207 while (/\w+/g) { 208 push (@words, $&); 209 } 210 is("@words", $exp, $message); 211 212 @words = (); 213 pos = 0; 214 while (/to/g) { 215 push(@words, $&); 216 } 217 is("@words", "to:to", $message); 218 219 pos $_ = 0; 220 @words = /to/g; 221 is("@words", "to:to", $message); 222 } 223 224 { 225 $_ = "abcdefghi"; 226 227 my $pat1 = 'def'; 228 my $pat2 = '^def'; 229 my $pat3 = '.def.'; 230 my $pat4 = 'abc'; 231 my $pat5 = '^abc'; 232 my $pat6 = 'abc$'; 233 my $pat7 = 'ghi'; 234 my $pat8 = '\w*ghi'; 235 my $pat9 = 'ghi$'; 236 237 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = 238 my $t6 = my $t7 = my $t8 = my $t9 = 0; 239 240 for my $iter (1 .. 5) { 241 $t1++ if /$pat1/o; 242 $t2++ if /$pat2/o; 243 $t3++ if /$pat3/o; 244 $t4++ if /$pat4/o; 245 $t5++ if /$pat5/o; 246 $t6++ if /$pat6/o; 247 $t7++ if /$pat7/o; 248 $t8++ if /$pat8/o; 249 $t9++ if /$pat9/o; 250 } 251 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; 252 is($x, '505550555', "Test /o"); 253 } 254 255 { 256 my $xyz = 'xyz'; 257 ok "abc" =~ /^abc$|$xyz/, "| after \$"; 258 259 # perl 4.009 says "unmatched ()" 260 my $message = '$ inside ()'; 261 262 my $result; 263 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; 264 is($@, "", $message); 265 is($result, "abc:bc", $message); 266 } 267 268 { 269 my $message = "Scalar /g"; 270 $_ = "abcfooabcbar"; 271 272 ok( /abc/g && $` eq "", $message); 273 ok( /abc/g && $` eq "abcfoo", $message); 274 ok(!/abc/g, $message); 275 276 $message = "Scalar /gi"; 277 pos = 0; 278 ok( /ABC/gi && $` eq "", $message); 279 ok( /ABC/gi && $` eq "abcfoo", $message); 280 ok(!/ABC/gi, $message); 281 282 $message = "Scalar /g"; 283 pos = 0; 284 ok( /abc/g && $' eq "fooabcbar", $message); 285 ok( /abc/g && $' eq "bar", $message); 286 287 $_ .= ''; 288 my @x = /abc/g; 289 is(@x, 2, "/g reset after assignment"); 290 } 291 292 { 293 my $message = '/g, \G and pos'; 294 $_ = "abdc"; 295 pos $_ = 2; 296 /\Gc/gc; 297 is(pos $_, 2, $message); 298 /\Gc/g; 299 is(pos $_, undef, $message); 300 } 301 302 { 303 my $message = '(?{ })'; 304 our $out = 1; 305 'abc' =~ m'a(?{ $out = 2 })b'; 306 is($out, 2, $message); 307 308 $out = 1; 309 'abc' =~ m'a(?{ $out = 3 })c'; 310 is($out, 1, $message); 311 } 312 313 { 314 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 315 my @out = /(?<!foo)bar./g; 316 is("@out", 'bar2 barf', "Negative lookbehind"); 317 } 318 319 { 320 my $message = "REG_INFTY tests"; 321 # Tests which depend on REG_INFTY 322 323 # Defaults assumed if this fails 324 eval { require Config; }; 325 $::reg_infty = $Config::Config{reg_infty} // 65535; 326 $::reg_infty_m = $::reg_infty - 1; 327 $::reg_infty_p = $::reg_infty + 1; 328 $::reg_infty_m = $::reg_infty_m; # Suppress warning. 329 330 # As well as failing if the pattern matches do unexpected things, the 331 # next three tests will fail if you should have picked up a lower-than- 332 # default value for $reg_infty from Config.pm, but have not. 333 334 is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); 335 is($@, '', $message); 336 is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); 337 is($@, '', $message); 338 isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); 339 is($@, '', $message); 340 341 eval "'aaa' =~ /a{1,$::reg_infty}/"; 342 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 343 eval "'aaa' =~ /a{1,$::reg_infty_p}/"; 344 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 345 346 # It should be 'a' x 2147483647, but that exhausts memory on 347 # reasonably sized modern machines 348 like('a' x $::reg_infty_p, qr/a{1,}/, 349 "{1,} matches more times than REG_INFTY"); 350 } 351 352 { 353 # Poke a couple more parse failures 354 my $context = 'x' x 256; 355 eval qq("${context}y" =~ /(?<=$context)y/); 356 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; 357 } 358 359 SKIP: 360 { # Long Monsters 361 362 my @trials = (125, 140, 250, 270, 300000, 30); 363 364 skip('limited memory', @trials * 4) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 365 366 for my $l (@trials) { # Ordered to free memory 367 my $a = 'a' x $l; 368 my $message = "Long monster, length = $l"; 369 like("ba$a=", qr/a$a=/, $message); 370 unlike("b$a=", qr/a$a=/, $message); 371 like("b$a=", qr/ba+=/, $message); 372 373 like("ba$a=", qr/b(?:a|b)+=/, $message); 374 } 375 } 376 377 SKIP: 378 { # 20000 nodes, each taking 3 words per string, and 1 per branch 379 380 my %ans = ( 'ax13876y25677lbc' => 1, 381 'ax13876y25677mcb' => 0, # not b. 382 'ax13876y35677nbc' => 0, # Num too big 383 'ax13876y25677y21378obc' => 1, 384 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 385 'ax13876y25677y21378y21378kbc' => 1, 386 'ax13876y25677y21378y21378kcb' => 0, # Not b. 387 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 388 ); 389 390 skip('limited memory', 2 * scalar keys %ans) if $ENV{'PERL_SKIP_BIG_MEM_TESTS'}; 391 392 my $long_constant_len = join '|', 12120 .. 32645; 393 my $long_var_len = join '|', 8120 .. 28645; 394 395 for (keys %ans) { 396 my $message = "20000 nodes, const-len '$_'"; 397 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; 398 399 $message = "20000 nodes, var-len '$_'"; 400 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; 401 } 402 } 403 404 { 405 my $message = "Complicated backtracking"; 406 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; 407 my $expect = "(bla()) ((l)u((e))) (l(e)e)"; 408 409 our $c; 410 sub matchit { 411 m/ 412 ( 413 \( 414 (?{ $c = 1 }) # Initialize 415 (?: 416 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 417 (?! 418 ) # Fail: will unwind one iteration back 419 ) 420 (?: 421 [^()]+ # Match a big chunk 422 (?= 423 [()] 424 ) # Do not try to match subchunks 425 | 426 \( 427 (?{ ++$c }) 428 | 429 \) 430 (?{ --$c }) 431 ) 432 )+ # This may not match with different subblocks 433 ) 434 (?(?{ $c != 0 }) 435 (?! 436 ) # Fail 437 ) # Otherwise the chunk 1 may succeed with $c>0 438 /xg; 439 } 440 441 my @ans = (); 442 my $res; 443 push @ans, $res while $res = matchit; 444 is("@ans", "1 1 1", $message); 445 446 @ans = matchit; 447 is("@ans", $expect, $message); 448 449 $message = "Recursion with (??{ })"; 450 our $matched; 451 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; 452 453 @ans = my @ans1 = (); 454 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; 455 456 is("@ans", "1 1 1", $message); 457 is("@ans1", $expect, $message); 458 459 @ans = m/$matched/g; 460 is("@ans", $expect, $message); 461 462 } 463 464 { 465 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; 466 } 467 468 { 469 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 470 is("@ans", 'a/ b', "Stack may be bad"); 471 } 472 473 { 474 my $message = "Eval-group not allowed at runtime"; 475 my $code = '{$blah = 45}'; 476 our $blah = 12; 477 eval { /(?$code)/ }; 478 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 479 480 $blah = 12; 481 my $res = eval { "xx" =~ /(?$code)/o }; 482 { 483 no warnings 'uninitialized'; 484 chomp $@; my $message = "$message '$@', '$res', '$blah'"; 485 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 486 } 487 488 $code = '=xx'; 489 $blah = 12; 490 $res = eval { "xx" =~ /(?$code)/o }; 491 { 492 no warnings 'uninitialized'; 493 my $message = "$message '$@', '$res', '$blah'"; 494 ok(!$@ && $res, $message); 495 } 496 497 $code = '{$blah = 45}'; 498 $blah = 12; 499 eval "/(?$code)/"; 500 is($blah, 45, $message); 501 502 $blah = 12; 503 /(?{$blah = 45})/; 504 is($blah, 45, $message); 505 } 506 507 { 508 my $message = "Pos checks"; 509 my $x = 'banana'; 510 $x =~ /.a/g; 511 is(pos $x, 2, $message); 512 513 $x =~ /.z/gc; 514 is(pos $x, 2, $message); 515 516 sub f { 517 my $p = $_[0]; 518 return $p; 519 } 520 521 $x =~ /.a/g; 522 is(f (pos $x), 4, $message); 523 } 524 525 { 526 my $message = 'Checking $^R'; 527 our $x = $^R = 67; 528 'foot' =~ /foo(?{$x = 12; 75})[t]/; 529 is($^R, 75, $message); 530 531 $x = $^R = 67; 532 'foot' =~ /foo(?{$x = 12; 75})[xy]/; 533 ok($^R eq '67' && $x eq '12', $message); 534 535 $x = $^R = 67; 536 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 537 ok($^R eq '79' && $x eq '12', $message); 538 } 539 540 { 541 is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); 542 is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); 543 is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); 544 is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); 545 is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); 546 is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); 547 } 548 549 { # Test that charset modifier work, and are interpolated 550 is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); 551 is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); 552 is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); 553 is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); 554 is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); 555 556 my $dual = qr/\b\v$/; 557 my $locale; 558 559 SKIP: { 560 skip 'Locales not available', 1 unless $has_locales; 561 562 use locale; 563 $locale = qr/\b\v$/; 564 is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); 565 no locale; 566 } 567 568 use feature 'unicode_strings'; 569 my $unicode = qr/\b\v$/; 570 is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); 571 is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 572 573 SKIP: { 574 skip 'Locales not available', 1 unless $has_locales; 575 576 is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); 577 } 578 579 no feature 'unicode_strings'; 580 SKIP: { 581 skip 'Locales not available', 1 unless $has_locales; 582 is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); 583 } 584 585 is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); 586 587 SKIP: { 588 skip 'Locales not available', 2 unless $has_locales; 589 590 use locale; 591 is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 592 is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); 593 } 594 } 595 596 { 597 my $message = "Look around"; 598 $_ = 'xabcx'; 599 foreach my $ans ('', 'c') { 600 ok(/(?<=(?=a)..)((?=c)|.)/g, $message); 601 is($1, $ans, $message); 602 } 603 } 604 605 { 606 my $message = "Empty clause"; 607 $_ = 'a'; 608 foreach my $ans ('', 'a', '') { 609 ok(/^|a|$/g, $message); 610 is($&, $ans, $message); 611 } 612 } 613 614 { 615 sub prefixify { 616 my $message = "Prefixify"; 617 { 618 my ($v, $a, $b, $res) = @_; 619 ok($v =~ s/\Q$a\E/$b/, $message); 620 is($v, $res, $message); 621 } 622 } 623 624 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); 625 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); 626 } 627 628 { 629 $_ = 'var="foo"'; 630 /(\")/; 631 ok $1 && /$1/, "Capture a quote"; 632 } 633 634 { 635 no warnings 'closure'; 636 my $message = '(?{ $var } refers to package vars'; 637 package aa; 638 our $c = 2; 639 $::c = 3; 640 '' =~ /(?{ $c = 4 })/; 641 main::is($c, 4, $message); 642 main::is($::c, 3, $message); 643 } 644 645 { 646 is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); 647 like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, 648 'POSIX class [: :] must have valid name'); 649 650 for my $d (qw [= .]) { 651 is(eval "/[[${d}foo${d}]]/", undef); 652 like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, 653 "POSIX syntax [[$d $d]] is an error"); 654 } 655 } 656 657 { 658 # test if failure of patterns returns empty list 659 my $message = "Failed pattern returns empty list"; 660 $_ = 'aaa'; 661 @_ = /bbb/; 662 is("@_", "", $message); 663 664 @_ = /bbb/g; 665 is("@_", "", $message); 666 667 @_ = /(bbb)/; 668 is("@_", "", $message); 669 670 @_ = /(bbb)/g; 671 is("@_", "", $message); 672 } 673 { 674 my $message = 'ACCEPT and CLOSE - '; 675 $_ = "aced"; 676 #12 3 4 5 677 /((a?(*ACCEPT)())())()/ 678 or die "Failed to match"; 679 is($1,"a",$message . "buffer 1 is defined with expected value"); 680 is($2,"a",$message . "buffer 2 is defined with expected value"); 681 ok(!defined($3),$message . "buffer 3 is not defined"); 682 ok(!defined($4),$message . "buffer 4 is not defined"); 683 ok(!defined($5),$message . "buffer 5 is not defined"); 684 ok(!defined($6),$message . "buffer 6 is not defined"); 685 $message= 'NO ACCEPT and CLOSE - '; 686 /((a?())())()/ 687 or die "Failed to match"; 688 is($1,"a",$message . "buffer 1 is defined with expected value"); 689 is($2,"a",$message . "buffer 2 is defined with expected value"); 690 is($3,"", $message . "buffer 3 is defined with expected value"); 691 is($4,"", $message . "buffer 4 is defined with expected value"); 692 is($5,"",$message . "buffer 5 is defined with expected value"); 693 ok(!defined($6),$message . "buffer 6 is not defined"); 694 #12 3 4 5 695 $message = 'ACCEPT and CLOSE - '; 696 /((a?(*ACCEPT)(c))(e))(d)/ 697 or die "Failed to match"; 698 is($1,"a",$message . "buffer 1 is defined with expected value"); 699 is($2,"a",$message . "buffer 2 is defined with expected value"); 700 ok(!defined($3),$message . "buffer 3 is not defined"); 701 ok(!defined($4),$message . "buffer 4 is not defined"); 702 ok(!defined($5),$message . "buffer 5 is not defined"); 703 ok(!defined($6),$message . "buffer 6 is not defined"); 704 $message= 'NO ACCEPT and CLOSE - '; 705 /((a?(c))(e))(d)/ 706 or die "Failed to match"; 707 is($1,"ace", $message . "buffer 1 is defined with expected value"); 708 is($2,"ac", $message . "buffer 2 is defined with expected value"); 709 is($3,"c", $message . "buffer 3 is defined with expected value"); 710 is($4,"e", $message . "buffer 4 is defined with expected value"); 711 is($5,"d", $message . "buffer 5 is defined with expected value"); 712 ok(!defined($6),$message . "buffer 6 is not defined"); 713 } 714 { 715 my $message = '@- and @+ and @{^CAPTURE} tests'; 716 717 $_= "ace"; 718 /c(?=.$)/; 719 is($#{^CAPTURE}, -1, $message); 720 is($#+, 0, $message); 721 is($#-, 0, $message); 722 is($+ [0], 2, $message); 723 is($- [0], 1, $message); 724 ok(!defined $+ [1] && !defined $- [1] && 725 !defined $+ [2] && !defined $- [2], $message); 726 727 /a(c)(e)/; 728 is($#{^CAPTURE}, 1, $message); # one less than $#- 729 is($#+, 2, $message); 730 is($#-, 2, $message); 731 is($+ [0], 3, $message); 732 is($- [0], 0, $message); 733 is(${^CAPTURE}[0], "c", $message); 734 is($+ [1], 2, $message); 735 is($- [1], 1, $message); 736 is(${^CAPTURE}[1], "e", $message); 737 is($+ [2], 3, $message); 738 is($- [2], 2, $message); 739 ok(!defined $+ [3] && !defined $- [3] && 740 !defined ${^CAPTURE}[2] && !defined ${^CAPTURE}[3] && 741 !defined $+ [4] && !defined $- [4], $message); 742 743 # Exists has a special check for @-/@+ - bug 45147 744 ok(exists $-[0], $message); 745 ok(exists $+[0], $message); 746 ok(exists ${^CAPTURE}[0], $message); 747 ok(exists ${^CAPTURE}[1], $message); 748 ok(exists $-[2], $message); 749 ok(exists $+[2], $message); 750 ok(!exists ${^CAPTURE}[2], $message); 751 ok(!exists $-[3], $message); 752 ok(!exists $+[3], $message); 753 ok(exists ${^CAPTURE}[-1], $message); 754 ok(exists ${^CAPTURE}[-2], $message); 755 ok(exists $-[-1], $message); 756 ok(exists $+[-1], $message); 757 ok(exists $-[-3], $message); 758 ok(exists $+[-3], $message); 759 ok(!exists $-[-4], $message); 760 ok(!exists $+[-4], $message); 761 ok(!exists ${^CAPTURE}[-3], $message); 762 763 764 /.(c)(b)?(e)/; 765 is($#{^CAPTURE}, 2, $message); # one less than $#- 766 is($#+, 3, $message); 767 is($#-, 3, $message); 768 is(${^CAPTURE}[0], "c", $message); 769 is(${^CAPTURE}[2], "e", $message . "[$1 $3]"); 770 is($+ [1], 2, $message); 771 is($- [1], 1, $message); 772 is($+ [3], 3, $message); 773 is($- [3], 2, $message); 774 ok(!defined $+ [2] && !defined $- [2] && 775 !defined $+ [4] && !defined $- [4] && 776 !defined ${^CAPTURE}[1], $message); 777 778 /.(c)/; 779 is($#{^CAPTURE}, 0, $message); # one less than $#- 780 is($#+, 1, $message); 781 is($#-, 1, $message); 782 is(${^CAPTURE}[0], "c", $message); 783 is($+ [0], 2, $message); 784 is($- [0], 0, $message); 785 is($+ [1], 2, $message); 786 is($- [1], 1, $message); 787 ok(!defined $+ [2] && !defined $- [2] && 788 !defined $+ [3] && !defined $- [3] && 789 !defined ${^CAPTURE}[1], $message); 790 791 /.(c)(ba*)?/; 792 is($#{^CAPTURE}, 0, $message); # one less than $#- 793 is($#+, 2, $message); 794 is($#-, 1, $message); 795 796 # Check that values don't stick 797 " "=~/()()()(.)(..)/; 798 my($m,$p,$q) = (\$-[5], \$+[5], \${^CAPTURE}[4]); 799 () = "$$_" for $m, $p, $q; # FETCH (or eqv.) 800 " " =~ /()/; 801 is $$m, undef, 'values do not stick to @- elements'; 802 is $$p, undef, 'values do not stick to @+ elements'; 803 is $$q, undef, 'values do not stick to @{^CAPTURE} elements'; 804 } 805 806 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', 807 '${^CAPTURE}[0] = 13', 808 '@- = qw (foo bar)', '$^N = 42') { 809 is(eval $_, undef); 810 like($@, qr/^Modification of a read-only value attempted/, 811 '$^N, @- and @+ are read-only'); 812 } 813 814 { 815 my $message = '\G testing'; 816 $_ = 'aaa'; 817 pos = 1; 818 my @a = /\Ga/g; 819 is("@a", "a a", $message); 820 821 my $str = 'abcde'; 822 pos $str = 2; 823 unlike($str, qr/^\G/, $message); 824 unlike($str, qr/^.\G/, $message); 825 like($str, qr/^..\G/, $message); 826 unlike($str, qr/^...\G/, $message); 827 ok($str =~ /\G../ && $& eq 'cd', $message); 828 ok($str =~ /.\G./ && $& eq 'bc', $message); 829 830 } 831 832 { 833 my $message = '\G and intuit and anchoring'; 834 $_ = "abcdef"; 835 pos = 0; 836 ok($_ =~ /\Gabc/, $message); 837 ok($_ =~ /^\Gabc/, $message); 838 839 pos = 3; 840 ok($_ =~ /\Gdef/, $message); 841 pos = 3; 842 ok($_ =~ /\Gdef$/, $message); 843 pos = 3; 844 ok($_ =~ /abc\Gdef$/, $message); 845 pos = 3; 846 ok($_ =~ /^abc\Gdef$/, $message); 847 pos = 3; 848 ok($_ =~ /c\Gd/, $message); 849 pos = 3; 850 ok($_ =~ /..\GX?def/, $message); 851 } 852 853 { 854 my $s = '123'; 855 pos($s) = 1; 856 my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 857 is("@a", "1", '\G looping'); 858 } 859 860 861 { 862 my $message = 'pos inside (?{ })'; 863 my $str = 'abcde'; 864 our ($foo, $bar); 865 like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); 866 is($foo, $str, $message); 867 is($bar, 2, $message); 868 is(pos $str, undef, $message); 869 870 undef $foo; 871 undef $bar; 872 pos $str = undef; 873 ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); 874 is($foo, $str, $message); 875 is($bar, 2, $message); 876 is(pos $str, 3, $message); 877 878 $_ = $str; 879 undef $foo; 880 undef $bar; 881 like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); 882 is($foo, $str, $message); 883 is($bar, 2, $message); 884 885 undef $foo; 886 undef $bar; 887 ok(/b(?{$foo = $_; $bar = pos})c/g, $message); 888 is($foo, $str, $message); 889 is($bar, 2, $message); 890 is(pos, 3, $message); 891 892 undef $foo; 893 undef $bar; 894 pos = undef; 895 1 while /b(?{$foo = $_; $bar = pos})c/g; 896 is($foo, $str, $message); 897 is($bar, 2, $message); 898 is(pos, undef, $message); 899 900 undef $foo; 901 undef $bar; 902 $_ = 'abcde|abcde'; 903 ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); 904 is($foo, 'abcde|abcde', $message); 905 is($bar, 8, $message); 906 is($_, 'axde|axde', $message); 907 908 # List context: 909 $_ = 'abcde|abcde'; 910 our @res; 911 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; 912 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 913 is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); 914 915 @res = (); 916 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; 917 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 918 is("@res", "'' 'ab' 'cde|abcde' " . 919 "'' 'abc' 'de|abcde' " . 920 "'abcd' 'e|' 'abcde' " . 921 "'abcde|' 'ab' 'cde' " . 922 "'abcde|' 'abc' 'de'", $message); 923 } 924 925 { 926 my $message = '\G anchor checks'; 927 my $foo = 'aabbccddeeffgg'; 928 pos ($foo) = 1; 929 930 ok($foo =~ /.\G(..)/g, $message); 931 is($1, 'ab', $message); 932 933 pos ($foo) += 1; 934 ok($foo =~ /.\G(..)/g, $message); 935 is($1, 'cc', $message); 936 937 pos ($foo) += 1; 938 ok($foo =~ /.\G(..)/g, $message); 939 is($1, 'de', $message); 940 941 ok($foo =~ /\Gef/g, $message); 942 943 undef pos $foo; 944 ok($foo =~ /\G(..)/g, $message); 945 is($1, 'aa', $message); 946 947 ok($foo =~ /\G(..)/g, $message); 948 is($1, 'bb', $message); 949 950 pos ($foo) = 5; 951 ok($foo =~ /\G(..)/g, $message); 952 is($1, 'cd', $message); 953 } 954 955 { 956 my $message = 'basic \G floating checks'; 957 my $foo = 'aabbccddeeffgg'; 958 pos ($foo) = 1; 959 960 ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); 961 is($1, 'ab', "$message: ab"); 962 963 pos ($foo) += 1; 964 ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); 965 is($1, 'cc', "$message: cc"); 966 967 pos ($foo) += 1; 968 ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); 969 is($1, 'de', "$message: de"); 970 971 ok($foo =~ /\Gef/g, "$message: \\Gef"); 972 973 pos ($foo) = 1; 974 975 ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); 976 is($1, 'aa', "$message: aa"); 977 978 pos ($foo) = 2; 979 980 ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); 981 is($1, 'ab', "$message: ab"); 982 983 } 984 985 { 986 $_ = '123x123'; 987 my @res = /(\d*|x)/g; 988 local $" = '|'; 989 is("@res", "123||x|123|", "0 match in alternation"); 990 } 991 992 { 993 my $message = "Match against temporaries (created via pp_helem())" . 994 " is safe"; 995 ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); 996 is($1, "bar", $message); 997 } 998 999 { 1000 my $message = 'package $i inside (?{ }), ' . 1001 'saved substrings and changing $_'; 1002 our @a = qw [foo bar]; 1003 our @b = (); 1004 s/(\w)(?{push @b, $1})/,$1,/g for @a; 1005 is("@b", "f o o b a r", $message); 1006 is("@a", ",f,,o,,o, ,b,,a,,r,", $message); 1007 1008 $message = 'lexical $i inside (?{ }), ' . 1009 'saved substrings and changing $_'; 1010 no warnings 'closure'; 1011 my @c = qw [foo bar]; 1012 my @d = (); 1013 s/(\w)(?{push @d, $1})/,$1,/g for @c; 1014 is("@d", "f o o b a r", $message); 1015 is("@c", ",f,,o,,o, ,b,,a,,r,", $message); 1016 } 1017 1018 { 1019 my $message = 'Brackets'; 1020 our $brackets; 1021 $brackets = qr { 1022 { (?> [^{}]+ | (??{ $brackets }) )* } 1023 }x; 1024 1025 ok("{{}" =~ $brackets, $message); 1026 is($&, "{}", $message); 1027 ok("something { long { and } hairy" =~ $brackets, $message); 1028 is($&, "{ and }", $message); 1029 ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); 1030 is($&, "{ and }", $message); 1031 } 1032 1033 { 1034 $_ = "a-a\nxbb"; 1035 pos = 1; 1036 ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); 1037 } 1038 1039 { 1040 my $message = '\G anchor checks'; 1041 my $text = "aaXbXcc"; 1042 pos ($text) = 0; 1043 ok($text !~ /\GXb*X/g, $message); 1044 } 1045 1046 { 1047 $_ = "xA\n" x 500; 1048 unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); 1049 1050 my $text = "abc dbf"; 1051 my @res = ($text =~ /.*?(b).*?\b/g); 1052 is("@res", "b b", '\b is not special'); 1053 } 1054 1055 { 1056 my $message = '\S, [\S], \s, [\s]'; 1057 my @a = map chr, 0 .. 255; 1058 my @b = grep m/\S/, @a; 1059 my @c = grep m/[^\s]/, @a; 1060 is("@b", "@c", $message); 1061 1062 @b = grep /\S/, @a; 1063 @c = grep /[\S]/, @a; 1064 is("@b", "@c", $message); 1065 1066 @b = grep /\s/, @a; 1067 @c = grep /[^\S]/, @a; 1068 is("@b", "@c", $message); 1069 1070 @b = grep /\s/, @a; 1071 @c = grep /[\s]/, @a; 1072 is("@b", "@c", $message); 1073 1074 # Test an inverted posix class with a char also in the class. 1075 my $nbsp = chr utf8::unicode_to_native(0xA0); 1076 my $non_s = chr utf8::unicode_to_native(0xA1); 1077 my $pat_string = "[^\\S ]"; 1078 unlike(" ", qr/$pat_string/, "Verify ' ' !~ /$pat_string/"); 1079 like("\t", qr/$pat_string/, "Verify '\\t =~ /$pat_string/"); 1080 unlike($nbsp, qr/$pat_string/, "Verify non-utf8-NBSP !~ /$pat_string/"); 1081 utf8::upgrade($nbsp); 1082 like($nbsp, qr/$pat_string/, "Verify utf8-NBSP =~ /$pat_string/"); 1083 unlike($non_s, qr/$pat_string/, "Verify non-utf8-inverted-bang !~ /$pat_string/"); 1084 utf8::upgrade($non_s); 1085 unlike($non_s, qr/$pat_string/, "Verify utf8-inverted-bang !~ /$pat_string/"); 1086 } 1087 { 1088 my $message = '\D, [\D], \d, [\d]'; 1089 my @a = map chr, 0 .. 255; 1090 my @b = grep /\D/, @a; 1091 my @c = grep /[^\d]/, @a; 1092 is("@b", "@c", $message); 1093 1094 @b = grep /\D/, @a; 1095 @c = grep /[\D]/, @a; 1096 is("@b", "@c", $message); 1097 1098 @b = grep /\d/, @a; 1099 @c = grep /[^\D]/, @a; 1100 is("@b", "@c", $message); 1101 1102 @b = grep /\d/, @a; 1103 @c = grep /[\d]/, @a; 1104 is("@b", "@c", $message); 1105 } 1106 { 1107 my $message = '\W, [\W], \w, [\w]'; 1108 my @a = map chr, 0 .. 255; 1109 my @b = grep /\W/, @a; 1110 my @c = grep /[^\w]/, @a; 1111 is("@b", "@c", $message); 1112 1113 @b = grep /\W/, @a; 1114 @c = grep /[\W]/, @a; 1115 is("@b", "@c", $message); 1116 1117 @b = grep /\w/, @a; 1118 @c = grep /[^\W]/, @a; 1119 is("@b", "@c", $message); 1120 1121 @b = grep /\w/, @a; 1122 @c = grep /[\w]/, @a; 1123 is("@b", "@c", $message); 1124 } 1125 1126 { 1127 # see if backtracking optimization works correctly 1128 my $message = 'Backtrack optimization'; 1129 like("\n\n", qr/\n $ \n/x, $message); 1130 like("\n\n", qr/\n* $ \n/x, $message); 1131 like("\n\n", qr/\n+ $ \n/x, $message); 1132 like("\n\n", qr/\n? $ \n/x, $message); 1133 like("\n\n", qr/\n*? $ \n/x, $message); 1134 like("\n\n", qr/\n+? $ \n/x, $message); 1135 like("\n\n", qr/\n?? $ \n/x, $message); 1136 unlike("\n\n", qr/\n*+ $ \n/x, $message); 1137 unlike("\n\n", qr/\n++ $ \n/x, $message); 1138 like("\n\n", qr/\n?+ $ \n/x, $message); 1139 } 1140 1141 { 1142 package S; 1143 use overload '""' => sub {'Object S'}; 1144 sub new {bless []} 1145 1146 my $message = "Ref stringification"; 1147 ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); 1148 ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); 1149 ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); 1150 ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); 1151 ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); 1152 } 1153 1154 { 1155 my $message = "Test result of match used as match"; 1156 ok('a1b' =~ ('xyz' =~ /y/), $message); 1157 is($`, 'a', $message); 1158 ok('a1b' =~ ('xyz' =~ /t/), $message); 1159 is($`, 'a', $message); 1160 } 1161 1162 { 1163 my $message = '"1" is not \s'; 1164 warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, 1165 undef, "$message (did not warn)"); 1166 } 1167 1168 { 1169 my $message = '\s, [[:space:]] and [[:blank:]]'; 1170 my %space = (spc => " ", 1171 tab => "\t", 1172 cr => "\r", 1173 lf => "\n", 1174 ff => "\f", 1175 # There's no \v but the vertical tabulator seems miraculously 1176 # be 11 both in ASCII and EBCDIC. 1177 vt => chr(11), 1178 false => "space"); 1179 1180 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; 1181 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; 1182 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; 1183 1184 is("@space0", "cr ff lf spc tab vt", $message); 1185 is("@space1", "cr ff lf spc tab vt", $message); 1186 is("@space2", "spc tab", $message); 1187 } 1188 1189 { 1190 my $n= 50; 1191 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't 1192 # seem to be predictable. Slight changes to the test make it fail earlier or later. 1193 foreach my $i (0 .. $n) 1194 { 1195 my $str= "\n" x $i; 1196 ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; 1197 } 1198 } 1199 { 1200 # we are actually testing that we dont die when executing these patterns 1201 use utf8; 1202 my $e = "Böck"; 1203 ok(utf8::is_utf8($e),"got a unicode string - rt75680"); 1204 1205 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); 1206 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); 1207 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); 1208 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); 1209 } 1210 { 1211 # we are actually testing that we dont die when executing these patterns 1212 my $e = "B" . uni_to_native("\x{f6}") . "ck"; 1213 ok(!utf8::is_utf8($e), "got a latin string - rt75680"); 1214 1215 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); 1216 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); 1217 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); 1218 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); 1219 } 1220 1221 { 1222 # 1223 # Tests for bug 77414. 1224 # 1225 1226 my $message = '\p property after empty * match'; 1227 { 1228 like("1", qr/\s*\pN/, $message); 1229 like("-", qr/\s*\p{Dash}/, $message); 1230 like(" ", qr/\w*\p{Blank}/, $message); 1231 } 1232 1233 like("1", qr/\s*\pN+/, $message); 1234 like("-", qr/\s*\p{Dash}{1}/, $message); 1235 like(" ", qr/\w*\p{Blank}{1,4}/, $message); 1236 1237 } 1238 1239 { # Some constructs with Latin1 characters cause a utf8 string not 1240 # to match itself in non-utf8 1241 my $c = uni_to_native("\xc0"); 1242 my $pattern = my $utf8_pattern = qr/(($c)+,?)/; 1243 utf8::upgrade($utf8_pattern); 1244 ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; 1245 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; 1246 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; 1247 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; 1248 utf8::upgrade($c); 1249 ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; 1250 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; 1251 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; 1252 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; 1253 } 1254 1255 { # Make sure can override the formatting 1256 use feature 'unicode_strings'; 1257 ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; 1258 ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; 1259 } 1260 1261 { 1262 my $str= "\x{100}"; 1263 chop $str; 1264 my $qr= qr/$str/; 1265 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); 1266 $str= ""; 1267 $qr= qr/$str/; 1268 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); 1269 1270 } 1271 1272 { 1273 local $::TODO = "[perl #38133]"; 1274 1275 "A" =~ /(((?:A))?)+/; 1276 my $first = $2; 1277 1278 "A" =~ /(((A))?)+/; 1279 my $second = $2; 1280 1281 is($first, $second); 1282 } 1283 1284 { 1285 # RT #3516: \G in a m//g expression causes problems 1286 my $count = 0; 1287 while ("abc" =~ m/(\G[ac])?/g) { 1288 last if $count++ > 10; 1289 } 1290 ok($count < 10, 'RT #3516 A'); 1291 1292 $count = 0; 1293 while ("abc" =~ m/(\G|.)[ac]/g) { 1294 last if $count++ > 10; 1295 } 1296 ok($count < 10, 'RT #3516 B'); 1297 1298 $count = 0; 1299 while ("abc" =~ m/(\G?[ac])?/g) { 1300 last if $count++ > 10; 1301 } 1302 ok($count < 10, 'RT #3516 C'); 1303 } 1304 { 1305 # RT #84294: Is this a bug in the simple Perl regex? 1306 # : Nested buffers and (?{...}) dont play nicely on partial matches 1307 our @got= (); 1308 ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); 1309 my $want= "'ab', 'a', 'b'"; 1310 my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); 1311 is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); 1312 } 1313 1314 { 1315 # Suppress warnings, as the non-unicode one comes out even if turn off 1316 # warnings here (because the execution is done in another scope). 1317 local $SIG{__WARN__} = sub {}; 1318 my $str = "\x{110000}"; 1319 1320 unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); 1321 like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); 1322 like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); 1323 unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); 1324 } 1325 1326 { 1327 # Test that IDstart works, but because the author (khw) knows 1328 # regexes much better than the rest of the core, it is being done here 1329 # in the context of a regex which relies on buffer names beginng with 1330 # IDStarts. 1331 use utf8; 1332 my $str = "abc"; 1333 like($str, qr/(?<a>abc)/, "'a' is legal IDStart"); 1334 like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); 1335 like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); 1336 like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); 1337 1338 # This test works on Unicode 6.0 in which U+2118 and U+212E are legal 1339 # IDStarts there, but are not Word characters, and therefore Perl 1340 # doesn't allow them to be IDStarts. But there is no guarantee that 1341 # Unicode won't change things around in the future so that at some 1342 # future Unicode revision these tests would need to be revised. 1343 foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { 1344 my $prog = <<"EOP"; 1345use utf8;; 1346"abc" =~ qr/(?<$char>abc)/; 1347EOP 1348 utf8::encode($prog); 1349 fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, 1350 sprintf("'U+%04X not legal IDFirst'", ord($char))); 1351 } 1352 } 1353 1354 { # [perl #101710] 1355 my $pat = "b"; 1356 utf8::upgrade($pat); 1357 like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); 1358 } 1359 1360 { # Crash with @a =~ // warning 1361 local $SIG{__WARN__} = sub { 1362 pass 'no crash for @a =~ // warning' 1363 }; 1364 eval ' sub { my @a =~ // } '; 1365 } 1366 1367 { # Concat overloading and qr// thingies 1368 my @refs; 1369 my $qr = qr//; 1370 package Cat { 1371 require overload; 1372 overload->import( 1373 '""' => sub { ${$_[0]} }, 1374 '.' => sub { 1375 push @refs, ref $_[1] if ref $_[1]; 1376 bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" 1377 } 1378 ); 1379 } 1380 my $s = "foo"; 1381 my $o = bless \$s, Cat::; 1382 /$o$qr/; 1383 is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; 1384 } 1385 1386 { 1387 my $count=0; 1388 my $str="\n"; 1389 $count++ while $str=~/.*/g; 1390 is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; 1391 my $class_count= 0; 1392 $class_count++ while $str=~/[^\n]*/g; 1393 is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; 1394 my $anch_count= 0; 1395 $anch_count++ while $str=~/^.*/mg; 1396 is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; 1397 } 1398 1399 { # [perl #111174] 1400 use re '/u'; 1401 my $A_grave = uni_to_native("\xc0"); 1402 like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u"; 1403 use re '/a'; 1404 unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; 1405 use re '/aa'; 1406 unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; 1407 unlike 'k', qr'(?i:\N{KELVIN SIGN})', "(?i: shouldn't lose the passed in /aa"; 1408 } 1409 1410 { 1411 # the test for whether the pattern should be re-compiled should 1412 # consider the UTF8ness of the previous and current pattern 1413 # string, as well as the physical bytes of the pattern string 1414 1415 for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") { 1416 ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); 1417 } 1418 } 1419 1420 # #113682 more overloading and qr// 1421 # when doing /foo$overloaded/, if $overloaded returns 1422 # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' 1423 # shouldn't be required. Via '.', it still is. 1424 { 1425 package Qr0; 1426 use overload 'qr' => sub { qr/(??{50})/ }; 1427 1428 package Qr1; 1429 use overload '""' => sub { qr/(??{51})/ }; 1430 1431 package Qr2; 1432 use overload '.' => sub { $_[1] . qr/(??{52})/ }; 1433 1434 package Qr3; 1435 use overload '""' => sub { qr/(??{7})/ }, 1436 '.' => sub { $_[1] . qr/(??{53})/ }; 1437 1438 package Qr_indirect; 1439 use overload '""' => sub { $_[0][0] }; 1440 1441 package main; 1442 1443 for my $i (0..3) { 1444 my $o = bless [], "Qr$i"; 1445 if ((0,0,1,1)[$i]) { 1446 eval { "A5$i" =~ /^A$o$/ }; 1447 like($@, qr/Eval-group not allowed/, "Qr$i"); 1448 eval { "5$i" =~ /$o/ }; 1449 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1450 "Qr$i bare"); 1451 { 1452 use re 'eval'; 1453 ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); 1454 eval { "5$i" =~ /$o/ }; 1455 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1456 "Qr$i bare - with use re eval"); 1457 } 1458 } 1459 else { 1460 ok("A5$i" =~ /^A$o$/, "Qr$i"); 1461 ok("5$i" =~ /$o/, "Qr$i bare"); 1462 } 1463 } 1464 1465 my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; 1466 ok("A51" =~ /^A$o/, "Qr_indirect"); 1467 ok("51" =~ /$o/, "Qr_indirect bare"); 1468 } 1469 1470 { # Various flags weren't being set when a [] is optimized into an 1471 # EXACTish node 1472 ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization"); 1473 } 1474 1475 { # Test that it avoids spllitting a multi-char fold across nodes. 1476 # These all fold to things that are like 'ss', which, if split across 1477 # nodes could fail to match a single character that folds to the 1478 # combination. 1F0 byte expands when folded; 1479 my $utf8_locale = find_utf8_ctype_locale(); 1480 for my $char('F', $sharp_s, "\x{1F0}", "\x{FB00}") { 1481 my $length = 260; # Long enough to overflow an EXACTFish regnode 1482 my $p = $char x $length; 1483 my $s = ($char eq $sharp_s) ? 'ss' 1484 : $char eq "\x{1F0}" 1485 ? "j\x{30c}" 1486 : 'ff'; 1487 $s = $s x $length; 1488 for my $charset (qw(u d l aa)) { 1489 for my $utf8 (0..1) { 1490 for my $locale ('C', $utf8_locale) { 1491 SKIP: 1492 { 1493 skip "test skipped for non-C locales", 2 1494 if $charset ne 'l' 1495 && (! defined $locale || $locale ne 'C'); 1496 if ($charset eq 'l') { 1497 skip 'Locales not available', 2 1498 unless $has_locales; 1499 if (! defined $locale) { 1500 skip "No UTF-8 locale", 2; 1501 } 1502 skip "Can't test in miniperl",2 1503 if is_miniperl(); 1504 1505 require POSIX; 1506 POSIX::setlocale(&LC_CTYPE, $locale); 1507 } 1508 1509 my $pat = $p; 1510 utf8::upgrade($pat) if $utf8; 1511 my $should_pass = 1512 ( $charset eq 'u' 1513 || ($charset eq 'd' && $utf8) 1514 || ($charset eq 'd' && ( $char =~ /[[:ascii:]]/ 1515 || ord $char > 255)) 1516 || ($charset eq 'aa' && $char =~ /[[:ascii:]]/) 1517 || ($charset eq 'l' && $locale ne 'C') 1518 || ($charset eq 'l' && $char =~ /[[:ascii:]]/) 1519 ); 1520 my $name = "(?i$charset), utf8=$utf8, locale=$locale," 1521 . " char=" . sprintf "%x", ord $char; 1522 no warnings 'locale'; 1523 is (eval " '$s' =~ qr/(?i$charset)$pat/;", 1524 $should_pass, $name); 1525 fail "$name: $@" if $@; 1526 is (eval " 'a$s' =~ qr/(?i$charset)a$pat/;", 1527 $should_pass, "extra a, $name"); 1528 fail "$name: $@" if $@; 1529 } 1530 } 1531 } 1532 } 1533 } 1534 } 1535 1536 SKIP: 1537 { 1538 skip "no re debug", 5 if is_miniperl; 1539 my $s = ("0123456789" x 26214) x 2; # Should fill 2 LEXACTS, plus 1540 # small change 1541 my $pattern_prefix = "use utf8; use re qw(Debug COMPILE)"; 1542 my $pattern = "$pattern_prefix; qr/$s/;"; 1543 my $result = fresh_perl($pattern); 1544 if ($? != 0) { # Re-run so as to display STDERR. 1545 fail($pattern); 1546 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1547 } 1548 like($result, qr/Final program[^X]*\bLEXACT\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1549 "Check that LEXACT nodes are generated"); 1550 like($s, qr/$s/, "Check that LEXACT nodes match"); 1551 like("a$s", qr/a$s/, "Previous test preceded by an 'a'"); 1552 substr($s, 260000, 1) = "\x{100}"; 1553 $pattern = "$pattern_prefix; qr/$s/;"; 1554 $result = fresh_perl($pattern, { 'wide_chars' => 1 } ); 1555 if ($? != 0) { # Re-run so as to display STDERR. 1556 fail($pattern); 1557 fresh_perl($pattern, { stderr => 0, verbose => 1 }); 1558 } 1559 like($result, qr/Final program[^X]*\bLEXACT_REQ8\b[^X]*\bLEXACT\b[^X]*\bEXACT\b[^X]*\bEND\b/s, 1560 "Check that an LEXACT_ONLY node is generated with a \\x{100}"); 1561 like($s, qr/$s/, "Check that LEXACT_REQ8 nodes match"); 1562 } 1563 1564 { 1565 for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") { 1566 my $utf8_char = $char; 1567 utf8::upgrade($utf8_char); 1568 my $display = $char; 1569 $display = display($display); 1570 my $utf8_display = "utf8::upgrade(\"$display\")"; 1571 1572 like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); 1573 like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); 1574 like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); 1575 like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); 1576 } 1577 } 1578 1579 { 1580 # #116148: Pattern utf8ness sticks around globally 1581 # the utf8 in the first match was sticking around for the second 1582 # match 1583 1584 use feature 'unicode_strings'; 1585 1586 my $x = "\x{263a}"; 1587 $x =~ /$x/; 1588 1589 my $text = "Perl"; 1590 ok("Perl" =~ /P.*$/i, '#116148'); 1591 } 1592 1593 { # 118297: Mixing up- and down-graded strings in regex 1594 utf8::upgrade(my $u = "\x{e5}"); 1595 utf8::downgrade(my $d = "\x{e5}"); 1596 my $warned; 1597 local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; 1598 my $re = qr/$u$d/; 1599 ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); 1600 my $c = "\x{e5}\x{e5}"; 1601 utf8::downgrade($c); 1602 like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); 1603 utf8::upgrade($c); 1604 like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); 1605 } 1606 1607 { 1608 # if we have 87 capture buffers defined then \87 should refer to the 87th. 1609 # test that this is true for 1..100 1610 # Note that this test causes the engine to recurse at runtime, and 1611 # hence use a lot of C stack. 1612 1613 # Compiling for all 100 nested captures blows the stack under 1614 # clang and ASan; reduce. 1615 my $max_captures = $Config{ccflags} =~ /sanitize/ ? 20 : 100; 1616 1617 for my $i (1..100) { 1618 if ($i > $max_captures) { 1619 pass("skipping $i buffers under ASan aa"); 1620 pass("skipping $i buffers under ASan aba"); 1621 next; 1622 } 1623 my $capture= "a"; 1624 $capture= "($capture)" for 1 .. $i; 1625 for my $mid ("","b") { 1626 my $str= "a${mid}a"; 1627 my $backref= "\\$i"; 1628 eval { 1629 ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); 1630 1; 1631 } or do { 1632 is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); 1633 }; 1634 } 1635 } 1636 } 1637 1638 # this mixture of readonly (not COWable) and COWable strings 1639 # messed up the capture buffers under COW. The actual test results 1640 # are incidental; the issue is was an AddressSanitizer failure 1641 { 1642 my $c ='AB'; 1643 my $res = ''; 1644 for ($c, 'C', $c, 'DE') { 1645 ok(/(.)/, "COWable match"); 1646 $res .= $1; 1647 } 1648 is($res, "ACAD"); 1649 } 1650 1651 1652 { 1653 # RT #45667 1654 # /[#$x]/x didn't interpolate the var $x. 1655 my $b = 'cd'; 1656 my $s = 'abcd$%#&'; 1657 $s =~ s/[a#$b%]/X/g; 1658 is ($s, 'XbXX$XX&', 'RT #45667 without /x'); 1659 $s = 'abcd$%#&'; 1660 $s =~ s/[a#$b%]/X/gx; 1661 is ($s, 'XbXX$XX&', 'RT #45667 with /x'); 1662 } 1663 1664 { 1665 no warnings "uninitialized"; 1666 my @a; 1667 $a[1]++; 1668 /@a/; 1669 pass('no crash with /@a/ when array has nonexistent elems'); 1670 } 1671 1672 { 1673 is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), 1674 "ok\n", 1675 'deleting *^R does not result in crashes'; 1676 no warnings 'once'; 1677 *^R = *caretRglobwithnoscalar; 1678 "" =~ /(?{42})/; 1679 is $^R, 42, 'assigning to *^R does not result in a crash'; 1680 is runperl( 1681 stderr => 1, 1682 prog => 'eval q|' 1683 .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' 1684 .' print qq-$^R\n-' 1685 .'|' 1686 ), 1687 "42\n", 1688 'undefining *^R within (??{}) does not result in a crash'; 1689 } 1690 1691 SKIP: { # Test literal range end point special handling 1692 unless ($::IS_EBCDIC) { 1693 skip "Valid only for EBCDIC", 24; 1694 } 1695 1696 like("\x89", qr/[i-j]/, '"\x89" should match [i-j]'); 1697 unlike("\x8A", qr/[i-j]/, '"\x8A" shouldnt match [i-j]'); 1698 unlike("\x90", qr/[i-j]/, '"\x90" shouldnt match [i-j]'); 1699 like("\x91", qr/[i-j]/, '"\x91" should match [i-j]'); 1700 1701 like("\x89", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); 1702 unlike("\x8A", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1703 unlike("\x90", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1704 like("\x91", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); 1705 1706 like("\x89", qr/[i-\N{U+6A}]/, '"\x89" should match [i-\N{U+6A}]'); 1707 unlike("\x8A", qr/[i-\N{U+6A}]/, '"\x8A" shouldnt match [i-\N{U+6A}]'); 1708 unlike("\x90", qr/[i-\N{U+6A}]/, '"\x90" shouldnt match [i-\N{U+6A}]'); 1709 like("\x91", qr/[i-\N{U+6A}]/, '"\x91" should match [i-\N{U+6A}]'); 1710 1711 like("\x89", qr/[\N{U+69}-\N{U+6A}]/, '"\x89" should match [\N{U+69}-\N{U+6A}]'); 1712 unlike("\x8A", qr/[\N{U+69}-\N{U+6A}]/, '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); 1713 unlike("\x90", qr/[\N{U+69}-\N{U+6A}]/, '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); 1714 like("\x91", qr/[\N{U+69}-\N{U+6A}]/, '"\x91" should match [\N{U+69}-\N{U+6A}]'); 1715 1716 like("\x89", qr/[i-\x{91}]/, '"\x89" should match [i-\x{91}]'); 1717 like("\x8A", qr/[i-\x{91}]/, '"\x8A" should match [i-\x{91}]'); 1718 like("\x90", qr/[i-\x{91}]/, '"\x90" should match [i-\x{91}]'); 1719 like("\x91", qr/[i-\x{91}]/, '"\x91" should match [i-\x{91}]'); 1720 1721 # Need to use eval, because tries to compile on ASCII platforms even 1722 # though the tests are skipped, and fails because 0x89-j is an illegal 1723 # range there. 1724 like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]'); 1725 like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]'); 1726 like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]'); 1727 like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]'); 1728 } 1729 1730 # These are based on looking at the code in regcomp.c 1731 # We don't look for specific code, just the existence of an SSC 1732 foreach my $re (qw( qr/a?c/ 1733 qr/a?c/i 1734 qr/[ab]?c/ 1735 qr/\R?c/ 1736 qr/\d?c/d 1737 qr/\w?c/l 1738 qr/\s?c/a 1739 qr/[[:lower:]]?c/u 1740 )) { 1741 SKIP: { 1742 skip "no re-debug under miniperl" if is_miniperl; 1743 my $prog = <<"EOP"; 1744use re qw(Debug COMPILE); 1745$re; 1746EOP 1747 fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); 1748 } 1749 } 1750 1751 { 1752 like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; 1753 } 1754 1755 SKIP: { 1756 skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC; 1757 1758 # Verify that the very last Latin-1 U+00FF 1759 # (LATIN SMALL LETTER Y WITH DIAERESIS) 1760 # and its UPPER counterpart (U+0178 which is pure Unicode), 1761 # and likewise for the very first pure Unicode 1762 # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly, 1763 # and there are no off-by-one logic errors in the transition zone. 1764 1765 ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l"); 1766 ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u"); 1767 ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l"); 1768 ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u"); 1769 1770 # U+00FF with U+05D0 (non-casing Hebrew letter). 1771 ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l"); 1772 ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u"); 1773 ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l"); 1774 ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u"); 1775 1776 # U+0100. 1777 ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u"); 1778 ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l"); 1779 ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u"); 1780 ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l"); 1781 } 1782 1783 { 1784 use utf8; 1785 ok("abc" =~ /a b c/x, "NEL is white-space under /x"); 1786 } 1787 1788 { 1789 ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)"); 1790 ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]"); 1791 ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta 1792 "'\\{' is a literal in qr{...}, where it could be a quantifier"); 1793 1794 # This one is for completeness 1795 ok('a<b>c' =~ qr<a\<b\>c>, "'\\<' is a literal in qr<...>)"); 1796 } 1797 1798 { # Was getting optimized into EXACT (non-folding node) 1799 my $x = qr/[x]/i; 1800 utf8::upgrade($x); 1801 like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); 1802 } 1803 1804 { # Special handling of literal-ended ranges in [...] was breaking this 1805 use utf8; 1806 like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]"); 1807 } 1808 1809 { # [perl #123539] 1810 like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1811 like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1812 } 1813 1814 { # [perl #123604] 1815 my($s, $x, @x) = ('abc', 'a', 'd'); 1816 my $long = 'b' x 2000; 1817 my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0}; 1818 $eval =~ s{bbb}{$long}; 1819 my $match = eval $eval; 1820 ok(1, "did not crash"); 1821 ok($match, "[bbb...] resolved as character class, not subscript"); 1822 } 1823 1824 { # [perl #123755] 1825 for my $pat ('(??', '(?P', '(?i-') { 1826 eval qq{ qr/$pat/ }; 1827 ok(1, "qr/$pat/ did not crash"); 1828 eval qq{ qr/${pat}\x{123}/ }; 1829 my $e = $@; 1830 like($e, qr{\x{123}}, 1831 "qr/${pat}x/ shows x in error even if it's a wide character"); 1832 } 1833 } 1834 1835 { 1836 # Expect one of these sizes to cause overflow and wrap to negative 1837 for my $bits (32, 64) { 1838 my $wrapneg = 2 ** ($bits - 2) * 3; 1839 for my $sign ('', '-') { 1840 my $pat = sprintf "qr/(?%s%u)/", $sign, $wrapneg; 1841 eval $pat; 1842 ok(1, "big backref $pat did not crash"); 1843 } 1844 } 1845 } 1846 { 1847 # Test that we handle qr/\8888888/ and variants without an infinite loop, 1848 # we use a test within a test so we can todo it, and make sure we don't 1849 # infinite loop our tests. 1850 # NOTE - Do not put quotes in the code! 1851 # NOTE - We have to triple escape the backref in the pattern below. 1852 my $code=' 1853 BEGIN{require q(./test.pl);} 1854 watchdog(3); 1855 for my $len (1 .. 20) { 1856 my $eights= q(8) x $len; 1857 eval qq{ qr/\\\\$eights/ }; 1858 } 1859 print q(No infinite loop here!); 1860 '; 1861 fresh_perl_is($code, "No infinite loop here!", {}, 1862 "test that we handle things like m/\\888888888/ without infinite loops" ); 1863 } 1864 1865 SKIP: 1866 { # Test that we handle some malformed UTF-8 without looping [perl 1867 # #123562] 1868 skip "no Encode", 1 if is_miniperl; 1869 my $code=' 1870 BEGIN{require q(./test.pl);} 1871 use Encode qw(_utf8_on); 1872 # \x80 and \x41 are continuation bytes in their respective 1873 # character sets 1874 my $malformed = (ord("A") == 65) ? "a\x80\n" : "a\x41\n"; 1875 utf8::downgrade($malformed); 1876 _utf8_on($malformed); 1877 watchdog(3); 1878 $malformed =~ /(\n\r|\r)$/; 1879 print q(No infinite loop here!); 1880 '; 1881 fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, 1882 "test that we handle some UTF-8 malformations without looping" ); 1883 } 1884 1885 { 1886 # [perl #123843] hits SEGV trying to compile this pattern 1887 my $match; 1888 eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) }; 1889 ok(1, "compiled GOSUB in CURLYM ok"); 1890 is($match, 'xxyxxyx', "matched GOSUB in CURLYM"); 1891 } 1892 1893 { 1894 # [perl #123852] doesn't avoid all the capture-related work with 1895 # //n, leading to possible memory corruption 1896 eval q{ qr{()(?1)}n }; 1897 my $error = $@; 1898 ok(1, "qr{()(?1)}n didn't crash"); 1899 like($error, qr{Reference to nonexistent group}, 1900 'gave appropriate error for qr{()(?1)}n'); 1901 } 1902 1903 { 1904 # [perl #126406] panic with unmatchable quantifier 1905 my $code=' 1906 no warnings "regexp"; 1907 "" =~ m/(.0\N{6,0}0\N{6,0}000000000000000000000000000000000)/; 1908 '; 1909 fresh_perl_is($code, "", {}, 1910 "perl [#126406] panic"); 1911 } 1912 { 1913 my $bug="[perl #126182]"; # test for infinite pattern recursion 1914 for my $tuple ( 1915 [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ], 1916 [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ], 1917 [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/', 1918 "died", "inter-cyclic optional left recursion dies" ], 1919 [ 'q(abc) =~ /a((?1)?)c/', "died", "optional left recursion dies" ], 1920 [ 'q(abc) =~ /a((?1)??)c/', "died", "min mod left recursion dies" ], 1921 [ 'q(abc) =~ /a((?1)*)c/', "died", "* left recursion dies" ], 1922 [ 'q(abc) =~ /a((?1)+)c/', "died", "+ left recursion dies" ], 1923 [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ], 1924 1925 [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ], 1926 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched", 1927 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1928 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched", 1929 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1930 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched", 1931 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1932 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched", 1933 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1934 ) { 1935 my ($expr, $expect, $test_name, $cap1)= @$tuple; 1936 # avoid quotes in this code! 1937 my $code=' 1938 BEGIN{require q(./test.pl);} 1939 watchdog(3); 1940 my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' . 1941 ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') . 1942 ' q(matched) }) 1943 || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) ); 1944 print $status; 1945 '; 1946 fresh_perl_is($code, $expect, {}, "$bug - $test_name" ); 1947 } 1948 } 1949 { 1950 my $is_cygwin = $^O eq "cygwin"; 1951 local $::TODO = "this flaps on github cygwin vm, but not on cygwin iron #18129" 1952 if $is_cygwin; 1953 my $expected = "Timeout"; 1954 my $code = ' 1955 BEGIN{require q(test.pl);} 1956 watchdog(3); 1957 $SIG{ALRM} = sub {print "'.$expected.'\n"; exit(1)}; 1958 alarm 1; 1959 $_ = "a" x 1000 . "b" x 1000 . "c" x 1000; 1960 /.*a.*b.*c.*[de]/; 1961 print "increase the multipliers in the regex above to run the regex longer"; 1962 '; 1963 # this flaps on github cygwin vm, but not on cygwin iron #18129 1964 # so on cygwin it's run for 50 seconds to see if it fails eventually 1965 my $max = $is_cygwin ? 50 : 1; 1966 my ($iter, $result, $status); 1967 for my $i (1..$max) { 1968 $iter = $i; 1969 $result = fresh_perl($code,{}); 1970 $status = $?; 1971 last if $result ne $expected; 1972 } 1973 is($result, $expected, "Test Perl 73464") 1974 or diag "PROG:", $code, "STATUS:", $status, "failed on iteration: $iter"; 1975 } 1976 1977 { # [perl #128686], crashed the the interpreter 1978 my $AE = chr utf8::unicode_to_native(0xC6); 1979 my $ae = chr utf8::unicode_to_native(0xE6); 1980 my $re = qr/[$ae\s]/i; 1981 ok($AE !~ $re, '/[\xE6\s]/i doesn\'t match \xC6 when not in UTF-8'); 1982 utf8::upgrade $AE; 1983 ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8'); 1984 } 1985 1986 { 1987 is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|); 1988 } 1989 1990 { 1991 my $str = "a\xB6"; 1992 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - latin1 case" ); 1993 utf8::upgrade($str); 1994 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - utf8 case" ); 1995 } 1996 { 1997 my $got= run_perl( switches => [ '-l' ], prog => <<'EOF_CODE' ); 1998 my $died= !eval { 1999 $_=qq(ab); 2000 print; 2001 my $p=qr/(?{ s!!x! })/; 2002 /$p/; 2003 print; 2004 /a/; 2005 /$p/; 2006 print; 2007 /b/; 2008 /$p/; 2009 print; 2010 //; 2011 1; 2012 }; 2013 $error = $died ? ($@ || qq(Zombie)) : qq(none); 2014 print $died ? qq(died) : qq(lived); 2015 print qq(Error: $@); 2016EOF_CODE 2017 my @got= split /\n/, $got; 2018 is($got[0],"ab","empty pattern in regex codeblock: got expected start string"); 2019 is($got[1],"xab", 2020 "empty pattern in regex codeblock: first subst with no last-match worked right"); 2021 is($got[2],"xxb","empty pattern in regex codeblock: second subst worked right"); 2022 is($got[3],"xxx","empty pattern in regex codeblock: third subst worked right"); 2023 is($got[4],"died","empty pattern in regex codeblock: died as expected"); 2024 like($got[5],qr/Error: Infinite recursion via empty pattern/, 2025 "empty pattern in regex codeblock: produced the right exception message" ); 2026 } 2027 2028 # This test is based on the one directly above, which happened to 2029 # leak. Repeat the test, but stripped down to the bare essentials 2030 # of the leak, which is to die while executing a regex which is 2031 # already the current regex, thus causing the saved outer set of 2032 # capture offsets to leak. The test itself doesn't do anything 2033 # except sit around hoping not to be triggered by ASan 2034 { 2035 eval { 2036 my $s = "abcd"; 2037 $s =~ m{([abcd]) (?{ die if $1 eq 'd'; })}gx; 2038 $s =~ //g; 2039 $s =~ //g; 2040 $s =~ //g; 2041 }; 2042 pass("call to current regex doesn't leak"); 2043 } 2044 2045 2046 2047 { 2048 # [perl #130495] /x comment skipping stopped a byte short, leading 2049 # to assertion failure or 'malformed utf-8 character" warning 2050 fresh_perl_is( 2051 "use utf8; m{a#\x{124}}x", '', {wide_chars => 1}, 2052 '[perl #130495] utf-8 character at end of /x comment should not misparse', 2053 ); 2054 } 2055 { 2056 # [perl #130522] causes out-of-bounds read detected by clang with 2057 # address=sanitized when length of the STCLASS string is greater than 2058 # length of target string. 2059 my $re = qr{(?=\0z)\0?z?$}i; 2060 my($yes, $no) = (1, ""); 2061 for my $test ( 2062 [ $no, undef, '<undef>' ], 2063 [ $no, '', '' ], 2064 [ $no, "\0", '\0' ], 2065 [ $yes, "\0z", '\0z' ], 2066 [ $no, "\0z\0", '\0z\0' ], 2067 [ $yes, "\0z\n", '\0z\n' ], 2068 ) { 2069 my($result, $target, $disp) = @$test; 2070 no warnings qw/uninitialized/; 2071 is($target =~ $re, $result, "[perl #130522] with target '$disp'"); 2072 } 2073 } 2074 { 2075 # [perl #129377] backref to an unmatched capture should not cause 2076 # reading before start of string. 2077 SKIP: { 2078 skip "no re-debug under miniperl" if is_miniperl; 2079 my $prog = <<'EOP'; 2080use re qw(Debug EXECUTE); 2081"x" =~ m{ () y | () \1 }x; 2082EOP 2083 fresh_perl_like($prog, qr{ 2084 \A (?! .* ^ \s+ - ) 2085 }msx, { stderr => 1 }, "Offsets in debug output are not negative"); 2086 } 2087 } 2088 { 2089 # buffer overflow 2090 2091 # This test also used to leak - fixed by the commit which added 2092 # this line. 2093 2094 fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", 2095 "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", 2096 {}, "buffer overflow for regexp component"); 2097 } 2098 { 2099 # [perl #129281] buffer write overflow, detected by ASAN, valgrind 2100 fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much"); 2101 } 2102 { 2103 # RT #131893 - fails with ASAN -fsanitize=undefined 2104 fresh_perl_is('qr/0(0?(0||00*))|/', '', {}, "integer overflow during compilation"); 2105 } 2106 2107 { 2108 # RT #131575 intuit skipping back from the end to find the highest 2109 # possible start point, was potentially hopping back beyond pos() 2110 # and crashing by calling fbm_instr with a negative length 2111 2112 my $text = "=t=\x{5000}"; 2113 pos($text) = 3; 2114 ok(scalar($text !~ m{(~*=[a-z]=)}g), "RT #131575"); 2115 } 2116 { 2117 fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly"); 2118 } 2119 { 2120 fresh_perl_is('$_="0\x{1000000}";/^000?\0000/','',{},"dont throw assert errors trying to fbm past end of string"); 2121 } 2122 { # [perl $132227] 2123 fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2124 2125 # A variation, but as far as khw knows not part of 132227 2126 fresh_perl_is("'0bssa' =~ m/0B" . $sharp_s . "\\N{U+41}" . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 2127 } 2128 { # [perl $132164] 2129 fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer"); 2130 } 2131 { # [perl #133642] 2132 fresh_perl_is('no warnings "experimental::vlb"; 2133 m/((?<=(0?)))/', "",{},"Was getting 'Double free'"); 2134 } 2135 { # [perl #133782] 2136 # this would panic on DEBUGGING builds 2137 fresh_perl_is(<<'CODE', "ok\nok\n",{}, 'Bad length magic was left on $^R'); 2138while( "\N{U+100}bc" =~ /(..?)(?{$^N})/g ) { 2139 print "ok\n" if length($^R)==length("$^R"); 2140} 2141CODE 2142 } 2143 { # [perl #133871], ASAN/valgrind out-of-bounds access 2144 fresh_perl_like('qr/(?|(())|())|//', qr/syntax error/, {}, "[perl #133871]"); 2145 } 2146 { # [perl #133871], ASAN/valgrind out-of-bounds access 2147 fresh_perl_like('qr/\p{nv:NAnq}/', qr/Can't find Unicode property definition/, {}, "GH #17367"); 2148 } 2149 { # GH #17370, ASAN/valgrind out-of-bounds access 2150 fresh_perl_like('qr/\p{nv:qnan}/', qr/Can't find Unicode property definition/, {}, "GH #17370"); 2151 } 2152 { # GH #17371, segfault 2153 fresh_perl_like('qr/\p{nv=\\\\\}(?0)|\337ss|\337ss//', qr/Unicode property wildcard not terminated/, {}, "GH #17371"); 2154 } 2155 { # GH #17384, ASAN/valgrind out-of-bounds access 2156 fresh_perl_like('"q0" =~ /\p{__::Is0}/', qr/Unknown user-defined property name \\p\{__::Is0}/, {}, "GH #17384"); 2157 } 2158 2159 SKIP: 2160 { # [perl #133921], segfault 2161 skip "Not valid for EBCDIC", 5 if $::IS_EBCDIC; 2162 2163 fresh_perl_is('qr0||ß+p00000F00000ù\Q00000ÿ00000x00000x0c0e0\Qx0\Qx0\x{0c!}\;\;î0\xÿÿÿþù\Q`\Qx`{0c!}e;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ Fn0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2164 fresh_perl_is('|ß+W0ü0r0\Qx0\Qx0x0c0G00000000000000000O000000000x0x0x0c!}\;îçÿù\Q0 \xÿÿÿÿù\Q`\Qx`{0d ;ù\ò`\Qm`\x{0c!}\;\;îçÿ ç!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qq`\x{c!}\;\;îç!}\;îçÿù\Q \xÿÿÿÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 0000000Fm0t0c d;t ù ç!00000000000000000000000m/0000000000000000000000000000000m/\x{){} )|i', "", {}, "[perl #133921]"); 2165 2166fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0G0xgive0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2167 2168 2169 2170 2171 ç 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187x{0c!}\;\;çÿ q0/i0/!F/;îçÿù\Qxÿÿÿÿù`x{0c!}e;ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ u00000F000t0p d? ù ç!00000000000000000000000m/0000000000000000000000000000000m/0\} )|i', "", {}, "[perl #133921]"); 2188 2189 fresh_perl_is('a aúúv sWtrt\ó||ß+Wüefù\Qx`\Qx`\x{1c!gGnuc given1111111111111O1111each111\jx` \xòÿÿÿù\Qx`\Q 2190 2191 2192 2193 2194 2195 ç 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211x{1c!}\;\;îçÿp qr/elsif/!eF/;îçÿù\QxÿÿÿÿùHQx`Lx{1c!}e;ù\Qx`\Qx`\x{1c!}\;ÿÿÿÿc!}\;îçÿù\Qx\xÿÿÿÿ>=\Qx`\Qx`ù\òx`ÿ>=\Qx`\Qx`ù\òx`\Qx`\x{1c!}8;\;îçÿp unshifteFnormat0cmp d?not ùp ç!0000000000000000000000000m/00000000000000000000000000000000m/0R\} )|\aï||K??p¿ÿÿfúd{\{gri{\x{1x/} ð¹NuntiÀh', "", {}, "[perl #133921]"); 2212 2213 fresh_perl_is('s|ß+W0ü0f0\Qx0\Qx0x0c0g0c 000n0000000000000O0h000x0 \xòÿÿÿù\Q`\Q 2214 2215 2216 2217 2218 2219 ç 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235x{0c!}\;\;îçÿ /0f/!F/;îçÿù\Qxÿÿÿÿù`x{0c!};ù\Q`\Qx`\x{0c!}\;ÿÿÿÿ!}\;îçÿù\Q\xÿÿÿÿ>=\Qx`\Qx`ù\ò`ÿ>=\Qx`\Qx`ù\ò`\Qx`\x{0c!};\;îçÿ 000t0F000t0p d?n ù ç!00000000000000000000000m/0000000000000000000000000000000m/\} )|i', "", {}, "[perl #133933]"); 2236 } 2237 2238 { # perl #133998] 2239 fresh_perl_is('print "\x{110000}" =~ qr/(?l)|[^\S\pC\s]/', 1, {}, 2240 '/[\S\s]/l works'); 2241 } 2242 2243 { # perl #133995] 2244 use utf8; 2245 fresh_perl_is('"έδωσαν ελληνικήვე" =~ m/[^0](?=0)0?/', "", 2246 {wide_chars => 1}, 2247 '[^0] doesnt crash on UTF-8 target string'); 2248 } 2249 2250 { # [perl #133992] This is a tokenizer bug of parsing a pattern 2251 fresh_perl_is(q:$z = do { 2252 use utf8; 2253 "q!ÑеÑÑ! =~ m'" 2254 }; 2255 $z .= 'è(?#'; 2256 $z .= "'"; 2257 eval $z;:, "", {}, 'foo'); 2258 } 2259 2260 { # [perl #134325] 2261 my $quote="\\Q"; 2262 my $back="\\\\"; 2263 my $ff="\xff"; 2264 my $s = sprintf "/\\1|(|%s)%s%s /i", 2265 $quote x 8 . $back x 69, 2266 $quote x 5 . $back x 4, 2267 $ff x 48; 2268 like(fresh_perl("$s", { stderr => 1, }), qr/Unmatched \(/); 2269 } 2270 2271 { # GitHub #17196, caused assertion failure 2272 fresh_perl_like('("0" x 258) =~ /(?l)0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000/', 2273 qr/^$/, 2274 {}, 2275 "Assertion failure with /l exact string longer than a single node"); 2276 } 2277 2278SKIP: 2279 { # [perl #134334], Assertion failure 2280 my $utf8_locale = find_utf8_ctype_locale(); 2281 skip "no UTF-8 locale available" unless $utf8_locale; 2282 fresh_perl_like("use POSIX; POSIX::setlocale(&LC_CTYPE, '$utf8_locale'); 'ssss' =~ /\xDF+?sX/il;", 2283 qr/^$/, 2284 {}, 2285 "Assertion failure matching /il on single char folding to multi"); 2286 } 2287 2288 { # Test ANYOFHs 2289 my $pat = qr/[\x{4000001}\x{4000003}\x{4000005}]+/; 2290 unlike("\x{4000000}", $pat, "4000000 isn't in pattern"); 2291 like("\x{4000001}", $pat, "4000001 is in pattern"); 2292 unlike("\x{4000002}", $pat, "4000002 isn't in pattern"); 2293 like("\x{4000003}", $pat, "4000003 is in pattern"); 2294 unlike("\x{4000004}", $pat, "4000004 isn't in pattern"); 2295 like("\x{4000005}", $pat, "4000005 is in pattern"); 2296 unlike("\x{4000006}", $pat, "4000006 isn't in pattern"); 2297 2298 # gh #17319 2299 $pat = qr/[\N{U+200D}\N{U+2000}]()/; 2300 unlike("\x{1FFF}", $pat, "1FFF isn't in pattern"); 2301 like("\x{2000}", $pat, "2000 is in pattern"); 2302 unlike("\x{2001}", $pat, "2001 isn't in pattern"); 2303 unlike("\x{200C}", $pat, "200C isn't in pattern"); 2304 like("\x{200D}", $pat, "200 is in pattern"); 2305 unlike("\x{200E}", $pat, "200E isn't in pattern"); 2306 } 2307 2308 # gh17490: test recursion check 2309 { 2310 my $eval = '(?{1})'; 2311 my $re = sprintf '(?&FOO)(?(DEFINE)(?<FOO>%sfoo))', $eval x 20; 2312 my $result = eval qq{"foo" =~ /$re/}; 2313 is($@ // '', '', "many evals did not die"); 2314 ok($result, "regexp correctly matched"); 2315 } 2316 2317 # gh16947: test regexp corruption (GOSUB) 2318 { 2319 fresh_perl_is(q{ 2320 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok' 2321 }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)'); 2322 } 2323 # gh16947: test fix doesn't break SUSPEND 2324 { 2325 fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' }, 2326 'ok', {}, "gh16947: test fix doesn't break SUSPEND"); 2327 } 2328 2329 # gh17730: should not crash 2330 { 2331 fresh_perl_is(q{ 2332 "q00" =~ m{(((*ACCEPT)0)*00)?0(?1)}; print "ok" 2333 }, 'ok', {}, 'gh17730: should not crash'); 2334 } 2335 2336 # gh17743: more regexp corruption via GOSUB 2337 { 2338 fresh_perl_is(q{ 2339 "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok" 2340 }, 'ok', {}, 'gh17743: test regexp corruption (1)'); 2341 2342 fresh_perl_is(q{ 2343 "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/; 2344 print "ok" 2345 }, 'ok', {}, 'gh17743: test regexp corruption (2)'); 2346 } 2347 2348} # End of sub run_tests 2349 23501; 2351 2352# 2353# ex: set ts=8 sts=4 sw=4 et: 2354# 2355