1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9use 5.010; 10 11sub run_tests; 12 13$| = 1; 14 15 16BEGIN { 17 chdir 't' if -d 't'; 18 require Config; import Config; 19 require './test.pl'; require './charset_tools.pl'; 20 require './loc_tools.pl'; 21 set_up_inc('../lib', '.', '../ext/re'); 22} 23 skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; 24 skip_all_without_unicode_tables(); 25 26plan tests => 848; # Update this when adding/deleting tests. 27 28run_tests() unless caller; 29 30# 31# Tests start here. 32# 33sub run_tests { 34 35 my $sharp_s = uni_to_native("\xdf"); 36 37 { 38 my $x = "abc\ndef\n"; 39 (my $x_pretty = $x) =~ s/\n/\\n/g; 40 41 ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; 42 ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; 43 44 # used to be a test for $* 45 ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; 46 47 ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); 48 ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); 49 50 ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; 51 ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); 52 53 ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; 54 ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); 55 56 ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; 57 ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); 58 } 59 60 { 61 $_ = '123'; 62 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; 63 } 64 65 { 66 $_ = 'aaabbbccc'; 67 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', 68 qq [\$_ = '$_'; /(a*b*)(c*)/]; 69 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; 70 unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); 71 72 $_ = 'aaabccc'; 73 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; 74 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 75 76 $_ = 'aaaccc'; 77 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 78 unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); 79 80 $_ = 'abcdef'; 81 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; 82 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; 83 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; 84 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; 85 } 86 87 { 88 # used to be a test for $* 89 ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; 90 } 91 92 { 93 our %XXX = map {($_ => $_)} 123, 234, 345; 94 95 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); 96 while ($_ = shift(@XXX)) { 97 my $e = index ($_, 'not') >= 0 ? '' : 1; 98 my $r = m?(.*)?; 99 is($r, $e, "?(.*)?"); 100 /not/ && reset; 101 if (/not ok 2/) { 102 if ($^O eq 'VMS') { 103 $_ = shift(@XXX); 104 } 105 else { 106 reset 'X'; 107 } 108 } 109 } 110 111 SKIP: { 112 if ($^O eq 'VMS') { 113 skip "Reset 'X'", 1; 114 } 115 ok !keys %XXX, "%XXX is empty"; 116 } 117 118 } 119 120 { 121 my $message = "Test empty pattern"; 122 my $xyz = 'xyz'; 123 my $cde = 'cde'; 124 125 $cde =~ /[^ab]*/; 126 $xyz =~ //; 127 is($&, $xyz, $message); 128 129 my $foo = '[^ab]*'; 130 $cde =~ /$foo/; 131 $xyz =~ //; 132 is($&, $xyz, $message); 133 134 $cde =~ /$foo/; 135 my $null; 136 no warnings 'uninitialized'; 137 $xyz =~ /$null/; 138 is($&, $xyz, $message); 139 140 $null = ""; 141 $xyz =~ /$null/; 142 is($&, $xyz, $message); 143 144 # each entry: regexp, match string, $&, //o match success 145 my @tests = 146 ( 147 [ "", "xy", "x", 1 ], 148 [ "y", "yz", "y", !1 ], 149 ); 150 for my $test (@tests) { 151 my ($re, $str, $matched, $omatch) = @$test; 152 $xyz =~ /x/o; 153 ok($str =~ /$re/, "$str matches /$re/"); 154 is($&, $matched, "on $matched"); 155 $xyz =~ /x/o; 156 is($str =~ /$re/o, $omatch, "$str matches /$re/o (or not)"); 157 } 158 } 159 160 { 161 my $message = q !Check $`, $&, $'!; 162 $_ = 'abcdefghi'; 163 /def/; # optimized up to cmd 164 is("$`:$&:$'", 'abc:def:ghi', $message); 165 166 no warnings 'void'; 167 /cde/ + 0; # optimized only to spat 168 is("$`:$&:$'", 'ab:cde:fghi', $message); 169 170 /[d][e][f]/; # not optimized 171 is("$`:$&:$'", 'abc:def:ghi', $message); 172 } 173 174 { 175 $_ = 'now is the {time for all} good men to come to.'; 176 / \{([^}]*)}/; 177 is($1, 'time for all', "Match braces"); 178 } 179 180 { 181 my $message = "{N,M} quantifier"; 182 $_ = 'xxx {3,4} yyy zzz'; 183 ok(/( {3,4})/, $message); 184 is($1, ' ', $message); 185 unlike($_, qr/( {4,})/, $message); 186 ok(/( {2,3}.)/, $message); 187 is($1, ' y', $message); 188 ok(/(y{2,3}.)/, $message); 189 is($1, 'yyy ', $message); 190 unlike($_, qr/x {3,4}/, $message); 191 unlike($_, qr/^xxx {3,4}/, $message); 192 } 193 194 { 195 my $message = "Test /g"; 196 local $" = ":"; 197 $_ = "now is the time for all good men to come to."; 198 my @words = /(\w+)/g; 199 my $exp = "now:is:the:time:for:all:good:men:to:come:to"; 200 201 is("@words", $exp, $message); 202 203 @words = (); 204 while (/\w+/g) { 205 push (@words, $&); 206 } 207 is("@words", $exp, $message); 208 209 @words = (); 210 pos = 0; 211 while (/to/g) { 212 push(@words, $&); 213 } 214 is("@words", "to:to", $message); 215 216 pos $_ = 0; 217 @words = /to/g; 218 is("@words", "to:to", $message); 219 } 220 221 { 222 $_ = "abcdefghi"; 223 224 my $pat1 = 'def'; 225 my $pat2 = '^def'; 226 my $pat3 = '.def.'; 227 my $pat4 = 'abc'; 228 my $pat5 = '^abc'; 229 my $pat6 = 'abc$'; 230 my $pat7 = 'ghi'; 231 my $pat8 = '\w*ghi'; 232 my $pat9 = 'ghi$'; 233 234 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = 235 my $t6 = my $t7 = my $t8 = my $t9 = 0; 236 237 for my $iter (1 .. 5) { 238 $t1++ if /$pat1/o; 239 $t2++ if /$pat2/o; 240 $t3++ if /$pat3/o; 241 $t4++ if /$pat4/o; 242 $t5++ if /$pat5/o; 243 $t6++ if /$pat6/o; 244 $t7++ if /$pat7/o; 245 $t8++ if /$pat8/o; 246 $t9++ if /$pat9/o; 247 } 248 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; 249 is($x, '505550555', "Test /o"); 250 } 251 252 { 253 my $xyz = 'xyz'; 254 ok "abc" =~ /^abc$|$xyz/, "| after \$"; 255 256 # perl 4.009 says "unmatched ()" 257 my $message = '$ inside ()'; 258 259 my $result; 260 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; 261 is($@, "", $message); 262 is($result, "abc:bc", $message); 263 } 264 265 { 266 my $message = "Scalar /g"; 267 $_ = "abcfooabcbar"; 268 269 ok( /abc/g && $` eq "", $message); 270 ok( /abc/g && $` eq "abcfoo", $message); 271 ok(!/abc/g, $message); 272 273 $message = "Scalar /gi"; 274 pos = 0; 275 ok( /ABC/gi && $` eq "", $message); 276 ok( /ABC/gi && $` eq "abcfoo", $message); 277 ok(!/ABC/gi, $message); 278 279 $message = "Scalar /g"; 280 pos = 0; 281 ok( /abc/g && $' eq "fooabcbar", $message); 282 ok( /abc/g && $' eq "bar", $message); 283 284 $_ .= ''; 285 my @x = /abc/g; 286 is(@x, 2, "/g reset after assignment"); 287 } 288 289 { 290 my $message = '/g, \G and pos'; 291 $_ = "abdc"; 292 pos $_ = 2; 293 /\Gc/gc; 294 is(pos $_, 2, $message); 295 /\Gc/g; 296 is(pos $_, undef, $message); 297 } 298 299 { 300 my $message = '(?{ })'; 301 our $out = 1; 302 'abc' =~ m'a(?{ $out = 2 })b'; 303 is($out, 2, $message); 304 305 $out = 1; 306 'abc' =~ m'a(?{ $out = 3 })c'; 307 is($out, 1, $message); 308 } 309 310 { 311 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 312 my @out = /(?<!foo)bar./g; 313 is("@out", 'bar2 barf', "Negative lookbehind"); 314 } 315 316 { 317 my $message = "REG_INFTY tests"; 318 # Tests which depend on REG_INFTY 319 320 # Defaults assumed if this fails 321 eval { require Config; }; 322 $::reg_infty = $Config::Config{reg_infty} // 32767; 323 $::reg_infty_m = $::reg_infty - 1; 324 $::reg_infty_p = $::reg_infty + 1; 325 $::reg_infty_m = $::reg_infty_m; # Suppress warning. 326 327 # As well as failing if the pattern matches do unexpected things, the 328 # next three tests will fail if you should have picked up a lower-than- 329 # default value for $reg_infty from Config.pm, but have not. 330 331 is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); 332 is($@, '', $message); 333 is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); 334 is($@, '', $message); 335 isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); 336 is($@, '', $message); 337 338 eval "'aaa' =~ /a{1,$::reg_infty}/"; 339 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 340 eval "'aaa' =~ /a{1,$::reg_infty_p}/"; 341 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 342 } 343 344 { 345 # Poke a couple more parse failures 346 my $context = 'x' x 256; 347 eval qq("${context}y" =~ /(?<=$context)y/); 348 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; 349 } 350 351 { 352 # Long Monsters 353 for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory 354 my $a = 'a' x $l; 355 my $message = "Long monster, length = $l"; 356 like("ba$a=", qr/a$a=/, $message); 357 unlike("b$a=", qr/a$a=/, $message); 358 like("b$a=", qr/ba+=/, $message); 359 360 like("ba$a=", qr/b(?:a|b)+=/, $message); 361 } 362 } 363 364 { 365 # 20000 nodes, each taking 3 words per string, and 1 per branch 366 my $long_constant_len = join '|', 12120 .. 32645; 367 my $long_var_len = join '|', 8120 .. 28645; 368 my %ans = ( 'ax13876y25677lbc' => 1, 369 'ax13876y25677mcb' => 0, # not b. 370 'ax13876y35677nbc' => 0, # Num too big 371 'ax13876y25677y21378obc' => 1, 372 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 373 'ax13876y25677y21378y21378kbc' => 1, 374 'ax13876y25677y21378y21378kcb' => 0, # Not b. 375 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 376 ); 377 378 for (keys %ans) { 379 my $message = "20000 nodes, const-len '$_'"; 380 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; 381 382 $message = "20000 nodes, var-len '$_'"; 383 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; 384 } 385 } 386 387 { 388 my $message = "Complicated backtracking"; 389 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; 390 my $expect = "(bla()) ((l)u((e))) (l(e)e)"; 391 392 our $c; 393 sub matchit { 394 m/ 395 ( 396 \( 397 (?{ $c = 1 }) # Initialize 398 (?: 399 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 400 (?! 401 ) # Fail: will unwind one iteration back 402 ) 403 (?: 404 [^()]+ # Match a big chunk 405 (?= 406 [()] 407 ) # Do not try to match subchunks 408 | 409 \( 410 (?{ ++$c }) 411 | 412 \) 413 (?{ --$c }) 414 ) 415 )+ # This may not match with different subblocks 416 ) 417 (?(?{ $c != 0 }) 418 (?! 419 ) # Fail 420 ) # Otherwise the chunk 1 may succeed with $c>0 421 /xg; 422 } 423 424 my @ans = (); 425 my $res; 426 push @ans, $res while $res = matchit; 427 is("@ans", "1 1 1", $message); 428 429 @ans = matchit; 430 is("@ans", $expect, $message); 431 432 $message = "Recursion with (??{ })"; 433 our $matched; 434 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; 435 436 @ans = my @ans1 = (); 437 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; 438 439 is("@ans", "1 1 1", $message); 440 is("@ans1", $expect, $message); 441 442 @ans = m/$matched/g; 443 is("@ans", $expect, $message); 444 445 } 446 447 { 448 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; 449 } 450 451 { 452 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 453 is("@ans", 'a/ b', "Stack may be bad"); 454 } 455 456 { 457 my $message = "Eval-group not allowed at runtime"; 458 my $code = '{$blah = 45}'; 459 our $blah = 12; 460 eval { /(?$code)/ }; 461 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 462 463 $blah = 12; 464 my $res = eval { "xx" =~ /(?$code)/o }; 465 { 466 no warnings 'uninitialized'; 467 chomp $@; my $message = "$message '$@', '$res', '$blah'"; 468 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 469 } 470 471 $code = '=xx'; 472 $blah = 12; 473 $res = eval { "xx" =~ /(?$code)/o }; 474 { 475 no warnings 'uninitialized'; 476 my $message = "$message '$@', '$res', '$blah'"; 477 ok(!$@ && $res, $message); 478 } 479 480 $code = '{$blah = 45}'; 481 $blah = 12; 482 eval "/(?$code)/"; 483 is($blah, 45, $message); 484 485 $blah = 12; 486 /(?{$blah = 45})/; 487 is($blah, 45, $message); 488 } 489 490 { 491 my $message = "Pos checks"; 492 my $x = 'banana'; 493 $x =~ /.a/g; 494 is(pos $x, 2, $message); 495 496 $x =~ /.z/gc; 497 is(pos $x, 2, $message); 498 499 sub f { 500 my $p = $_[0]; 501 return $p; 502 } 503 504 $x =~ /.a/g; 505 is(f (pos $x), 4, $message); 506 } 507 508 { 509 my $message = 'Checking $^R'; 510 our $x = $^R = 67; 511 'foot' =~ /foo(?{$x = 12; 75})[t]/; 512 is($^R, 75, $message); 513 514 $x = $^R = 67; 515 'foot' =~ /foo(?{$x = 12; 75})[xy]/; 516 ok($^R eq '67' && $x eq '12', $message); 517 518 $x = $^R = 67; 519 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 520 ok($^R eq '79' && $x eq '12', $message); 521 } 522 523 { 524 is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); 525 is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); 526 is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); 527 is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); 528 is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); 529 is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); 530 } 531 532 { # Test that charset modifier work, and are interpolated 533 is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); 534 is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); 535 is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); 536 is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); 537 is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); 538 539 my $dual = qr/\b\v$/; 540 my $locale; 541 542 SKIP: { 543 skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE'); 544 545 use locale; 546 $locale = qr/\b\v$/; 547 is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); 548 no locale; 549 } 550 551 use feature 'unicode_strings'; 552 my $unicode = qr/\b\v$/; 553 is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); 554 is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 555 556 SKIP: { 557 skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE'); 558 559 is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); 560 } 561 562 no feature 'unicode_strings'; 563 SKIP: { 564 skip 'Locales not available', 1 unless locales_enabled('LC_CTYPE'); 565 is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); 566 } 567 568 is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); 569 570 SKIP: { 571 skip 'Locales not available', 2 unless locales_enabled('LC_CTYPE'); 572 573 use locale; 574 is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 575 is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); 576 } 577 } 578 579 { 580 my $message = "Look around"; 581 $_ = 'xabcx'; 582 foreach my $ans ('', 'c') { 583 ok(/(?<=(?=a)..)((?=c)|.)/g, $message); 584 is($1, $ans, $message); 585 } 586 } 587 588 { 589 my $message = "Empty clause"; 590 $_ = 'a'; 591 foreach my $ans ('', 'a', '') { 592 ok(/^|a|$/g, $message); 593 is($&, $ans, $message); 594 } 595 } 596 597 { 598 sub prefixify { 599 my $message = "Prefixify"; 600 { 601 my ($v, $a, $b, $res) = @_; 602 ok($v =~ s/\Q$a\E/$b/, $message); 603 is($v, $res, $message); 604 } 605 } 606 607 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); 608 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); 609 } 610 611 { 612 $_ = 'var="foo"'; 613 /(\")/; 614 ok $1 && /$1/, "Capture a quote"; 615 } 616 617 { 618 no warnings 'closure'; 619 my $message = '(?{ $var } refers to package vars'; 620 package aa; 621 our $c = 2; 622 $::c = 3; 623 '' =~ /(?{ $c = 4 })/; 624 main::is($c, 4, $message); 625 main::is($::c, 3, $message); 626 } 627 628 { 629 is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); 630 like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, 631 'POSIX class [: :] must have valid name'); 632 633 for my $d (qw [= .]) { 634 is(eval "/[[${d}foo${d}]]/", undef); 635 like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, 636 "POSIX syntax [[$d $d]] is an error"); 637 } 638 } 639 640 { 641 # test if failure of patterns returns empty list 642 my $message = "Failed pattern returns empty list"; 643 $_ = 'aaa'; 644 @_ = /bbb/; 645 is("@_", "", $message); 646 647 @_ = /bbb/g; 648 is("@_", "", $message); 649 650 @_ = /(bbb)/; 651 is("@_", "", $message); 652 653 @_ = /(bbb)/g; 654 is("@_", "", $message); 655 } 656 657 { 658 my $message = '@- and @+ and @{^CAPTURE} tests'; 659 660 $_= "ace"; 661 /c(?=.$)/; 662 is($#{^CAPTURE}, -1, $message); 663 is($#+, 0, $message); 664 is($#-, 0, $message); 665 is($+ [0], 2, $message); 666 is($- [0], 1, $message); 667 ok(!defined $+ [1] && !defined $- [1] && 668 !defined $+ [2] && !defined $- [2], $message); 669 670 /a(c)(e)/; 671 is($#{^CAPTURE}, 1, $message); # one less than $#- 672 is($#+, 2, $message); 673 is($#-, 2, $message); 674 is($+ [0], 3, $message); 675 is($- [0], 0, $message); 676 is(${^CAPTURE}[0], "c", $message); 677 is($+ [1], 2, $message); 678 is($- [1], 1, $message); 679 is(${^CAPTURE}[1], "e", $message); 680 is($+ [2], 3, $message); 681 is($- [2], 2, $message); 682 ok(!defined $+ [3] && !defined $- [3] && 683 !defined ${^CAPTURE}[2] && !defined ${^CAPTURE}[3] && 684 !defined $+ [4] && !defined $- [4], $message); 685 686 # Exists has a special check for @-/@+ - bug 45147 687 ok(exists $-[0], $message); 688 ok(exists $+[0], $message); 689 ok(exists ${^CAPTURE}[0], $message); 690 ok(exists ${^CAPTURE}[1], $message); 691 ok(exists $-[2], $message); 692 ok(exists $+[2], $message); 693 ok(!exists ${^CAPTURE}[2], $message); 694 ok(!exists $-[3], $message); 695 ok(!exists $+[3], $message); 696 ok(exists ${^CAPTURE}[-1], $message); 697 ok(exists ${^CAPTURE}[-2], $message); 698 ok(exists $-[-1], $message); 699 ok(exists $+[-1], $message); 700 ok(exists $-[-3], $message); 701 ok(exists $+[-3], $message); 702 ok(!exists $-[-4], $message); 703 ok(!exists $+[-4], $message); 704 ok(!exists ${^CAPTURE}[-3], $message); 705 706 707 /.(c)(b)?(e)/; 708 is($#{^CAPTURE}, 2, $message); # one less than $#- 709 is($#+, 3, $message); 710 is($#-, 3, $message); 711 is(${^CAPTURE}[0], "c", $message); 712 is(${^CAPTURE}[2], "e", $message . "[$1 $3]"); 713 is($+ [1], 2, $message); 714 is($- [1], 1, $message); 715 is($+ [3], 3, $message); 716 is($- [3], 2, $message); 717 ok(!defined $+ [2] && !defined $- [2] && 718 !defined $+ [4] && !defined $- [4] && 719 !defined ${^CAPTURE}[1], $message); 720 721 /.(c)/; 722 is($#{^CAPTURE}, 0, $message); # one less than $#- 723 is($#+, 1, $message); 724 is($#-, 1, $message); 725 is(${^CAPTURE}[0], "c", $message); 726 is($+ [0], 2, $message); 727 is($- [0], 0, $message); 728 is($+ [1], 2, $message); 729 is($- [1], 1, $message); 730 ok(!defined $+ [2] && !defined $- [2] && 731 !defined $+ [3] && !defined $- [3] && 732 !defined ${^CAPTURE}[1], $message); 733 734 /.(c)(ba*)?/; 735 is($#{^CAPTURE}, 0, $message); # one less than $#- 736 is($#+, 2, $message); 737 is($#-, 1, $message); 738 739 # Check that values don’t stick 740 " "=~/()()()(.)(..)/; 741 my($m,$p,$q) = (\$-[5], \$+[5], \${^CAPTURE}[4]); 742 () = "$$_" for $m, $p, $q; # FETCH (or eqv.) 743 " " =~ /()/; 744 is $$m, undef, 'values do not stick to @- elements'; 745 is $$p, undef, 'values do not stick to @+ elements'; 746 is $$q, undef, 'values do not stick to @{^CAPTURE} elements'; 747 } 748 749 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', 750 '${^CAPTURE}[0] = 13', 751 '@- = qw (foo bar)', '$^N = 42') { 752 is(eval $_, undef); 753 like($@, qr/^Modification of a read-only value attempted/, 754 '$^N, @- and @+ are read-only'); 755 } 756 757 { 758 my $message = '\G testing'; 759 $_ = 'aaa'; 760 pos = 1; 761 my @a = /\Ga/g; 762 is("@a", "a a", $message); 763 764 my $str = 'abcde'; 765 pos $str = 2; 766 unlike($str, qr/^\G/, $message); 767 unlike($str, qr/^.\G/, $message); 768 like($str, qr/^..\G/, $message); 769 unlike($str, qr/^...\G/, $message); 770 ok($str =~ /\G../ && $& eq 'cd', $message); 771 ok($str =~ /.\G./ && $& eq 'bc', $message); 772 773 } 774 775 { 776 my $message = '\G and intuit and anchoring'; 777 $_ = "abcdef"; 778 pos = 0; 779 ok($_ =~ /\Gabc/, $message); 780 ok($_ =~ /^\Gabc/, $message); 781 782 pos = 3; 783 ok($_ =~ /\Gdef/, $message); 784 pos = 3; 785 ok($_ =~ /\Gdef$/, $message); 786 pos = 3; 787 ok($_ =~ /abc\Gdef$/, $message); 788 pos = 3; 789 ok($_ =~ /^abc\Gdef$/, $message); 790 pos = 3; 791 ok($_ =~ /c\Gd/, $message); 792 pos = 3; 793 ok($_ =~ /..\GX?def/, $message); 794 } 795 796 { 797 my $s = '123'; 798 pos($s) = 1; 799 my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 800 is("@a", "1", '\G looping'); 801 } 802 803 804 { 805 my $message = 'pos inside (?{ })'; 806 my $str = 'abcde'; 807 our ($foo, $bar); 808 like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); 809 is($foo, $str, $message); 810 is($bar, 2, $message); 811 is(pos $str, undef, $message); 812 813 undef $foo; 814 undef $bar; 815 pos $str = undef; 816 ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); 817 is($foo, $str, $message); 818 is($bar, 2, $message); 819 is(pos $str, 3, $message); 820 821 $_ = $str; 822 undef $foo; 823 undef $bar; 824 like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); 825 is($foo, $str, $message); 826 is($bar, 2, $message); 827 828 undef $foo; 829 undef $bar; 830 ok(/b(?{$foo = $_; $bar = pos})c/g, $message); 831 is($foo, $str, $message); 832 is($bar, 2, $message); 833 is(pos, 3, $message); 834 835 undef $foo; 836 undef $bar; 837 pos = undef; 838 1 while /b(?{$foo = $_; $bar = pos})c/g; 839 is($foo, $str, $message); 840 is($bar, 2, $message); 841 is(pos, undef, $message); 842 843 undef $foo; 844 undef $bar; 845 $_ = 'abcde|abcde'; 846 ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); 847 is($foo, 'abcde|abcde', $message); 848 is($bar, 8, $message); 849 is($_, 'axde|axde', $message); 850 851 # List context: 852 $_ = 'abcde|abcde'; 853 our @res; 854 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; 855 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 856 is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); 857 858 @res = (); 859 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; 860 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 861 is("@res", "'' 'ab' 'cde|abcde' " . 862 "'' 'abc' 'de|abcde' " . 863 "'abcd' 'e|' 'abcde' " . 864 "'abcde|' 'ab' 'cde' " . 865 "'abcde|' 'abc' 'de'", $message); 866 } 867 868 { 869 my $message = '\G anchor checks'; 870 my $foo = 'aabbccddeeffgg'; 871 pos ($foo) = 1; 872 873 ok($foo =~ /.\G(..)/g, $message); 874 is($1, 'ab', $message); 875 876 pos ($foo) += 1; 877 ok($foo =~ /.\G(..)/g, $message); 878 is($1, 'cc', $message); 879 880 pos ($foo) += 1; 881 ok($foo =~ /.\G(..)/g, $message); 882 is($1, 'de', $message); 883 884 ok($foo =~ /\Gef/g, $message); 885 886 undef pos $foo; 887 ok($foo =~ /\G(..)/g, $message); 888 is($1, 'aa', $message); 889 890 ok($foo =~ /\G(..)/g, $message); 891 is($1, 'bb', $message); 892 893 pos ($foo) = 5; 894 ok($foo =~ /\G(..)/g, $message); 895 is($1, 'cd', $message); 896 } 897 898 { 899 my $message = 'basic \G floating checks'; 900 my $foo = 'aabbccddeeffgg'; 901 pos ($foo) = 1; 902 903 ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); 904 is($1, 'ab', "$message: ab"); 905 906 pos ($foo) += 1; 907 ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); 908 is($1, 'cc', "$message: cc"); 909 910 pos ($foo) += 1; 911 ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); 912 is($1, 'de', "$message: de"); 913 914 ok($foo =~ /\Gef/g, "$message: \\Gef"); 915 916 pos ($foo) = 1; 917 918 ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); 919 is($1, 'aa', "$message: aa"); 920 921 pos ($foo) = 2; 922 923 ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); 924 is($1, 'ab', "$message: ab"); 925 926 } 927 928 { 929 $_ = '123x123'; 930 my @res = /(\d*|x)/g; 931 local $" = '|'; 932 is("@res", "123||x|123|", "0 match in alternation"); 933 } 934 935 { 936 my $message = "Match against temporaries (created via pp_helem())" . 937 " is safe"; 938 ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); 939 is($1, "bar", $message); 940 } 941 942 { 943 my $message = 'package $i inside (?{ }), ' . 944 'saved substrings and changing $_'; 945 our @a = qw [foo bar]; 946 our @b = (); 947 s/(\w)(?{push @b, $1})/,$1,/g for @a; 948 is("@b", "f o o b a r", $message); 949 is("@a", ",f,,o,,o, ,b,,a,,r,", $message); 950 951 $message = 'lexical $i inside (?{ }), ' . 952 'saved substrings and changing $_'; 953 no warnings 'closure'; 954 my @c = qw [foo bar]; 955 my @d = (); 956 s/(\w)(?{push @d, $1})/,$1,/g for @c; 957 is("@d", "f o o b a r", $message); 958 is("@c", ",f,,o,,o, ,b,,a,,r,", $message); 959 } 960 961 { 962 my $message = 'Brackets'; 963 our $brackets; 964 $brackets = qr { 965 { (?> [^{}]+ | (??{ $brackets }) )* } 966 }x; 967 968 ok("{{}" =~ $brackets, $message); 969 is($&, "{}", $message); 970 ok("something { long { and } hairy" =~ $brackets, $message); 971 is($&, "{ and }", $message); 972 ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); 973 is($&, "{ and }", $message); 974 } 975 976 { 977 $_ = "a-a\nxbb"; 978 pos = 1; 979 ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); 980 } 981 982 { 983 my $message = '\G anchor checks'; 984 my $text = "aaXbXcc"; 985 pos ($text) = 0; 986 ok($text !~ /\GXb*X/g, $message); 987 } 988 989 { 990 $_ = "xA\n" x 500; 991 unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); 992 993 my $text = "abc dbf"; 994 my @res = ($text =~ /.*?(b).*?\b/g); 995 is("@res", "b b", '\b is not special'); 996 } 997 998 { 999 my $message = '\S, [\S], \s, [\s]'; 1000 my @a = map chr, 0 .. 255; 1001 my @b = grep m/\S/, @a; 1002 my @c = grep m/[^\s]/, @a; 1003 is("@b", "@c", $message); 1004 1005 @b = grep /\S/, @a; 1006 @c = grep /[\S]/, @a; 1007 is("@b", "@c", $message); 1008 1009 @b = grep /\s/, @a; 1010 @c = grep /[^\S]/, @a; 1011 is("@b", "@c", $message); 1012 1013 @b = grep /\s/, @a; 1014 @c = grep /[\s]/, @a; 1015 is("@b", "@c", $message); 1016 1017 # Test an inverted posix class with a char also in the class. 1018 my $nbsp = chr utf8::unicode_to_native(0xA0); 1019 my $non_s = chr utf8::unicode_to_native(0xA1); 1020 my $pat_string = "[^\\S ]"; 1021 unlike(" ", qr/$pat_string/, "Verify ' ' !~ /$pat_string/"); 1022 like("\t", qr/$pat_string/, "Verify '\\t =~ /$pat_string/"); 1023 unlike($nbsp, qr/$pat_string/, "Verify non-utf8-NBSP !~ /$pat_string/"); 1024 utf8::upgrade($nbsp); 1025 like($nbsp, qr/$pat_string/, "Verify utf8-NBSP =~ /$pat_string/"); 1026 unlike($non_s, qr/$pat_string/, "Verify non-utf8-inverted-bang !~ /$pat_string/"); 1027 utf8::upgrade($non_s); 1028 unlike($non_s, qr/$pat_string/, "Verify utf8-inverted-bang !~ /$pat_string/"); 1029 } 1030 { 1031 my $message = '\D, [\D], \d, [\d]'; 1032 my @a = map chr, 0 .. 255; 1033 my @b = grep /\D/, @a; 1034 my @c = grep /[^\d]/, @a; 1035 is("@b", "@c", $message); 1036 1037 @b = grep /\D/, @a; 1038 @c = grep /[\D]/, @a; 1039 is("@b", "@c", $message); 1040 1041 @b = grep /\d/, @a; 1042 @c = grep /[^\D]/, @a; 1043 is("@b", "@c", $message); 1044 1045 @b = grep /\d/, @a; 1046 @c = grep /[\d]/, @a; 1047 is("@b", "@c", $message); 1048 } 1049 { 1050 my $message = '\W, [\W], \w, [\w]'; 1051 my @a = map chr, 0 .. 255; 1052 my @b = grep /\W/, @a; 1053 my @c = grep /[^\w]/, @a; 1054 is("@b", "@c", $message); 1055 1056 @b = grep /\W/, @a; 1057 @c = grep /[\W]/, @a; 1058 is("@b", "@c", $message); 1059 1060 @b = grep /\w/, @a; 1061 @c = grep /[^\W]/, @a; 1062 is("@b", "@c", $message); 1063 1064 @b = grep /\w/, @a; 1065 @c = grep /[\w]/, @a; 1066 is("@b", "@c", $message); 1067 } 1068 1069 { 1070 # see if backtracking optimization works correctly 1071 my $message = 'Backtrack optimization'; 1072 like("\n\n", qr/\n $ \n/x, $message); 1073 like("\n\n", qr/\n* $ \n/x, $message); 1074 like("\n\n", qr/\n+ $ \n/x, $message); 1075 like("\n\n", qr/\n? $ \n/x, $message); 1076 like("\n\n", qr/\n*? $ \n/x, $message); 1077 like("\n\n", qr/\n+? $ \n/x, $message); 1078 like("\n\n", qr/\n?? $ \n/x, $message); 1079 unlike("\n\n", qr/\n*+ $ \n/x, $message); 1080 unlike("\n\n", qr/\n++ $ \n/x, $message); 1081 like("\n\n", qr/\n?+ $ \n/x, $message); 1082 } 1083 1084 { 1085 package S; 1086 use overload '""' => sub {'Object S'}; 1087 sub new {bless []} 1088 1089 my $message = "Ref stringification"; 1090 ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); 1091 ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); 1092 ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); 1093 ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); 1094 ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); 1095 } 1096 1097 { 1098 my $message = "Test result of match used as match"; 1099 ok('a1b' =~ ('xyz' =~ /y/), $message); 1100 is($`, 'a', $message); 1101 ok('a1b' =~ ('xyz' =~ /t/), $message); 1102 is($`, 'a', $message); 1103 } 1104 1105 { 1106 my $message = '"1" is not \s'; 1107 warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, 1108 undef, "$message (did not warn)"); 1109 } 1110 1111 { 1112 my $message = '\s, [[:space:]] and [[:blank:]]'; 1113 my %space = (spc => " ", 1114 tab => "\t", 1115 cr => "\r", 1116 lf => "\n", 1117 ff => "\f", 1118 # There's no \v but the vertical tabulator seems miraculously 1119 # be 11 both in ASCII and EBCDIC. 1120 vt => chr(11), 1121 false => "space"); 1122 1123 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; 1124 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; 1125 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; 1126 1127 is("@space0", "cr ff lf spc tab vt", $message); 1128 is("@space1", "cr ff lf spc tab vt", $message); 1129 is("@space2", "spc tab", $message); 1130 } 1131 1132 { 1133 my $n= 50; 1134 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't 1135 # seem to be predictable. Slight changes to the test make it fail earlier or later. 1136 foreach my $i (0 .. $n) 1137 { 1138 my $str= "\n" x $i; 1139 ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; 1140 } 1141 } 1142 { 1143 # we are actually testing that we dont die when executing these patterns 1144 use utf8; 1145 my $e = "Böck"; 1146 ok(utf8::is_utf8($e),"got a unicode string - rt75680"); 1147 1148 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); 1149 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); 1150 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); 1151 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); 1152 } 1153 { 1154 # we are actually testing that we dont die when executing these patterns 1155 my $e = "B" . uni_to_native("\x{f6}") . "ck"; 1156 ok(!utf8::is_utf8($e), "got a latin string - rt75680"); 1157 1158 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); 1159 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); 1160 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); 1161 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); 1162 } 1163 1164 { 1165 # 1166 # Tests for bug 77414. 1167 # 1168 1169 my $message = '\p property after empty * match'; 1170 { 1171 like("1", qr/\s*\pN/, $message); 1172 like("-", qr/\s*\p{Dash}/, $message); 1173 like(" ", qr/\w*\p{Blank}/, $message); 1174 } 1175 1176 like("1", qr/\s*\pN+/, $message); 1177 like("-", qr/\s*\p{Dash}{1}/, $message); 1178 like(" ", qr/\w*\p{Blank}{1,4}/, $message); 1179 1180 } 1181 1182 { # Some constructs with Latin1 characters cause a utf8 string not 1183 # to match itself in non-utf8 1184 my $c = uni_to_native("\xc0"); 1185 my $pattern = my $utf8_pattern = qr/(($c)+,?)/; 1186 utf8::upgrade($utf8_pattern); 1187 ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; 1188 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; 1189 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; 1190 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; 1191 utf8::upgrade($c); 1192 ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; 1193 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; 1194 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; 1195 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; 1196 } 1197 1198 { # Make sure can override the formatting 1199 use feature 'unicode_strings'; 1200 ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; 1201 ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; 1202 } 1203 1204 { 1205 my $str= "\x{100}"; 1206 chop $str; 1207 my $qr= qr/$str/; 1208 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); 1209 $str= ""; 1210 $qr= qr/$str/; 1211 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); 1212 1213 } 1214 1215 { 1216 local $::TODO = "[perl #38133]"; 1217 1218 "A" =~ /(((?:A))?)+/; 1219 my $first = $2; 1220 1221 "A" =~ /(((A))?)+/; 1222 my $second = $2; 1223 1224 is($first, $second); 1225 } 1226 1227 { 1228 # RT #3516: \G in a m//g expression causes problems 1229 my $count = 0; 1230 while ("abc" =~ m/(\G[ac])?/g) { 1231 last if $count++ > 10; 1232 } 1233 ok($count < 10, 'RT #3516 A'); 1234 1235 $count = 0; 1236 while ("abc" =~ m/(\G|.)[ac]/g) { 1237 last if $count++ > 10; 1238 } 1239 ok($count < 10, 'RT #3516 B'); 1240 1241 $count = 0; 1242 while ("abc" =~ m/(\G?[ac])?/g) { 1243 last if $count++ > 10; 1244 } 1245 ok($count < 10, 'RT #3516 C'); 1246 } 1247 { 1248 # RT #84294: Is this a bug in the simple Perl regex? 1249 # : Nested buffers and (?{...}) dont play nicely on partial matches 1250 our @got= (); 1251 ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); 1252 my $want= "'ab', 'a', 'b'"; 1253 my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); 1254 is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); 1255 } 1256 1257 { 1258 # Suppress warnings, as the non-unicode one comes out even if turn off 1259 # warnings here (because the execution is done in another scope). 1260 local $SIG{__WARN__} = sub {}; 1261 my $str = "\x{110000}"; 1262 1263 unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); 1264 like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); 1265 like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); 1266 unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); 1267 } 1268 1269 { 1270 # Test that IDstart works, but because the author (khw) knows 1271 # regexes much better than the rest of the core, it is being done here 1272 # in the context of a regex which relies on buffer names beginng with 1273 # IDStarts. 1274 use utf8; 1275 my $str = "abc"; 1276 like($str, qr/(?<a>abc)/, "'a' is legal IDStart"); 1277 like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); 1278 like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); 1279 like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); 1280 1281 # This test works on Unicode 6.0 in which U+2118 and U+212E are legal 1282 # IDStarts there, but are not Word characters, and therefore Perl 1283 # doesn't allow them to be IDStarts. But there is no guarantee that 1284 # Unicode won't change things around in the future so that at some 1285 # future Unicode revision these tests would need to be revised. 1286 foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { 1287 my $prog = <<"EOP"; 1288use utf8;; 1289"abc" =~ qr/(?<$char>abc)/; 1290EOP 1291 utf8::encode($prog); 1292 fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, 1293 sprintf("'U+%04X not legal IDFirst'", ord($char))); 1294 } 1295 } 1296 1297 { # [perl #101710] 1298 my $pat = "b"; 1299 utf8::upgrade($pat); 1300 like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); 1301 } 1302 1303 { # Crash with @a =~ // warning 1304 local $SIG{__WARN__} = sub { 1305 pass 'no crash for @a =~ // warning' 1306 }; 1307 eval ' sub { my @a =~ // } '; 1308 } 1309 1310 { # Concat overloading and qr// thingies 1311 my @refs; 1312 my $qr = qr//; 1313 package Cat { 1314 require overload; 1315 overload->import( 1316 '""' => sub { ${$_[0]} }, 1317 '.' => sub { 1318 push @refs, ref $_[1] if ref $_[1]; 1319 bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" 1320 } 1321 ); 1322 } 1323 my $s = "foo"; 1324 my $o = bless \$s, Cat::; 1325 /$o$qr/; 1326 is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; 1327 } 1328 1329 { 1330 my $count=0; 1331 my $str="\n"; 1332 $count++ while $str=~/.*/g; 1333 is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; 1334 my $class_count= 0; 1335 $class_count++ while $str=~/[^\n]*/g; 1336 is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; 1337 my $anch_count= 0; 1338 $anch_count++ while $str=~/^.*/mg; 1339 is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; 1340 } 1341 1342 { # [perl #111174] 1343 use re '/u'; 1344 my $A_grave = uni_to_native("\xc0"); 1345 like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u"; 1346 use re '/a'; 1347 unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; 1348 use re '/aa'; 1349 unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; 1350 } 1351 1352 { 1353 # the test for whether the pattern should be re-compiled should 1354 # consider the UTF8ness of the previous and current pattern 1355 # string, as well as the physical bytes of the pattern string 1356 1357 for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") { 1358 ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); 1359 } 1360 } 1361 1362 # #113682 more overloading and qr// 1363 # when doing /foo$overloaded/, if $overloaded returns 1364 # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' 1365 # shouldn't be required. Via '.', it still is. 1366 { 1367 package Qr0; 1368 use overload 'qr' => sub { qr/(??{50})/ }; 1369 1370 package Qr1; 1371 use overload '""' => sub { qr/(??{51})/ }; 1372 1373 package Qr2; 1374 use overload '.' => sub { $_[1] . qr/(??{52})/ }; 1375 1376 package Qr3; 1377 use overload '""' => sub { qr/(??{7})/ }, 1378 '.' => sub { $_[1] . qr/(??{53})/ }; 1379 1380 package Qr_indirect; 1381 use overload '""' => sub { $_[0][0] }; 1382 1383 package main; 1384 1385 for my $i (0..3) { 1386 my $o = bless [], "Qr$i"; 1387 if ((0,0,1,1)[$i]) { 1388 eval { "A5$i" =~ /^A$o$/ }; 1389 like($@, qr/Eval-group not allowed/, "Qr$i"); 1390 eval { "5$i" =~ /$o/ }; 1391 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1392 "Qr$i bare"); 1393 { 1394 use re 'eval'; 1395 ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); 1396 eval { "5$i" =~ /$o/ }; 1397 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1398 "Qr$i bare - with use re eval"); 1399 } 1400 } 1401 else { 1402 ok("A5$i" =~ /^A$o$/, "Qr$i"); 1403 ok("5$i" =~ /$o/, "Qr$i bare"); 1404 } 1405 } 1406 1407 my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; 1408 ok("A51" =~ /^A$o/, "Qr_indirect"); 1409 ok("51" =~ /$o/, "Qr_indirect bare"); 1410 } 1411 1412 { # Various flags weren't being set when a [] is optimized into an 1413 # EXACTish node 1414 ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization"); 1415 } 1416 1417 { 1418 for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") { 1419 my $utf8_char = $char; 1420 utf8::upgrade($utf8_char); 1421 my $display = $char; 1422 $display = display($display); 1423 my $utf8_display = "utf8::upgrade(\"$display\")"; 1424 1425 like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); 1426 like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); 1427 like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); 1428 like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); 1429 } 1430 } 1431 1432 { 1433 # #116148: Pattern utf8ness sticks around globally 1434 # the utf8 in the first match was sticking around for the second 1435 # match 1436 1437 use feature 'unicode_strings'; 1438 1439 my $x = "\x{263a}"; 1440 $x =~ /$x/; 1441 1442 my $text = "Perl"; 1443 ok("Perl" =~ /P.*$/i, '#116148'); 1444 } 1445 1446 { # 118297: Mixing up- and down-graded strings in regex 1447 utf8::upgrade(my $u = "\x{e5}"); 1448 utf8::downgrade(my $d = "\x{e5}"); 1449 my $warned; 1450 local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; 1451 my $re = qr/$u$d/; 1452 ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); 1453 my $c = "\x{e5}\x{e5}"; 1454 utf8::downgrade($c); 1455 like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); 1456 utf8::upgrade($c); 1457 like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); 1458 } 1459 1460 { 1461 # if we have 87 capture buffers defined then \87 should refer to the 87th. 1462 # test that this is true for 1..100 1463 # Note that this test causes the engine to recurse at runtime, and 1464 # hence use a lot of C stack. 1465 for my $i (1..100) { 1466 my $capture= "a"; 1467 $capture= "($capture)" for 1 .. $i; 1468 for my $mid ("","b") { 1469 my $str= "a${mid}a"; 1470 my $backref= "\\$i"; 1471 eval { 1472 ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); 1473 1; 1474 } or do { 1475 is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); 1476 }; 1477 } 1478 } 1479 } 1480 1481 # this mixture of readonly (not COWable) and COWable strings 1482 # messed up the capture buffers under COW. The actual test results 1483 # are incidental; the issue is was an AddressSanitizer failure 1484 { 1485 my $c ='AB'; 1486 my $res = ''; 1487 for ($c, 'C', $c, 'DE') { 1488 ok(/(.)/, "COWable match"); 1489 $res .= $1; 1490 } 1491 is($res, "ACAD"); 1492 } 1493 1494 1495 { 1496 # RT #45667 1497 # /[#$x]/x didn't interpolate the var $x. 1498 my $b = 'cd'; 1499 my $s = 'abcd$%#&'; 1500 $s =~ s/[a#$b%]/X/g; 1501 is ($s, 'XbXX$XX&', 'RT #45667 without /x'); 1502 $s = 'abcd$%#&'; 1503 $s =~ s/[a#$b%]/X/gx; 1504 is ($s, 'XbXX$XX&', 'RT #45667 with /x'); 1505 } 1506 1507 { 1508 no warnings "uninitialized"; 1509 my @a; 1510 $a[1]++; 1511 /@a/; 1512 pass('no crash with /@a/ when array has nonexistent elems'); 1513 } 1514 1515 { 1516 is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), 1517 "ok\n", 1518 'deleting *^R does not result in crashes'; 1519 no warnings 'once'; 1520 *^R = *caretRglobwithnoscalar; 1521 "" =~ /(?{42})/; 1522 is $^R, 42, 'assigning to *^R does not result in a crash'; 1523 is runperl( 1524 stderr => 1, 1525 prog => 'eval q|' 1526 .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' 1527 .' print qq-$^R\n-' 1528 .'|' 1529 ), 1530 "42\n", 1531 'undefining *^R within (??{}) does not result in a crash'; 1532 } 1533 1534 SKIP: { # Test literal range end point special handling 1535 unless ($::IS_EBCDIC) { 1536 skip "Valid only for EBCDIC", 24; 1537 } 1538 1539 like("\x89", qr/[i-j]/, '"\x89" should match [i-j]'); 1540 unlike("\x8A", qr/[i-j]/, '"\x8A" shouldnt match [i-j]'); 1541 unlike("\x90", qr/[i-j]/, '"\x90" shouldnt match [i-j]'); 1542 like("\x91", qr/[i-j]/, '"\x91" should match [i-j]'); 1543 1544 like("\x89", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x89" should match [i-\N{LATIN SMALL LETTER J}]'); 1545 unlike("\x8A", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x8A" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1546 unlike("\x90", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x90" shouldnt match [i-\N{LATIN SMALL LETTER J}]'); 1547 like("\x91", qr/[i-\N{LATIN SMALL LETTER J}]/, '"\x91" should match [i-\N{LATIN SMALL LETTER J}]'); 1548 1549 like("\x89", qr/[i-\N{U+6A}]/, '"\x89" should match [i-\N{U+6A}]'); 1550 unlike("\x8A", qr/[i-\N{U+6A}]/, '"\x8A" shouldnt match [i-\N{U+6A}]'); 1551 unlike("\x90", qr/[i-\N{U+6A}]/, '"\x90" shouldnt match [i-\N{U+6A}]'); 1552 like("\x91", qr/[i-\N{U+6A}]/, '"\x91" should match [i-\N{U+6A}]'); 1553 1554 like("\x89", qr/[\N{U+69}-\N{U+6A}]/, '"\x89" should match [\N{U+69}-\N{U+6A}]'); 1555 unlike("\x8A", qr/[\N{U+69}-\N{U+6A}]/, '"\x8A" shouldnt match [\N{U+69}-\N{U+6A}]'); 1556 unlike("\x90", qr/[\N{U+69}-\N{U+6A}]/, '"\x90" shouldnt match [\N{U+69}-\N{U+6A}]'); 1557 like("\x91", qr/[\N{U+69}-\N{U+6A}]/, '"\x91" should match [\N{U+69}-\N{U+6A}]'); 1558 1559 like("\x89", qr/[i-\x{91}]/, '"\x89" should match [i-\x{91}]'); 1560 like("\x8A", qr/[i-\x{91}]/, '"\x8A" should match [i-\x{91}]'); 1561 like("\x90", qr/[i-\x{91}]/, '"\x90" should match [i-\x{91}]'); 1562 like("\x91", qr/[i-\x{91}]/, '"\x91" should match [i-\x{91}]'); 1563 1564 # Need to use eval, because tries to compile on ASCII platforms even 1565 # though the tests are skipped, and fails because 0x89-j is an illegal 1566 # range there. 1567 like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]'); 1568 like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]'); 1569 like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]'); 1570 like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]'); 1571 } 1572 1573 # These are based on looking at the code in regcomp.c 1574 # We don't look for specific code, just the existence of an SSC 1575 foreach my $re (qw( qr/a?c/ 1576 qr/a?c/i 1577 qr/[ab]?c/ 1578 qr/\R?c/ 1579 qr/\d?c/d 1580 qr/\w?c/l 1581 qr/\s?c/a 1582 qr/[[:lower:]]?c/u 1583 )) { 1584 SKIP: { 1585 skip "no re-debug under miniperl" if is_miniperl; 1586 my $prog = <<"EOP"; 1587use re qw(Debug COMPILE); 1588$re; 1589EOP 1590 fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); 1591 } 1592 } 1593 1594 { 1595 like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; 1596 } 1597 1598 SKIP: { 1599 skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC; 1600 1601 # Verify that the very last Latin-1 U+00FF 1602 # (LATIN SMALL LETTER Y WITH DIAERESIS) 1603 # and its UPPER counterpart (U+0178 which is pure Unicode), 1604 # and likewise for the very first pure Unicode 1605 # (LATIN CAPITAL LETTER A WITH MACRON) fold-match properly, 1606 # and there are no off-by-one logic errors in the transition zone. 1607 1608 ok("\xFF" =~ /\xFF/i, "Y WITH DIAERESIS l =~ l"); 1609 ok("\xFF" =~ /\x{178}/i, "Y WITH DIAERESIS l =~ u"); 1610 ok("\x{178}" =~ /\xFF/i, "Y WITH DIAERESIS u =~ l"); 1611 ok("\x{178}" =~ /\x{178}/i, "Y WITH DIAERESIS u =~ u"); 1612 1613 # U+00FF with U+05D0 (non-casing Hebrew letter). 1614 ok("\xFF\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS l =~ l"); 1615 ok("\xFF\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS l =~ u"); 1616 ok("\x{178}\x{5D0}" =~ /\xFF\x{5D0}/i, "Y WITH DIAERESIS u =~ l"); 1617 ok("\x{178}\x{5D0}" =~ /\x{178}\x{5D0}/i, "Y WITH DIAERESIS u =~ u"); 1618 1619 # U+0100. 1620 ok("\x{100}" =~ /\x{100}/i, "A WITH MACRON u =~ u"); 1621 ok("\x{100}" =~ /\x{101}/i, "A WITH MACRON u =~ l"); 1622 ok("\x{101}" =~ /\x{100}/i, "A WITH MACRON l =~ u"); 1623 ok("\x{101}" =~ /\x{101}/i, "A WITH MACRON l =~ l"); 1624 } 1625 1626 { 1627 use utf8; 1628 ok("abc" =~ /a b c/x, "NEL is white-space under /x"); 1629 } 1630 1631 { 1632 ok('a(b)c' =~ qr(a\(b\)c), "'\\(' is a literal in qr(...)"); 1633 ok('a[b]c' =~ qr[a\[b\]c], "'\\[' is a literal in qr[...]"); 1634 ok('a{3}c' =~ qr{a\{3\}c}, # Only failed when { could be a meta 1635 "'\\{' is a literal in qr{...}, where it could be a quantifier"); 1636 1637 # This one is for completeness 1638 ok('a<b>c' =~ qr<a\<b\>c>, "'\\<' is a literal in qr<...>)"); 1639 } 1640 1641 { # Was getting optimized into EXACT (non-folding node) 1642 my $x = qr/[x]/i; 1643 utf8::upgrade($x); 1644 like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); 1645 } 1646 1647SKIP: { # make sure we get an error when \p{} cannot load Unicode tables 1648 skip("Unicode tables always now loaded", 1); 1649 fresh_perl_like(<<' prog that cannot load uni tables', 1650 BEGIN { 1651 @INC = '../lib'; 1652 require utf8; require 'utf8_heavy.pl'; 1653 @INC = (); 1654 } 1655 $name = 'A B'; 1656 if ($name =~ /(\p{IsUpper}) (\p{IsUpper})/){ 1657 print "It's good! >$1< >$2<\n"; 1658 } else { 1659 print "It's not good...\n"; 1660 } 1661 prog that cannot load uni tables 1662 qr/^Can't locate unicore\/Heavy\.pl(?x: 1663 )|^Can't find Unicode property definition/, 1664 undef, 1665 '\p{} should not fail silently when uni tables evanesce'); 1666 } 1667 1668 { # Special handling of literal-ended ranges in [...] was breaking this 1669 use utf8; 1670 like("ÿ", qr/[ÿ-ÿ]/, "\"ÿ\" should match [ÿ-ÿ]"); 1671 } 1672 1673 { # [perl #123539] 1674 like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1675 like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1676 } 1677 1678 { # [perl #123604] 1679 my($s, $x, @x) = ('abc', 'a', 'd'); 1680 my $long = 'b' x 2000; 1681 my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0}; 1682 $eval =~ s{bbb}{$long}; 1683 my $match = eval $eval; 1684 ok(1, "did not crash"); 1685 ok($match, "[bbb...] resolved as character class, not subscript"); 1686 } 1687 1688 { # [perl #123755] 1689 for my $pat ('(??', '(?P', '(?i-') { 1690 eval qq{ qr/$pat/ }; 1691 ok(1, "qr/$pat/ did not crash"); 1692 eval qq{ qr/${pat}\x{123}/ }; 1693 my $e = $@; 1694 like($e, qr{\x{123}}, 1695 "qr/${pat}x/ shows x in error even if it's a wide character"); 1696 } 1697 } 1698 1699 { 1700 # Expect one of these sizes to cause overflow and wrap to negative 1701 for my $bits (32, 64) { 1702 my $wrapneg = 2 ** ($bits - 2) * 3; 1703 for my $sign ('', '-') { 1704 my $pat = sprintf "qr/(?%s%u)/", $sign, $wrapneg; 1705 eval $pat; 1706 ok(1, "big backref $pat did not crash"); 1707 } 1708 } 1709 } 1710 { 1711 # Test that we handle qr/\8888888/ and variants without an infinite loop, 1712 # we use a test within a test so we can todo it, and make sure we don't 1713 # infinite loop our tests. 1714 # NOTE - Do not put quotes in the code! 1715 # NOTE - We have to triple escape the backref in the pattern below. 1716 my $code=' 1717 BEGIN{require q(./test.pl);} 1718 watchdog(3); 1719 for my $len (1 .. 20) { 1720 my $eights= q(8) x $len; 1721 eval qq{ qr/\\\\$eights/ }; 1722 } 1723 print q(No infinite loop here!); 1724 '; 1725 fresh_perl_is($code, "No infinite loop here!", {}, 1726 "test that we handle things like m/\\888888888/ without infinite loops" ); 1727 } 1728 1729 { # Test that we handle some malformed UTF-8 without looping [perl 1730 # #123562] 1731 1732 my $code=' 1733 BEGIN{require q(./test.pl);} 1734 use Encode qw(_utf8_on); 1735 # \x80 and \x41 are continuation bytes in their respective 1736 # character sets 1737 my $malformed = (ord("A") == 65) ? "a\x80\n" : "a\x41\n"; 1738 utf8::downgrade($malformed); 1739 _utf8_on($malformed); 1740 watchdog(3); 1741 $malformed =~ /(\n\r|\r)$/; 1742 print q(No infinite loop here!); 1743 '; 1744 fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, 1745 "test that we handle some UTF-8 malformations without looping" ); 1746 } 1747 1748 { 1749 # [perl #123843] hits SEGV trying to compile this pattern 1750 my $match; 1751 eval q{ ($match) = ("xxyxxyxy" =~ m{(x+(y(?1))*)}) }; 1752 ok(1, "compiled GOSUB in CURLYM ok"); 1753 is($match, 'xxyxxyx', "matched GOSUB in CURLYM"); 1754 } 1755 1756 { 1757 # [perl #123852] doesn't avoid all the capture-related work with 1758 # //n, leading to possible memory corruption 1759 eval q{ qr{()(?1)}n }; 1760 my $error = $@; 1761 ok(1, "qr{()(?1)}n didn't crash"); 1762 like($error, qr{Reference to nonexistent group}, 1763 'gave appropriate error for qr{()(?1)}n'); 1764 } 1765 1766 { 1767 # [perl #126406] panic with unmatchable quantifier 1768 my $code=' 1769 no warnings "regexp"; 1770 "" =~ m/(.0\N{6,0}0\N{6,0}000000000000000000000000000000000)/; 1771 '; 1772 fresh_perl_is($code, "", {}, 1773 "perl [#126406] panic"); 1774 } 1775 { 1776 my $bug="[perl #126182]"; # test for infinite pattern recursion 1777 for my $tuple ( 1778 [ 'q(a)=~/(.(?2))((?<=(?=(?1)).))/', "died", "look ahead left recursion fails fast" ], 1779 [ 'q(aa)=~/(?R)a/', "died", "left-recursion fails fast", ], 1780 [ 'q(bbaa)=~/(?&x)(?(DEFINE)(?<x>(?&y)*a)(?<y>(?&x)*b))/', 1781 "died", "inter-cyclic optional left recursion dies" ], 1782 [ 'q(abc) =~ /a((?1)?)c/', "died", "optional left recursion dies" ], 1783 [ 'q(abc) =~ /a((?1)??)c/', "died", "min mod left recursion dies" ], 1784 [ 'q(abc) =~ /a((?1)*)c/', "died", "* left recursion dies" ], 1785 [ 'q(abc) =~ /a((?1)+)c/', "died", "+ left recursion dies" ], 1786 [ 'q(abc) =~ /a((?1){0,3})c/', "died", "{0,3} left recursion fails fast" ], 1787 1788 [ 'q(aaabbb)=~/a(?R)?b/', "matched", "optional self recursion works" ], 1789 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?0))*+\\\\))/', "matched", 1790 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1791 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]++|(?1))*+\\\\))/', "matched", 1792 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1793 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?0))*\\\\))/', "matched", 1794 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1795 [ '"((5maa-maa)(maa-3maa))" =~ /(\\\\((?:[^()]+|(?1))*\\\\))/', "matched", 1796 "recursion and possessive captures", "((5maa-maa)(maa-3maa))"], 1797 ) { 1798 my ($expr, $expect, $test_name, $cap1)= @$tuple; 1799 # avoid quotes in this code! 1800 my $code=' 1801 BEGIN{require q(./test.pl);} 1802 watchdog(3); 1803 my $status= eval(q{ !(' . $expr . ') ? q(failed) : ' . 1804 ($cap1 ? '($1 ne q['.$cap1.']) ? qq(badmatch:$1) : ' : '') . 1805 ' q(matched) }) 1806 || ( ( $@ =~ /Infinite recursion/ ) ? qq(died) : q(strange-death) ); 1807 print $status; 1808 '; 1809 fresh_perl_is($code, $expect, {}, "$bug - $test_name" ); 1810 } 1811 } 1812 { 1813 fresh_perl_is(' 1814 BEGIN{require q(test.pl);} 1815 watchdog(3); 1816 $SIG{ALRM} = sub {print "Timeout\n"; exit(1)}; 1817 alarm 1; 1818 $_ = "a" x 1000 . "b" x 1000 . "c" x 1000; 1819 /.*a.*b.*c.*[de]/; 1820 ',"Timeout",{},"Test Perl 73464") 1821 } 1822 1823 { # [perl #128686], crashed the the interpreter 1824 my $AE = chr utf8::unicode_to_native(0xC6); 1825 my $ae = chr utf8::unicode_to_native(0xE6); 1826 my $re = qr/[$ae\s]/i; 1827 ok($AE !~ $re, '/[\xE6\s]/i doesn\'t match \xC6 when not in UTF-8'); 1828 utf8::upgrade $AE; 1829 ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8'); 1830 } 1831 1832 { 1833 is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|); 1834 } 1835 1836 { 1837 my $str = "a\xB6"; 1838 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - latin1 case" ); 1839 utf8::upgrade($str); 1840 ok( $str =~ m{^(a|a\x{b6})$}, "fix [perl #129950] - utf8 case" ); 1841 } 1842 { 1843 my $got= run_perl( switches => [ '-l' ], prog => <<'EOF_CODE' ); 1844 my $died= !eval { 1845 $_=qq(ab); 1846 print; 1847 my $p=qr/(?{ s!!x! })/; 1848 /$p/; 1849 print; 1850 /a/; 1851 /$p/; 1852 print; 1853 /b/; 1854 /$p/; 1855 print; 1856 //; 1857 1; 1858 }; 1859 $error = $died ? ($@ || qq(Zombie)) : qq(none); 1860 print $died ? qq(died) : qq(lived); 1861 print qq(Error: $@); 1862EOF_CODE 1863 my @got= split /\n/, $got; 1864 is($got[0],"ab","empty pattern in regex codeblock: got expected start string"); 1865 is($got[1],"xab", 1866 "empty pattern in regex codeblock: first subst with no last-match worked right"); 1867 is($got[2],"xxb","empty pattern in regex codeblock: second subst worked right"); 1868 is($got[3],"xxx","empty pattern in regex codeblock: third subst worked right"); 1869 is($got[4],"died","empty pattern in regex codeblock: died as expected"); 1870 like($got[5],qr/Error: Infinite recursion via empty pattern/, 1871 "empty pattern in regex codeblock: produced the right exception message" ); 1872 } 1873 { 1874 # [perl #130495] /x comment skipping stopped a byte short, leading 1875 # to assertion failure or 'malformed utf-8 character" warning 1876 fresh_perl_is( 1877 "use utf8; m{a#\x{124}}x", '', {wide_chars => 1}, 1878 '[perl #130495] utf-8 character at end of /x comment should not misparse', 1879 ); 1880 } 1881 { 1882 # [perl #130522] causes out-of-bounds read detected by clang with 1883 # address=sanitized when length of the STCLASS string is greater than 1884 # length of target string. 1885 my $re = qr{(?=\0z)\0?z?$}i; 1886 my($yes, $no) = (1, ""); 1887 for my $test ( 1888 [ $no, undef, '<undef>' ], 1889 [ $no, '', '' ], 1890 [ $no, "\0", '\0' ], 1891 [ $yes, "\0z", '\0z' ], 1892 [ $no, "\0z\0", '\0z\0' ], 1893 [ $yes, "\0z\n", '\0z\n' ], 1894 ) { 1895 my($result, $target, $disp) = @$test; 1896 no warnings qw/uninitialized/; 1897 is($target =~ $re, $result, "[perl #130522] with target '$disp'"); 1898 } 1899 } 1900 { 1901 # [perl #129377] backref to an unmatched capture should not cause 1902 # reading before start of string. 1903 SKIP: { 1904 skip "no re-debug under miniperl" if is_miniperl; 1905 my $prog = <<'EOP'; 1906use re qw(Debug EXECUTE); 1907"x" =~ m{ () y | () \1 }x; 1908EOP 1909 fresh_perl_like($prog, qr{ 1910 \A (?! .* ^ \s+ - ) 1911 }msx, { stderr => 1 }, "Offsets in debug output are not negative"); 1912 } 1913 } 1914 { 1915 # buffer overflow 1916 fresh_perl_is("BEGIN{\$^H=0x200000}\ns/[(?{//xx", 1917 "Unmatched [ in regex; marked by <-- HERE in m/[ <-- HERE (?{/ at (eval 1) line 1.\n", 1918 {}, "buffer overflow for regexp component"); 1919 } 1920 { 1921 # [perl #129281] buffer write overflow, detected by ASAN, valgrind 1922 fresh_perl_is('/0(?0)|^*0(?0)|^*(^*())0|/', '', {}, "don't bump whilem_c too much"); 1923 } 1924 { 1925 # RT #131893 - fails with ASAN -fsanitize=undefined 1926 fresh_perl_is('qr/0(0?(0||00*))|/', '', {}, "integer overflow during compilation"); 1927 } 1928 1929 { 1930 # RT #131575 intuit skipping back from the end to find the highest 1931 # possible start point, was potentially hopping back beyond pos() 1932 # and crashing by calling fbm_instr with a negative length 1933 1934 my $text = "=t=\x{5000}"; 1935 pos($text) = 3; 1936 ok(scalar($text !~ m{(~*=[a-z]=)}g), "RT #131575"); 1937 } 1938 { 1939 fresh_perl_is('"AA" =~ m/AA{1,0}/','',{},"handle OPFAIL insert properly"); 1940 } 1941 { 1942 fresh_perl_is('$_="0\x{1000000}";/^000?\0000/','',{},"dont throw assert errors trying to fbm past end of string"); 1943 } 1944 { # [perl $132227] 1945 fresh_perl_is("('0ba' . ('ss' x 300)) =~ m/0B\\N{U+41}" . $sharp_s x 150 . '/i and print "1\n"', 1,{},"Use of sharp s under /di that changes to /ui"); 1946 } 1947 { # [perl $132164] 1948 fresh_perl_is('m m0*0+\Rm', "",{},"Undefined behavior in address sanitizer"); 1949 } 1950 1951} # End of sub run_tests 1952 19531; 1954 1955# 1956# ex: set ts=8 sts=4 sw=4 et: 1957# 1958