1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7sub run_tests; 8 9$| = 1; 10 11BEGIN { 12 chdir 't' if -d 't'; 13 require './test.pl'; 14 set_up_inc( '../lib', '.' ); 15 skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-"); 16} 17 18use strict; 19use warnings; 20use 5.010; 21use Config; 22 23plan tests => 2514; # Update this when adding/deleting tests. 24 25run_tests() unless caller; 26 27# 28# Tests start here. 29# 30sub run_tests { 31 32 like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/, 33 "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)"); 34 35 { 36 no warnings 'uninitialized'; 37 ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)"); 38 } 39 40 { 41 my $message = 'bug id 20001008.001 (#4407)'; 42 43 my @x = ("stra\337e 138", "stra\337e 138"); 44 for (@x) { 45 ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message); 46 ok(my ($latin) = /^(.+)(?:\s+\d)/, $message); 47 is($latin, "stra\337e", $message); 48 ok($latin =~ s/stra\337e/straße/, $message); 49 # 50 # Previous code follows, but outcommented - there were no tests. 51 # 52 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a 53 # use utf8; # needed for the raw UTF-8 54 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a 55 } 56 } 57 58 { 59 # Fist half of the bug. 60 my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)'; 61 my $X = chr (1448); 62 ok(my ($Y) = $X =~ /(.*)/, $message); 63 is($Y, v1448, $message); 64 is(length $Y, 1, $message); 65 66 # Second half of the bug. 67 $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)'; 68 $X = ''; 69 $X =~ s/^/chr(1488)/e; 70 is(length $X, 1, $message); 71 is(ord $X, 1488, $message); 72 } 73 74 { 75 my $message = 'Repeated s///; Bug 20001108.001 (#4631)'; 76 my $X = "Szab\x{f3},Bal\x{e1}zs"; 77 my $Y = $X; 78 $Y =~ s/(B)/$1/ for 0 .. 3; 79 is($Y, $X, $message); 80 is($X, "Szab\x{f3},Bal\x{e1}zs", $message); 81 } 82 83 { 84 my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)'; 85 my $x = "\x{100}A"; 86 $x =~ s/A/B/; 87 is($x, "\x{100}B", $message); 88 is(length $x, 2, $message); 89 } 90 91 { 92 # The original bug report had 'no utf8' here but that was irrelevant. 93 94 my $message = "Don't dump core; Bug 20010306.008 (#5982)"; 95 my $a = "a\x{1234}"; 96 like($a, qr/\w/, $message); # used to core dump. 97 } 98 99 { 100 my $message = '/g in scalar context; Bug 20010410.006 (#6796)'; 101 for my $rx ('/(.*?)\{(.*?)\}/csg', 102 '/(.*?)\{(.*?)\}/cg', 103 '/(.*?)\{(.*?)\}/sg', 104 '/(.*?)\{(.*?)\}/g', 105 '/(.+?)\{(.+?)\}/csg',) { 106 my $i = 0; 107 my $input = "a{b}c{d}"; 108 eval <<" --"; 109 while (eval \$input =~ $rx) { 110 \$i ++; 111 } 112 -- 113 is($i, 2, $message); 114 } 115 } 116 117 { 118 # Amazingly vertical tabulator is the same in ASCII and EBCDIC. 119 for ("\n", "\t", "\014", "\r") { 120 unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_); 121 } 122 for (" ") { 123 like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)"); 124 } 125 } 126 127 { 128 # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context 129 130 $_ = "ababacadaea"; 131 my $a = join ":", /b./gc; 132 my $b = join ":", /a./gc; 133 my $c = pos; 134 is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)"); 135 } 136 137 { 138 # [ID 20010407.006 (#6767)] matching utf8 return values from 139 # functions does not work 140 141 my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)'; 142 package ID_20010407_006; 143 sub x {"a\x{1234}"} 144 my $x = x; 145 my $y; 146 ::ok($x =~ /(..)/, $message); 147 $y = $1; 148 ::ok(length ($y) == 2 && $y eq $x, $message); 149 ::ok(x =~ /(..)/, $message); 150 $y = $1; 151 ::ok(length ($y) == 2 && $y eq $x, $message); 152 } 153 154 { 155 # High bit bug -- japhy 156 my $x = "ab\200d"; 157 ok $x =~ /.*?\200/, "High bit fine"; 158 } 159 160 { 161 my $message = 'UTF-8 hash keys and /$/'; 162 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters 163 # /2002-01/msg01327.html 164 165 my $u = "a\x{100}"; 166 my $v = substr ($u, 0, 1); 167 my $w = substr ($u, 1, 1); 168 my %u = ($u => $u, $v => $v, $w => $w); 169 for (keys %u) { 170 my $m1 = /^\w*$/ ? 1 : 0; 171 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; 172 is($m1, $m2, $message); 173 } 174 } 175 176 { 177 my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)"; 178 179 for my $char ("a", "\x{df}", "\x{100}") { 180 my $x = "$char b $char"; 181 $x =~ s{($char)}{ 182 "c" =~ /c/; 183 "x"; 184 }ge; 185 is(substr ($x, 0, 1), substr ($x, -1, 1), $message); 186 } 187 } 188 189 { 190 my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)"; 191 192 # Requires reuse of last successful pattern. 193 my $num = 123; 194 $num =~ /\d/; 195 for (0 .. 1) { 196 my $match = m?? + 0; 197 ok($match != $_, $message) 198 or diag(sprintf "'match one' %s on %s iteration" => 199 $match ? 'succeeded' : 'failed', 200 $_ ? 'second' : 'first'); 201 } 202 $num =~ /(\d)/; 203 my $result = join "" => $num =~ //g; 204 is($result, $num, $message); 205 } 206 207 { 208 my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)'; 209 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { 210 my ($type, $char) = @$_; 211 for my $len (32000, 32768, 33000) { 212 my $s = $char . "f" x $len; 213 my $r = $s =~ /$char([f]*)/gc; 214 ok($r, $message) or diag("<$type x $len>"); 215 ok(!$r || pos ($s) == $len + 1, $message) 216 or diag("<$type x $len>; pos = @{[pos $s]}"); 217 } 218 } 219 } 220 221 { 222 my $s = "\x{100}" x 5; 223 my $ok = $s =~ /(\x{100}{4})/; 224 my ($ord, $len) = (ord $1, length $1); 225 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]"; 226 } 227 228 { 229 my $message = 'UTF-8 matching; Bug 15397'; 230 like("\x{100}", qr/\x{100}/, $message); 231 like("\x{100}", qr/(\x{100})/, $message); 232 like("\x{100}", qr/(\x{100}){1}/, $message); 233 like("\x{100}\x{100}", qr/(\x{100}){2}/, $message); 234 like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message); 235 } 236 237 { 238 my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471'; 239 local $_ = 'CD'; 240 ok(/(AB)*?CD/ && !defined $1, $message); 241 ok(/(AB)*CD/ && !defined $1, $message); 242 } 243 244 { 245 my $message = "Caching shouldn't prevent match; Bug 3547"; 246 my $pattern = "^(b+?|a){1,2}c"; 247 ok("bac" =~ /$pattern/ && $1 eq 'a', $message); 248 ok("bbac" =~ /$pattern/ && $1 eq 'a', $message); 249 ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message); 250 ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message); 251 } 252 253 { 254 ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232'); 255 is($1, "\x{100}", '$1 is UTF-8; Bug 18232'); 256 { 'a' =~ /./; } 257 is($1, "\x{100}", '$1 is still UTF-8; Bug 18232'); 258 isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232'); 259 } 260 261 { 262 my $message = "Optimizer doesn't prematurely reject match; Bug 19767"; 263 use utf8; 264 265 my $attr = 'Name-1'; 266 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; 267 my $NormalWord = qr /${NormalChar}+?/; 268 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; 269 270 $attr =~ /^$/; 271 like($attr, $PredNameHyphen, $message); # Original test. 272 273 "a" =~ m/[b]/; 274 like("0", qr/\p{N}+\z/, $message); # Variant. 275 } 276 277 { 278 my $message = "(??{ }) doesn't return stale values; Bug 20683"; 279 our $p = 1; 280 foreach (1, 2, 3, 4) { 281 $p ++ if /(??{ $p })/ 282 } 283 is($p, 5, $message); 284 285 { 286 package P; 287 $a = 1; 288 sub TIESCALAR {bless []} 289 sub FETCH {$a ++} 290 } 291 tie $p, "P"; 292 foreach (1, 2, 3, 4) { 293 /(??{ $p })/ 294 } 295 is($p, 5, $message); 296 } 297 298 { 299 # Subject: Odd regexp behavior 300 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> 301 # Date: Wed, 26 Feb 2003 16:53:12 +0000 302 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> 303 # To: perl-unicode@perl.org 304 305 my $message = 'Markus Kuhn 2003-02-26'; 306 307 my $x = "\x{2019}\nk"; 308 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); 309 is($x, "\x{2019} k", $message); 310 311 $x = "b\nk"; 312 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); 313 is($x, "b k", $message); 314 315 like("\x{2019}", qr/\S/, $message); 316 } 317 318 { 319 my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411"; 320 our $i; 321 is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message); 322 no warnings 'syntax'; 323 @_ = split /(?{'WOW'})/, 'abc'; 324 local $" = "|"; 325 is("@_", "a|b|c", $message); 326 } 327 328 is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split'); 329 330 { 331 $_ = "code: 'x' { '...' }\n"; study; 332 my @x; push @x, $& while m/'[^\']*'/gx; 333 local $" = ":"; 334 is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757"); 335 } 336 337 { 338 sub func ($) { 339 ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354"); 340 ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354"); 341 } 342 func "standalone"; 343 $_ = "x"; s/x/func "in subst"/e; 344 $_ = "x"; s/x/func "in multiline subst"/em; 345 $_ = "x"; /x(?{func "in regexp"})/; 346 $_ = "x"; /x(?{func "in multiline regexp"})/m; 347 } 348 349 { 350 $_ = "abcdef\n"; 351 my @x = m/./g; 352 is("abcde", $`, 'Global match sets $`; Bug 19049'); 353 } 354 355 { 356 # [perl #23769] Unicode regex broken on simple example 357 # regrepeat() didn't handle UTF-8 EXACT case right. 358 359 my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; 360 my $message = "$Mess; Bug 23769"; 361 362 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; 363 364 like($s, qr/\x{a0}/, $message); 365 like($s, qr/\x{a0}+/, $message); 366 like($s, qr/\x{a0}\x{a0}/, $message); 367 368 $message = "$Mess (easy variant); Bug 23769"; 369 ok("aaa\x{100}" =~ /(a+)/, $message); 370 is($1, "aaa", $message); 371 372 $message = "$Mess (easy invariant); Bug 23769"; 373 ok("aaa\x{100} " =~ /(a+?)/, $message); 374 is($1, "a", $message); 375 376 $message = "$Mess (regrepeat variant); Bug 23769"; 377 ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message); 378 is($1, "\xa0", $message); 379 380 $message = "$Mess (regrepeat invariant); Bug 23769"; 381 ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message); 382 is($1, "\xa0\xa0\xa0", $message); 383 384 $message = "$Mess (hard variant); Bug 23769"; 385 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message); 386 is($1, "\xa0\xa1", $message); 387 388 $message = "$Mess (hard invariant); Bug 23769"; 389 ok("ababab\x{100} " =~ /((?:ab)+)/, $message); 390 is($1, 'ababab', $message); 391 392 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message); 393 is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message); 394 395 ok("ababab\x{100} " =~ /((?:ab)+?)/, $message); 396 is($1, "ab", $message); 397 398 $message = "Don't match first byte of UTF-8 representation; Bug 23769"; 399 unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message); 400 unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message); 401 unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message); 402 } 403 404 { 405 # perl panic: pp_match start/end pointers 406 407 is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc", 408 'Captures can move backwards in string; Bug 25269'); 409 } 410 411 { 412 # \cA not recognized in character classes 413 like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940'); 414 like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940'); 415 like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940'); 416 like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940'); 417 like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940'); 418 like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940'); 419 like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940'); 420 unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940'); 421 } 422 423 { 424 # perl #28532: optional zero-width match at end of string is ignored 425 426 ok("abc" =~ /^abc(\z)?/ && defined($1), 427 'Optional zero-width match at end of string; Bug 28532'); 428 ok("abc" =~ /^abc(\z)??/ && !defined($1), 429 'Optional zero-width match at end of string; Bug 28532'); 430 } 431 432 { 433 my $utf8 = "\xe9\x{100}"; chop $utf8; 434 my $latin1 = "\xe9"; 435 436 like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207"); 437 like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207"); 438 like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207"); 439 like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207"); 440 441 like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207"); 442 like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207"); 443 like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207"); 444 like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207"); 445 } 446 447 { 448 my $s = "abcd"; 449 $s =~ /(..)(..)/g; 450 $s = $1; 451 $s = $2; 452 is($2, 'cd', 453 "Assigning to original string does not corrupt match vars; Bug 37038"); 454 } 455 456 { 457 { 458 package wooosh; 459 sub gloople {"!"} 460 } 461 my $aeek = bless {} => 'wooosh'; 462 is(do {$aeek -> gloople () =~ /(.)/g}, 1, 463 "//g match against return value of sub [change e26a497577f3ce7b]"); 464 465 sub gloople {"!"} 466 is(do{gloople () =~ /(.)/g}, 1, 467 "change e26a497577f3ce7b didn't affect sub calls for some reason"); 468 } 469 470 { 471 # [perl #78680] 472 # See changes 26925-26928, which reverted change 26410 473 { 474 package lv; 475 our $var = "abc"; 476 sub variable : lvalue {$var} 477 } 478 my $o = bless [] => 'lv'; 479 my $f = ""; 480 my $r = eval { 481 for (1 .. 2) { 482 $f .= $1 if $o -> variable =~ /(.)/g; 483 } 484 1; 485 }; 486 if ($r) { 487 is($f, "ab", "pos() retained between calls"); 488 } 489 else { 490 ok 0, "Code failed: $@"; 491 } 492 493 our $var = "abc"; 494 sub variable : lvalue {$var} 495 my $g = ""; 496 my $s = eval { 497 for (1 .. 2) { 498 $g .= $1 if variable =~ /(.)/g; 499 } 500 1; 501 }; 502 if ($s) { 503 is($g, "ab", "pos() retained between calls"); 504 } 505 else { 506 ok 0, "Code failed: $@"; 507 } 508 } 509 510 SKIP: 511 { 512 skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC; 513 fresh_perl_like( 514 'no warnings "utf8"; 515 $_ = pack "U0C2", 0xa2, 0xf8; # Ill-formed UTF-8 516 my $ret = 0; 517 do {!($ret = s/[a\0]+//g)}', 518 qr/Malformed UTF-8/, 519 {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836"); 520 } 521 522 { 523 # chr(65535) should be allowed in regexes 524 525 no warnings 'utf8'; # To allow non-characters 526 my ($c, $r, $s); 527 528 $c = chr 0xffff; 529 $c =~ s/$c//g; 530 is($c, "", "U+FFFF, parsed as atom; Bug 38293"); 531 532 $c = chr 0xffff; 533 $r = "\\$c"; 534 $c =~ s/$r//g; 535 is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293"); 536 537 $c = chr 0xffff; 538 $c =~ s/[$c]//g; 539 is($c, "", "U+FFFF, parsed in class; Bug 38293"); 540 541 $c = chr 0xffff; 542 $r = "[\\$c]"; 543 $c =~ s/$r//g; 544 is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293"); 545 546 $s = "A\x{ffff}B"; 547 $s =~ s/\x{ffff}//i; 548 is($s, "AB", "U+FFFF, EXACTF; Bug 38293"); 549 550 $s = "\x{ffff}A"; 551 $s =~ s/\bA//; 552 is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293"); 553 554 $s = "\x{ffff}!"; 555 $s =~ s/\B!//; 556 is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293"); 557 } 558 559 { 560 561 # The printing characters 562 my @chars = ("A" .. "Z"); 563 my $delim = ","; 564 my $size = 32771 - 4; 565 my $str = ''; 566 567 # Create some random junk. Inefficient, but it works. 568 for (my $i = 0; $i < $size; $ i++) { 569 $str .= $chars [rand @chars]; 570 } 571 572 $str .= ($delim x 4); 573 my $res; 574 my $matched; 575 ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583"); 576 is($str, "", "Empty string; Bug 39583"); 577 ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583'); 578 } 579 580 { 581 like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940'); 582 like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); 583 like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940'); 584 like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); 585 586 like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940'); 587 like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940'); 588 like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940'); 589 like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940'); 590 like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940'); 591 592 like("X\@A", qr/X@?A/, '@?; Bug 27940'); 593 like("X\@A", qr/X@*A/, '@*; Bug 27940'); 594 like("X\@A", qr/X@(A)/, '@(; Bug 27940'); 595 like("X\@A", qr/X(@)A/, '@); Bug 27940'); 596 like("X\@A", qr/X@|ZA/, '@|; Bug 27940'); 597 598 local $" = ','; # non-whitespace and non-RE-specific 599 like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940'); 600 like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940'); 601 like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940'); 602 like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940'); 603 like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940'); 604 } 605 606 { 607 my $s = 'foo bar baz'; 608 my (@k, @v, @fetch, $res); 609 my $count = 0; 610 my @names = qw ($+{A} $+{B} $+{C}); 611 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { 612 while (my ($k, $v) = each (%+)) { 613 $count++; 614 } 615 @k = sort keys (%+); 616 @v = sort values (%+); 617 $res = 1; 618 push @fetch, 619 ["$+{A}", "$1"], 620 ["$+{B}", "$2"], 621 ["$+{C}", "$3"], 622 ; 623 } 624 foreach (0 .. 2) { 625 if ($fetch [$_]) { 626 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); 627 } else { 628 ok 0, $names[$_]; 629 } 630 } 631 is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496"); 632 is($count, 3, "Got 3 keys in %+ via each; Bug 50496"); 633 is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496"); 634 is("@k", "A B C", "Got expected keys; Bug 50496"); 635 is("@v", "bar baz foo", "Got expected values; Bug 50496"); 636 eval ' 637 no warnings "uninitialized"; 638 print for $+ {this_key_doesnt_exist}; 639 '; 640 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); 641 } 642 643 { 644 # 645 # Almost the same as the block above, except that the capture is nested. 646 # 647 648 my $s = 'foo bar baz'; 649 my (@k, @v, @fetch, $res); 650 my $count = 0; 651 my @names = qw ($+{A} $+{B} $+{C} $+{D}); 652 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { 653 while (my ($k,$v) = each(%+)) { 654 $count++; 655 } 656 @k = sort keys (%+); 657 @v = sort values (%+); 658 $res = 1; 659 push @fetch, 660 ["$+{A}", "$2"], 661 ["$+{B}", "$3"], 662 ["$+{C}", "$4"], 663 ["$+{D}", "$1"], 664 ; 665 } 666 foreach (0 .. 3) { 667 if ($fetch [$_]) { 668 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); 669 } else { 670 ok 0, $names [$_]; 671 } 672 } 673 is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496"); 674 is($count, 4, "Got 4 keys in %+ via each; Bug 50496"); 675 is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496"); 676 is("@k", "A B C D", "Got expected keys; Bug 50496"); 677 is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496"); 678 eval ' 679 no warnings "uninitialized"; 680 print for $+ {this_key_doesnt_exist}; 681 '; 682 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); 683 } 684 685 { 686 my $str = 'abc'; 687 my $count = 0; 688 my $mval = 0; 689 my $pval = 0; 690 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} 691 is($mval, 0, '@- should be empty; Bug 36046'); 692 is($pval, 0, '@+ should be empty; Bug 36046'); 693 is($count, 1, 'Should have matched once only; Bug 36046'); 694 } 695 696 { 697 my $message = '/m in precompiled regexp; Bug 40684'; 698 my $s = "abc\ndef"; 699 my $rex = qr'^abc$'m; 700 ok($s =~ m/$rex/, $message); 701 ok($s =~ m/^abc$/m, $message); 702 } 703 704 { 705 my $message = '(?: ... )? should not lose $^R; Bug 36909'; 706 $^R = 'Nothing'; 707 { 708 local $^R = "Bad"; 709 ok('x foofoo y' =~ m { 710 (foo) # $^R correctly set 711 (?{ "last regexp code result" }) 712 }x, $message); 713 is($^R, 'last regexp code result', $message); 714 } 715 is($^R, 'Nothing', $message); 716 717 { 718 local $^R = "Bad"; 719 720 ok('x foofoo y' =~ m { 721 (?:foo|bar)+ # $^R correctly set 722 (?{ "last regexp code result" }) 723 }x, $message); 724 is($^R, 'last regexp code result', $message); 725 } 726 is($^R, 'Nothing', $message); 727 728 { 729 local $^R = "Bad"; 730 ok('x foofoo y' =~ m { 731 (foo|bar)\1+ # $^R undefined 732 (?{ "last regexp code result" }) 733 }x, $message); 734 is($^R, 'last regexp code result', $message); 735 } 736 is($^R, 'Nothing', $message); 737 738 { 739 local $^R = "Bad"; 740 ok('x foofoo y' =~ m { 741 (foo|bar)\1 # This time without the + 742 (?{"last regexp code result"}) 743 }x, $message); 744 is($^R, 'last regexp code result', $message); 745 } 746 is($^R, 'Nothing', $message); 747 } 748 749 { 750 my $message = 'Match is quadratic due to eval; See Bug 22395'; 751 our $count; 752 for my $l (10, 100, 1000) { 753 $count = 0; 754 ('a' x $l) =~ /(.*)(?{ $count++ })[bc]/; 755 is($count, $l*($l+3)/2+1, $message); 756 } 757 } 758 { 759 my $message = 'Match is linear, not quadratic; Bug 22395.'; 760 our $count; 761 my $ok= 0; 762 for my $l (10, 100, 1000) { 763 $count = 0; 764 ('a' x $l) =~ /(.*)(*{ $count++ })[bc]/; 765 $ok += is($count, $l + 1, $message); 766 } 767 is($ok,3, "Optimistic eval does not disable optimisations"); 768 } 769 770 { 771 my $message = '@-/@+ should not have undefined values; Bug 22614'; 772 local $_ = 'ab'; 773 our @len = (); 774 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; 775 is("@len", "2 2 2", $message); 776 } 777 778 { 779 my $message = '$& set on s///; Bug 18209'; 780 my $text = ' word1 word2 word3 word4 word5 word6 '; 781 782 my @words = ('word1', 'word3', 'word5'); 783 my $count; 784 foreach my $word (@words) { 785 $text =~ s/$word\s//gi; # Leave a space to separate words 786 # in the resultant str. 787 # The following block is not working. 788 if ($&) { 789 $count ++; 790 } 791 # End bad block 792 } 793 is($count, 3, $message); 794 is($text, ' word2 word4 word6 ', $message); 795 } 796 797 { 798 # RT#6893 799 800 local $_ = qq (A\nB\nC\n); 801 my @res; 802 while (m#(\G|\n)([^\n]*)\n#gsx) { 803 push @res, "$2"; 804 last if @res > 3; 805 } 806 is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893"); 807 } 808 809 { 810 # No optimizer bug 811 my @tails = ('', '(?(1))', '(|)', '()?'); 812 my @quants = ('*','+'); 813 my $doit = sub { 814 my $pats = shift; 815 for (@_) { 816 for my $pat (@$pats) { 817 for my $quant (@quants) { 818 for my $tail (@tails) { 819 my $re = "($pat$quant\$)$tail"; 820 ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010"); 821 ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010"); 822 } 823 } 824 } 825 } 826 }; 827 828 my @dpats = ('\d', 829 '[1234567890]', 830 '(1|[23]|4|[56]|[78]|[90])', 831 '(?:1|[23]|4|[56]|[78]|[90])', 832 '(1|2|3|4|5|6|7|8|9|0)', 833 '(?:1|2|3|4|5|6|7|8|9|0)'); 834 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); 835 my @sstrs = (' '); 836 my @dstrs = ('12345'); 837 $doit -> (\@spats, @sstrs); 838 $doit -> (\@dpats, @dstrs); 839 } 840 841 { 842 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string 843 844 my $utf_8 = "\xd6schel"; 845 utf8::upgrade ($utf_8); 846 $utf_8 =~ m {(\xd6|Ö)schel}; 847 is($1, "\xd6", "Upgrade error; Bug 45605"); 848 } 849 850 { 851 # Regardless of utf8ness any character matches itself when 852 # doing a case insensitive match. See also [perl #36207] 853 854 for my $o (0 .. 255) { 855 my @ch = (chr ($o), chr ($o)); 856 utf8::upgrade ($ch [1]); 857 for my $u_str (0, 1) { 858 for my $u_pat (0, 1) { 859 like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i, 860 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); 861 like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i, 862 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); 863 } 864 } 865 } 866 } 867 868 { 869 my $message = '$REGMARK in replacement; Bug 49190'; 870 our $REGMARK; 871 local $_ = "A"; 872 ok(s/(*:B)A/$REGMARK/, $message); 873 is($_, "B", $message); 874 $_ = "CCCCBAA"; 875 ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message); 876 is($_, "ZYX", $message); 877 # Use a longer name to force reallocation of $REGMARK. 878 $_ = "CCCCBAA"; 879 ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message); 880 is($_, "ZYYYYYYYYYYYYYYYYX", $message); 881 } 882 883 { 884 my $message = 'Substitution evaluation in list context; Bug 52658'; 885 my $reg = '../xxx/'; 886 my @te = ($reg =~ m{^(/?(?:\.\./)*)}, 887 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); 888 is($reg, '../bbb/', $message); 889 is($te [0], '../', $message); 890 } 891 892 { 893 my $a = "xyzt" x 8192; 894 like($a, qr/\A(?>[a-z])*\z/, 895 '(?>) does not cause wrongness on long string; Bug 60034'); 896 my $b = $a . chr 256; 897 chop $b; 898 is($a, $b, 'Bug 60034'); 899 like($b, qr/\A(?>[a-z])*\z/, 900 '(?>) does not cause wrongness on long string with UTF-8; Bug 60034'); 901 } 902 903 # 904 # Keep the following tests last -- they may crash perl 905 # 906 print "# Tests that follow may crash perl\n"; 907 { 908 909 my $message = 'Pattern in a loop, failure should not ' . 910 'affect previous success; Bug 19049/38869'; 911 my @list = ( 912 'ab cdef', # Matches regex 913 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it 914 ); 915 my $y; 916 my $x; 917 foreach (@list) { 918 m/ab(.+)cd/i; # The ignore-case seems to be important 919 $y = $1; # Use $1, which might not be from the last match! 920 $x = substr ($list [0], $- [0], $+ [0] - $- [0]); 921 } 922 is($y, ' ', $message); 923 is($x, 'ab cd', $message); 924 } 925 926 SKIP: { 927 skip("Can run out of memory on os390", 1) if $^O eq 'os390'; 928 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274"); 929 } 930 { 931 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, 932 "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274"); 933 } 934 935 { 936 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache 937 938 local ${^UTF8CACHE} = -1; 939 my $message = "Shouldn't panic; Bug 45337"; 940 my $s = "[a]a{2}"; 941 utf8::upgrade $s; 942 like("aaa", qr/$s/, $message); 943 } 944 { 945 my $message = "Check if tree logic breaks \$^R; Bug 57042"; 946 my $cond_re = qr/\s* 947 \s* (?: 948 \( \s* A (?{1}) 949 | \( \s* B (?{2}) 950 ) 951 /x; 952 my @res; 953 for my $line ("(A)","(B)") { 954 if ($line =~ m/$cond_re/) { 955 push @res, $^R ? "#$^R" : "UNDEF"; 956 } 957 } 958 is("@res","#1 #2", $message); 959 } 960 { 961 no warnings 'closure'; 962 my $re = qr/A(??{"1"})/; 963 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; 964 ok $1 eq "A1"; 965 ok $2 eq "B"; 966 } 967 968 # This only works under -DEBUGGING because it relies on an assert(). 969 { 970 # Check capture offset re-entrancy of utf8 code. 971 972 sub fswash { $_[0] =~ s/([>X])//g; } 973 974 my $k1 = "." x 4 . ">>"; 975 fswash($k1); 976 977 my $k2 = "\x{f1}\x{2022}"; 978 $k2 =~ s/([\360-\362])/>/g; 979 fswash($k2); 980 981 is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508"); 982 } 983 984 { 985 # minimal CURLYM limited to 32767 matches 986 my @pat = ( 987 qr{a(x|y)*b}, # CURLYM 988 qr{a(x|y)*?b}, # .. with minmod 989 qr{a([wx]|[yz])*b}, # .. and without tries 990 qr{a([wx]|[yz])*?b}, 991 ); 992 my $len = 32768; 993 my $s = join '', 'a', 'x' x $len, 'b'; 994 for my $pat (@pat) { 995 like($s, $pat, "$pat; Bug 65372"); 996 } 997 } 998 999 { 1000 local $::TODO = "[perl #38133]"; 1001 1002 "A" =~ /(((?:A))?)+/; 1003 my $first = $2; 1004 1005 "A" =~ /(((A))?)+/; 1006 my $second = $2; 1007 1008 is($first, $second); 1009 } 1010 1011 { 1012 my $message 1013 = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998'; 1014 1015 # Catch warnings: 1016 my $w; 1017 local $SIG{__WARN__} = sub { $w .= shift }; 1018 1019 # This bug can be reduced to 1020 qq{\x{30ab}} =~ /\xab|\xa9/; 1021 # but it's nice to have a more 'real-world' test. The original test 1022 # case from the RT ticket follows: 1023 1024 my %conv = ( 1025 "\xab" => "<", 1026 "\xa9" => "(c)", 1027 ); 1028 my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')'; 1029 $conv_rx = qr{$conv_rx}; 1030 1031 my $x 1032 = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}} 1033 . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}} 1034 . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}} 1035 . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}} 1036 . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}} 1037 . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}} 1038 . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}} 1039 . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}}; 1040 1041 $x =~ s{$conv_rx}{$conv{$1}}eg; 1042 1043 is($w, undef, $message); 1044 } 1045 1046 { 1047 # minimal CURLYM limited to 32767 matches 1048 1049 is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f", 1050 'stclass optimisation does not break + inside (?=); Bug 68564'); 1051 } 1052 1053 { 1054 use charnames ":full"; 1055 # Delayed interpolation of \N' 1056 my $r1 = qr/\N{THAI CHARACTER SARA I}/; 1057 my $r2 = qr'\N{THAI CHARACTER SARA I}'; 1058 my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}"; 1059 1060 # Bug #56444 1061 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/'; 1062 ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\''; 1063 1064 # Bug #62056 1065 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/'; 1066 1067 ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"'; 1068 ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"'; 1069 ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"'; 1070 ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"'; 1071 } 1072 1073 { 1074 use charnames ":full"; 1075 my $message = '[perl #74982] Period coming after \N{}'; 1076 ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message); 1077 ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message); 1078 ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message); 1079 ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message); 1080 } 1081 1082SKIP: { 1083 ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org 1084 1085 skip('Perl configured without Encode module', 1) 1086 unless $Config{extensions} =~ / Encode /; 1087 1088 # Test case cut down by jhi 1089 fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities'); 1090use Encode; 1091my $t = ord('A') == 193 ? "\xEA" : "\xE9"; 1092Encode::_utf8_on($t); 1093substr($t,0); 1094$t =~ s/([^a])//ge; 1095EOP 1096 } 1097 1098 { 1099 # pattern must be compiled late or we can break the test file 1100 my $message = '[perl #115050] repeated nothings in a trie can cause panic'; 1101 my $pattern; 1102 $pattern = '[xyz]|||'; 1103 ok("blah blah" =~ /$pattern/, $message); 1104 ok("blah blah" =~ /(?:$pattern)h/, $message); 1105 $pattern = '|||[xyz]'; 1106 ok("blah blah" =~ /$pattern/, $message); 1107 ok("blah blah" =~ /(?:$pattern)h/, $message); 1108 } 1109 1110 { 1111 # [perl #4289] First mention $& after a match 1112 local $::TODO = "these tests fail without Copy-on-Write enabled" 1113 if $Config{ccflags} =~ /PERL_NO_COW/; 1114 fresh_perl_is( 1115 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"', 1116 "b\n", {}, '$& first mentioned after match'); 1117 fresh_perl_is( 1118 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"', 1119 "a\n", {}, '$` first mentioned after match'); 1120 fresh_perl_is( 1121 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"', 1122 "c\n", {}, '$\' first mentioned after match'); 1123 } 1124 1125 { 1126 # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t 1127 # this tests some related failures 1128 # 1129 # The tests in the block *only* fail when run on 32-bit systems 1130 # with a malloc that allocates above the 2GB line. On the system 1131 # in the report above that only happened in a thread. 1132 my $s = "\x{1ff}" . "f" x 32; 1133 ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap"); 1134 } 1135 1136 { 1137 # RT #129012 heap-buffer-overflow Perl_fbm_instr. 1138 # This test is unlikely to not pass, but it used to fail 1139 # ASAN/valgrind 1140 1141 my $s ="\x{100}0000000"; 1142 ok($s !~ /00000?\x80\x80\x80/, "RT #129012"); 1143 } 1144 1145 { 1146 # RT #129085 heap-buffer-overflow Perl_re_intuit_start 1147 # this did fail under ASAN, but didn't under valgrind 1148 my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}"; 1149 ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085"); 1150 } 1151 { 1152 # rt 1153 fresh_perl_is( 1154 'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"', 1155 "ok", {}, 'RT #130561 - allowing impossible quantifier should not cause SEGVs'); 1156 my $s= "foo"; 1157 no warnings 'regexp'; 1158 ok($s=~/(foo){1,0}|(?1)/, 1159 "RT #130561 - allowing impossible quantifier should not break recursion"); 1160 } 1161 { 1162 # RT #133892 Coredump in Perl_re_intuit_start 1163 # Second match flips to checking floating substring before fixed 1164 # substring, which triggers a pathway that failed to check there 1165 # was a non-utf8 version of the string before trying to use it 1166 # resulting in a SEGV. 1167 my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 }; 1168 ok($result == 0); 1169 } 1170 1171} # End of sub run_tests 1172 11731; 1174