#!./perl
#
# This is a home for regular expression tests that don't fit into
# the format supported by re/regexp.t.  If you want to add a test
# that does fit that format, add it to re/re_tests, not here.
#
# Each bare block inside run_tests() is one regression test (or a small
# group of them) for a reported bug; the bug/ticket id is carried in the
# test description string.

# Forward declaration: run_tests() is called below, before its definition.
sub run_tests;

$| = 1;

BEGIN {
    chdir 't' if -d 't';
    require './test.pl';
    set_up_inc( '../lib', '.' );
    skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-");
}

use strict;
use warnings;
use 5.010;
use Config;

plan tests => 2510;  # Update this when adding/deleting tests.

# Only run when this file is the top-level script (caller() is false there).
run_tests() unless caller;

#
# Tests start here.
#
sub run_tests {

    like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/,
         "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)");

    {
        no warnings 'uninitialized';
        ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)");
    }

    {
        my $message = 'bug id 20001008.001 (#4407)';

        my @x = ("stra\337e 138", "stra\337e 138");
        for (@x) {
            ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message);
            ok(my ($latin) = /^(.+)(?:\s+\d)/, $message);
            is($latin, "stra\337e", $message);
            ok($latin =~ s/stra\337e/straße/, $message);
            #
            # Previous code follows, but commented out - there were no tests.
            #
            # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
            # use utf8; # needed for the raw UTF-8
            # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
        }
    }

    {
        # First half of the bug.
        my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)';
        my $X = chr (1448);
        ok(my ($Y) = $X =~ /(.*)/, $message);
        is($Y, v1448, $message);
        is(length $Y, 1, $message);

        # Second half of the bug.
        $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)';
        $X = '';
        $X =~ s/^/chr(1488)/e;
        is(length $X, 1, $message);
        is(ord $X, 1488, $message);
    }

    {
        my $message = 'Repeated s///; Bug 20001108.001 (#4631)';
        my $X = "Szab\x{f3},Bal\x{e1}zs";
        my $Y = $X;
        $Y =~ s/(B)/$1/ for 0 .. 3;
        is($Y, $X, $message);
        is($X, "Szab\x{f3},Bal\x{e1}zs", $message);
    }

    {
        my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)';
        my $x = "\x{100}A";
        $x =~ s/A/B/;
        is($x, "\x{100}B", $message);
        is(length $x, 2, $message);
    }

    {
        # The original bug report had 'no utf8' here but that was irrelevant.

        my $message = "Don't dump core; Bug 20010306.008 (#5982)";
        my $a = "a\x{1234}";
        like($a, qr/\w/, $message);  # used to core dump.
    }

    {
        my $message = '/g in scalar context; Bug 20010410.006 (#6796)';
        for my $rx ('/(.*?)\{(.*?)\}/csg',
                    '/(.*?)\{(.*?)\}/cg',
                    '/(.*?)\{(.*?)\}/sg',
                    '/(.*?)\{(.*?)\}/g',
                    '/(.+?)\{(.+?)\}/csg',) {
            my $i = 0;
            my $input = "a{b}c{d}";
            # The here-doc terminator is the literal string " --" (leading
            # space included), so the closing line must match it exactly.
            # Each $rx is a full match operator in source form, hence the
            # string evals.
            eval <<" --";
                while (eval \$input =~ $rx) {
                    \$i ++;
                }
 --
            is($i, 2, $message);
        }
    }

    {
        # NOTE(review): "\014" in the list below is form feed; vertical tab
        # would be "\013".  The original comment presumably meant form feed.
        # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
        for ("\n", "\t", "\014", "\r") {
            unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_);
        }
        for (" ") {
            like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)");
        }
    }

    {
        # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context

        $_ = "ababacadaea";
        my $a = join ":", /b./gc;
        my $b = join ":", /a./gc;
        my $c = pos;
        is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)");
    }

    {
        # [ID 20010407.006 (#6767)] matching utf8 return values from
        # functions does not work

        my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)';
        # The package statement below is why ok() is called as ::ok().
        package ID_20010407_006;
        sub x {"a\x{1234}"}
        my $x = x;
        my $y;
        ::ok($x =~ /(..)/, $message);
        $y = $1;
        ::ok(length ($y) == 2 && $y eq $x, $message);
        ::ok(x =~ /(..)/, $message);
        $y = $1;
        ::ok(length ($y) == 2 && $y eq $x, $message);
    }

    {
        # High bit bug -- japhy
        my $x = "ab\200d";
        ok $x =~ /.*?\200/, "High bit fine";
    }

    {
        my $message = 'UTF-8 hash keys and /$/';
        # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
        #                                         /2002-01/msg01327.html

        my $u = "a\x{100}";
        my $v = substr ($u, 0, 1);
        my $w = substr ($u, 1, 1);
        my %u = ($u => $u, $v => $v, $w => $w);
        # Matching the key ($_) and the stored value must give the same answer.
        for (keys %u) {
            my $m1 =            /^\w*$/ ? 1 : 0;
            my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
            is($m1, $m2, $message);
        }
    }

    {
        my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)";

        for my $char ("a", "\x{df}", "\x{100}") {
            my $x = "$char b $char";
            # The replacement code runs a nested match before returning "x".
            $x =~ s{($char)}{
                  "c" =~ /c/;
                  "x";
            }ge;
            is(substr ($x, 0, 1), substr ($x, -1, 1), $message);
        }
    }

    {
        my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)";

        # Requires reuse of last successful pattern.
        my $num = 123;
        $num =~ /\d/;
        for (0 .. 1) {
            # Expect success on iteration 0 and failure on iteration 1
            # (see the diag text below).
            my $match = m?? + 0;
            ok($match != $_, $message)
                or diag(sprintf "'match one' %s on %s iteration" =>
                        $match ? 'succeeded' : 'failed',
                        $_     ? 'second'    : 'first');
        }
        $num =~ /(\d)/;
        my $result = join "" => $num =~ //g;
        is($result, $num, $message);
    }

    {
        my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)';
        for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
            my ($type, $char) = @$_;
            for my $len (32000, 32768, 33000) {
                my $s = $char . "f" x $len;
                my $r = $s =~ /$char([f]*)/gc;
                ok($r, $message) or diag("<$type x $len>");
                # Only check pos() when the match succeeded; a failed match
                # is already reported by the ok() above.
                ok(!$r || pos ($s) == $len + 1, $message)
                    or diag("<$type x $len>; pos = @{[pos $s]}");
            }
        }
    }

    {
        my $s = "\x{100}" x 5;
        my $ok = $s =~ /(\x{100}{4})/;
        my ($ord, $len) = (ord $1, length $1);
        ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
    }

    {
        my $message = 'UTF-8 matching; Bug 15397';
        like("\x{100}", qr/\x{100}/, $message);
        like("\x{100}", qr/(\x{100})/, $message);
        like("\x{100}", qr/(\x{100}){1}/, $message);
        like("\x{100}\x{100}", qr/(\x{100}){2}/, $message);
        like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message);
    }

    {
        my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471';
        local $_ = 'CD';
        ok(/(AB)*?CD/ && !defined $1, $message);
        ok(/(AB)*CD/  && !defined $1, $message);
    }

    {
        my $message = "Caching shouldn't prevent match; Bug 3547";
        my $pattern = "^(b+?|a){1,2}c";
        ok("bac"    =~ /$pattern/ && $1 eq 'a', $message);
        ok("bbac"   =~ /$pattern/ && $1 eq 'a', $message);
        ok("bbbac"  =~ /$pattern/ && $1 eq 'a', $message);
        ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message);
    }

    {
        ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232');
        is($1, "\x{100}",  '$1 is UTF-8; Bug 18232');
        # A match inside an inner block must not disturb the outer $1.
        { 'a' =~ /./; }
        is($1, "\x{100}",  '$1 is still UTF-8; Bug 18232');
        isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232');
    }

    {
        my $message = "Optimizer doesn't prematurely reject match; Bug 19767";
        use utf8;

        my $attr = 'Name-1';
        my $NormalChar      = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
        my $NormalWord      = qr /${NormalChar}+?/;
        my $PredNameHyphen  = qr /^${NormalWord}(\-${NormalWord})*?$/;

        $attr =~ /^$/;
        like($attr, $PredNameHyphen, $message);  # Original test.

        "a" =~ m/[b]/;
        like("0", qr/\p{N}+\z/, $message);         # Variant.
    }

    {
        my $message = "(??{ }) doesn't return stale values; Bug 20683";
        our $p = 1;
        foreach (1, 2, 3, 4) {
            $p ++ if /(??{ $p })/
        }
        is($p, 5, $message);

        {
            # Tie class whose FETCH hands out 1, 2, 3, ... on successive reads.
            package P;
            $a = 1;
            sub TIESCALAR {bless []}
            sub FETCH     {$a ++}
        }
        tie $p, "P";
        foreach (1, 2, 3, 4) {
            /(??{ $p })/
        }
        is($p, 5, $message);
    }

    {
        # Subject: Odd regexp behavior
        # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
        # Date: Wed, 26 Feb 2003 16:53:12 +0000
        # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
        # To: perl-unicode@perl.org

        my $message = 'Markus Kuhn 2003-02-26';

        my $x = "\x{2019}\nk";
        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
        is($x, "\x{2019} k", $message);

        $x = "b\nk";
        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
        is($x, "b k", $message);

        like("\x{2019}", qr/\S/, $message);
    }

    {
        my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411";
        our $i;
        is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message);
        no warnings 'syntax';
        @_ = split /(?{'WOW'})/, 'abc';
        local $" = "|";
        is("@_", "a|b|c", $message);
    }

    is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split');

    {
        $_ = "code: 'x' { '...' }\n"; study;
        my @x; push @x, $& while m/'[^\']*'/gx;
        local $" = ":";
        is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757");
    }

    {
        # func() is called standalone and from inside s///e and (?{ }), with
        # and without an outer /m.  Its own matches must not inherit that /m.
        sub func ($) {
            ok("a\nb" !~ /^b/,  "Propagated modifier; $_[0]; Bug 22354");
            ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354");
        }
        func "standalone";
        $_ = "x"; s/x/func "in subst"/e;
        $_ = "x"; s/x/func "in multiline subst"/em;
        $_ = "x"; /x(?{func "in regexp"})/;
        $_ = "x"; /x(?{func "in multiline regexp"})/m;
    }

    {
        $_    = "abcdef\n";
        my @x = m/./g;
        is("abcde", $`, 'Global match sets $`; Bug 19049');
    }

    {
        # [perl #23769] Unicode regex broken on simple example
        # regrepeat() didn't handle UTF-8 EXACT case right.

        my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
        my $message = "$Mess; Bug 23769";

        # Append \x{100} then chop it off again: $s keeps only the \x{a0}s.
        my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;

        like($s, qr/\x{a0}/, $message);
        like($s, qr/\x{a0}+/, $message);
        like($s, qr/\x{a0}\x{a0}/, $message);

        $message = "$Mess (easy variant); Bug 23769";
        ok("aaa\x{100}" =~ /(a+)/, $message);
        is($1, "aaa", $message);

        $message = "$Mess (easy invariant); Bug 23769";
        ok("aaa\x{100} " =~ /(a+?)/, $message);
        is($1, "a", $message);

        $message = "$Mess (regrepeat variant); Bug 23769";
        ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message);
        is($1, "\xa0", $message);

        $message = "$Mess (regrepeat invariant); Bug 23769";
        ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message);
        is($1, "\xa0\xa0\xa0", $message);

        $message = "$Mess (hard variant); Bug 23769";
        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message);
        is($1, "\xa0\xa1", $message);

        $message = "$Mess (hard invariant); Bug 23769";
        ok("ababab\x{100} " =~ /((?:ab)+)/, $message);
        is($1, 'ababab', $message);

        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message);
        is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message);

        ok("ababab\x{100} " =~ /((?:ab)+?)/, $message);
        is($1, "ab", $message);

        $message = "Don't match first byte of UTF-8 representation; Bug 23769";
        unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message);
        unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message);
        unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message);
    }

    {
        # perl panic: pp_match start/end pointers

        is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc",
           'Captures can move backwards in string; Bug 25269');
    }

    {
        # \cA not recognized in character classes
        like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940');
        like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940');
        like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940');
        like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940');
        like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940');
        like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940');
        like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940');
        unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940');
    }

    {
        # perl #28532: optional zero-width match at end of string is ignored

        ok("abc" =~ /^abc(\z)?/ && defined($1),
           'Optional zero-width match at end of string; Bug 28532');
        ok("abc" =~ /^abc(\z)??/ && !defined($1),
           'Optional zero-width match at end of string; Bug 28532');
    }

    {
        # Same character \xe9 twice: $utf8 goes through the append-and-chop
        # of \x{100}, $latin1 is a plain literal.
        my $utf8 = "\xe9\x{100}"; chop $utf8;
        my $latin1 = "\xe9";

        like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207");
        like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207");
        like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207");
        like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207");

        like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207");
        like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207");
        like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207");
        like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207");
    }

    {
        my $s = "abcd";
        $s =~ /(..)(..)/g;
        # Overwrite the matched string twice, then check $2 is still intact.
        $s = $1;
        $s = $2;
        is($2, 'cd',
           "Assigning to original string does not corrupt match vars; Bug 37038");
    }

    {
        {
            package wooosh;
            sub gloople {"!"}
        }
        my $aeek = bless {} => 'wooosh';
        is(do {$aeek -> gloople () =~ /(.)/g}, 1,
           "//g match against return value of sub [change e26a497577f3ce7b]");

        sub gloople {"!"}
        is(do{gloople () =~ /(.)/g}, 1,
           "change e26a497577f3ce7b didn't affect sub calls for some reason");
    }

    {
        # [perl #78680]
        # See changes 26925-26928, which reverted change 26410
        {
            package lv;
            our $var = "abc";
            sub variable : lvalue {$var}
        }
        my $o = bless [] => 'lv';
        my $f = "";
        my $r = eval {
            for (1 .. 2) {
                $f .= $1 if $o -> variable =~ /(.)/g;
            }
            1;
        };
        if ($r) {
            is($f, "ab", "pos() retained between calls");
        }
        else {
            ok 0, "Code failed: $@";
        }

        # Same check again, this time through a plain (non-method) lvalue sub.
        our $var = "abc";
        sub variable : lvalue {$var}
        my $g = "";
        my $s = eval {
            for (1 .. 2) {
                $g .= $1 if variable =~ /(.)/g;
            }
            1;
        };
        if ($s) {
            is($g, "ab", "pos() retained between calls");
        }
        else {
            ok 0, "Code failed: $@";
        }
    }

    SKIP:
    {
        skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC;
        fresh_perl_like(
            'no warnings "utf8";
             $_ = pack "U0C2", 0xa2, 0xf8;  # Ill-formed UTF-8
             my $ret = 0;
             do {!($ret = s/[a\0]+//g)}',
            qr/Malformed UTF-8/,
            {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836");
    }

    {
        # chr(65535) should be allowed in regexes

        no warnings 'utf8'; # To allow non-characters
        my ($c, $r, $s);

        $c = chr 0xffff;
        $c =~ s/$c//g;
        is($c, "", "U+FFFF, parsed as atom; Bug 38293");

        $c = chr 0xffff;
        $r = "\\$c";
        $c =~ s/$r//g;
        is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293");

        $c = chr 0xffff;
        $c =~ s/[$c]//g;
        is($c, "", "U+FFFF, parsed in class; Bug 38293");

        $c = chr 0xffff;
        $r = "[\\$c]";
        $c =~ s/$r//g;
        is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293");

        $s = "A\x{ffff}B";
        $s =~ s/\x{ffff}//i;
        is($s, "AB", "U+FFFF, EXACTF; Bug 38293");

        $s = "\x{ffff}A";
        $s =~ s/\bA//;
        is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293");

        $s = "\x{ffff}!";
        $s =~ s/\B!//;
        is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293");
    }

    {

        # The printing characters
        my @chars = ("A" .. "Z");
        my $delim = ",";
        my $size = 32771 - 4;
        my $str = '';

        # Create some random junk. Inefficient, but it works.
        for (my $i = 0; $i < $size; $ i++) {
            $str .= $chars [rand @chars];
        }

        $str .= ($delim x 4);
        # NOTE(review): $res and $matched are declared but never used in
        # this block.
        my $res;
        my $matched;
        ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583");
        is($str, "", "Empty string; Bug 39583");
        ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583');
    }

    {
        like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940');
        like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
        like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940');
        like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940');

        like("X\0A", qr/X\c@?A/,  '\c@?; Bug 27940');
        like("X\0A", qr/X\c@*A/,  '\c@*; Bug 27940');
        like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940');
        like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940');
        like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940');

        like("X\@A", qr/X@?A/,  '@?; Bug 27940');
        like("X\@A", qr/X@*A/,  '@*; Bug 27940');
        like("X\@A", qr/X@(A)/, '@(; Bug 27940');
        like("X\@A", qr/X(@)A/, '@); Bug 27940');
        like("X\@A", qr/X@|ZA/, '@|; Bug 27940');

        local $" = ','; # non-whitespace and non-RE-specific
        like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940');
        like("A@+B", qr/A@{+}B/,  'Interpolation of @+ in /@{+}/; Bug 27940');
        like("A@-B", qr/A@{-}B/,  'Interpolation of @- in /@{-}/; Bug 27940');
        like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940');
        like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940');
    }

    {
        my $s = 'foo bar baz';
        my (@k, @v, @fetch, $res);
        my $count = 0;
        # qw() does not interpolate: these are literal labels for test names.
        my @names = qw ($+{A} $+{B} $+{C});
        if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
            while (my ($k, $v) = each (%+)) {
                $count++;
            }
            @k = sort keys   (%+);
            @v = sort values (%+);
            $res = 1;
            # Pair each named capture with its numbered counterpart.
            push @fetch,
                  ["$+{A}", "$1"],
                  ["$+{B}", "$2"],
                  ["$+{C}", "$3"],
            ;
        }
        foreach (0 .. 2) {
            if ($fetch [$_]) {
                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
            } else {
                # The match failed, so @fetch is empty: report a failure.
                ok 0, $names[$_];
            }
        }
        is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496");
        is($count, 3, "Got 3 keys in %+ via each; Bug 50496");
        is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496");
        is("@k", "A B C", "Got expected keys; Bug 50496");
        is("@v", "bar baz foo", "Got expected values; Bug 50496");
        eval '
            no warnings "uninitialized";
            print for $+ {this_key_doesnt_exist};
        ';
        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
    }

    {
        #
        # Almost the same as the block above, except that the capture is nested.
        #

        my $s = 'foo bar baz';
        my (@k, @v, @fetch, $res);
        my $count = 0;
        my @names = qw ($+{A} $+{B} $+{C} $+{D});
        if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
            while (my ($k,$v) = each(%+)) {
                $count++;
            }
            @k = sort keys   (%+);
            @v = sort values (%+);
            $res = 1;
            # D is the outermost group, so it is $1; A, B, C shift to $2..$4.
            push @fetch,
                ["$+{A}", "$2"],
                ["$+{B}", "$3"],
                ["$+{C}", "$4"],
                ["$+{D}", "$1"],
            ;
        }
        foreach (0 .. 3) {
            if ($fetch [$_]) {
                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
            } else {
                ok 0, $names [$_];
            }
        }
        is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496");
        is($count, 4, "Got 4 keys in %+ via each; Bug 50496");
        # NOTE(review): passes @k directly, unlike the "0 + @k" in the block
        # above; presumably is() imposes scalar context on its first
        # argument -- confirm against test.pl.
        is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496");
        is("@k", "A B C D", "Got expected keys; Bug 50496");
        is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496");
        eval '
            no warnings "uninitialized";
            print for $+ {this_key_doesnt_exist};
        ';
        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
    }

    {
        my $str = 'abc';
        my $count = 0;
        my $mval = 0;
        my $pval = 0;
        # Record the last index of @- and @+ as seen inside the loop body.
        while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
        is($mval,  0, '@- should be empty; Bug 36046');
        is($pval,  0, '@+ should be empty; Bug 36046');
        is($count, 1, 'Should have matched once only; Bug 36046');
    }

    {
        my $message = '/m in precompiled regexp; Bug 40684';
        my $s = "abc\ndef";
        my $rex = qr'^abc$'m;
        ok($s =~ m/$rex/, $message);
        ok($s =~ m/^abc$/m, $message);
    }

    {
        my $message = '(?: ... )? should not lose $^R; Bug 36909';
        $^R = 'Nothing';
        # Each inner block localises $^R, matches, checks the value set by
        # (?{ }), and then the outer value must be restored on scope exit.
        {
            local $^R = "Bad";
            ok('x foofoo y' =~ m {
                  (foo)                # $^R correctly set
                  (?{ "last regexp code result" })
            }x, $message);
            is($^R, 'last regexp code result', $message);
        }
        is($^R, 'Nothing', $message);

        {
            local $^R = "Bad";

            ok('x foofoo y' =~ m {
                  (?:foo|bar)+         # $^R correctly set
                  (?{ "last regexp code result" })
            }x, $message);
            is($^R, 'last regexp code result', $message);
        }
        is($^R, 'Nothing', $message);

        {
            local $^R = "Bad";
            ok('x foofoo y' =~ m {
                  (foo|bar)\1+         # $^R undefined
                  (?{ "last regexp code result" })
            }x, $message);
            is($^R, 'last regexp code result', $message);
        }
        is($^R, 'Nothing', $message);

        {
            local $^R = "Bad";
            ok('x foofoo y' =~ m {
                  (foo|bar)\1          # This time without the +
                  (?{"last regexp code result"})
            }x, $message);
            is($^R, 'last regexp code result', $message);
        }
        is($^R, 'Nothing', $message);
    }

    {
        my $message = 'Match is linear, not quadratic; Bug 22395';
        our $count;
        for my $l (10, 100, 1000) {
            $count = 0;
            ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
            # Marked TODO: the count is expected to differ for now.
            local $::TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
            is($count, $l + 1, $message);
        }
    }

    {
        my $message = '@-/@+ should not have undefined values; Bug 22614';
        local $_ = 'ab';
        our @len = ();
        /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
        is("@len", "2 2 2", $message);
    }

    {
        my $message = '$& set on s///; Bug 18209';
        my $text = ' word1 word2 word3 word4 word5 word6 ';

        my @words = ('word1', 'word3', 'word5');
        my $count;
        foreach my $word (@words) {
            $text =~ s/$word\s//gi; # Leave a space to separate words
                                    # in the resultant str.
            # The following block is not working.
            if ($&) {
                $count ++;
            }
            # End bad block
        }
        is($count, 3, $message);
        is($text, ' word2 word4 word6 ', $message);
    }

    {
        # RT#6893

        local $_ = qq (A\nB\nC\n);
        my @res;
        while (m#(\G|\n)([^\n]*)\n#gsx) {
            push @res, "$2";
            # Safety valve so a regression fails instead of looping forever.
            last if @res > 3;
        }
        is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893");
    }

    {
        # No optimizer bug
        my @tails  = ('', '(?(1))', '(|)', '()?');
        my @quants = ('*','+');
        # $doit->(\@patterns, @strings): for every string, try each
        # pattern/quantifier/tail combination, with and without /m.
        my $doit = sub {
            my $pats = shift;
            for (@_) {
                for my $pat (@$pats) {
                    for my $quant (@quants) {
                        for my $tail (@tails) {
                            my $re = "($pat$quant\$)$tail";
                            ok(/$re/  && $1 eq $_, "'$_' =~ /$re/; Bug 41010");
                            ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010");
                        }
                    }
                }
            }
        };

        my @dpats = ('\d',
                     '[1234567890]',
                     '(1|[23]|4|[56]|[78]|[90])',
                     '(?:1|[23]|4|[56]|[78]|[90])',
                     '(1|2|3|4|5|6|7|8|9|0)',
                     '(?:1|2|3|4|5|6|7|8|9|0)');
        my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
        my @sstrs = (' ');
        my @dstrs = ('12345');
        $doit -> (\@spats, @sstrs);
        $doit -> (\@dpats, @dstrs);
    }

    {
        # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string

        my $utf_8 = "\xd6schel";
        utf8::upgrade ($utf_8);
        $utf_8 =~ m {(\xd6|Ö)schel};
        is($1, "\xd6", "Upgrade error; Bug 45605");
    }

    {
        # Regardless of utf8ness any character matches itself when
        # doing a case insensitive match. See also [perl #36207]

        for my $o (0 .. 255) {
            # $ch[0] is left as created; $ch[1] is the same character
            # after utf8::upgrade.
            my @ch = (chr ($o), chr ($o));
            utf8::upgrade ($ch [1]);
            for my $u_str (0, 1) {
                for my $u_pat (0, 1) {
                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i,
                         "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i,
                         "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
                }
            }
        }
    }

    {
        my $message = '$REGMARK in replacement; Bug 49190';
        our $REGMARK;
        local $_ = "A";
        ok(s/(*:B)A/$REGMARK/, $message);
        is($_, "B", $message);
        $_ = "CCCCBAA";
        ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message);
        is($_, "ZYX", $message);
        # Use a longer name to force reallocation of $REGMARK.
        $_ = "CCCCBAA";
        ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message);
        is($_, "ZYYYYYYYYYYYYYYYYX", $message);
    }

    {
        my $message = 'Substitution evaluation in list context; Bug 52658';
        my $reg = '../xxx/';
        my @te  = ($reg =~ m{^(/?(?:\.\./)*)},
                   $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
        is($reg, '../bbb/', $message);
        is($te [0], '../', $message);
    }

    {
        my $a = "xyzt" x 8192;
        like($a, qr/\A(?>[a-z])*\z/,
             '(?>) does not cause wrongness on long string; Bug 60034');
        # $b: same characters as $a, built by appending chr 256 and
        # chopping it off again.
        my $b = $a . chr 256;
        chop $b;
        is($a, $b, 'Bug 60034');
        like($b, qr/\A(?>[a-z])*\z/,
             '(?>) does not cause wrongness on long string with UTF-8; Bug 60034');
    }

    #
    # Keep the following tests last -- they may crash perl
    #
    print "# Tests that follow may crash perl\n";
    {

        my $message = 'Pattern in a loop, failure should not ' .
                      'affect previous success; Bug 19049/38869';
        my @list = (
            'ab cdef',             # Matches regex
            ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
        );
        my $y;
        my $x;
        foreach (@list) {
            m/ab(.+)cd/i; # The ignore-case seems to be important
            $y = $1;      # Use $1, which might not be from the last match!
            $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
        }
        is($y, ' ', $message);
        is($x, 'ab cd', $message);
    }

    SKIP: {
        skip("Can run out of memory on os390", 1) if $^O eq 'os390';
        ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274");
    }
    {
        ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
            "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274");
    }

    {
        # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache

        local ${^UTF8CACHE} = -1;
        my $message = "Shouldn't panic; Bug 45337";
        my $s = "[a]a{2}";
        utf8::upgrade $s;
        like("aaa", qr/$s/, $message);
    }
    {
        my $message = "Check if tree logic breaks \$^R; Bug 57042";
        my $cond_re = qr/\s*
            \s* (?:
                   \( \s* A  (?{1})
                 | \( \s* B  (?{2})
               )
           /x;
        my @res;
        for my $line ("(A)","(B)") {
            if ($line =~ m/$cond_re/) {
                push @res, $^R ? "#$^R" : "UNDEF";
            }
        }
        is("@res","#1 #2", $message);
    }
    {
        # NOTE(review): the three ok() calls below carry no description.
        no warnings 'closure';
        my $re = qr/A(??{"1"})/;
        ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
        ok $1 eq "A1";
        ok $2 eq "B";
    }

    # This only works under -DEBUGGING because it relies on an assert().
    {
        # Check capture offset re-entrancy of utf8 code.

        sub fswash { $_[0] =~ s/([>X])//g; }

        my $k1 = "." x 4 . ">>";
        fswash($k1);

        my $k2 = "\x{f1}\x{2022}";
        $k2 =~ s/([\360-\362])/>/g;
        fswash($k2);

        is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508");
    }

    {
        # minimal CURLYM limited to 32767 matches
        my @pat = (
            qr{a(x|y)*b},         # CURLYM
            qr{a(x|y)*?b},        # .. with minmod
            qr{a([wx]|[yz])*b},   # .. and without tries
            qr{a([wx]|[yz])*?b},
        );
        my $len = 32768;
        my $s = join '', 'a', 'x' x $len, 'b';
        for my $pat (@pat) {
            like($s, $pat, "$pat; Bug 65372");
        }
    }

    {
        local $::TODO = "[perl #38133]";

        "A" =~ /(((?:A))?)+/;
        my $first = $2;

        "A" =~ /(((A))?)+/;
        my $second = $2;

        # NOTE(review): no description is passed to is() here.
        is($first, $second);
    }

    {
        my $message
            = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998';

        # Catch warnings:
        my $w;
        local $SIG{__WARN__} = sub { $w .= shift };

        # This bug can be reduced to
        qq{\x{30ab}} =~ /\xab|\xa9/;
        # but it's nice to have a more 'real-world' test. The original test
        # case from the RT ticket follows:

        my %conv = (
            "\xab" => "&lt;",
            "\xa9" => "(c)",
        );
        my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
        $conv_rx = qr{$conv_rx};

        my $x
            = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
            . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
            . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
            . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
            . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
            . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
            . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
            . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};

        $x =~ s{$conv_rx}{$conv{$1}}eg;

        # $w stays undef only if no warning was emitted above.
        is($w, undef, $message);
    }

    {
        # The look-ahead capture must be found at every starting position
        # of each word (see the test description, Bug 68564).

        is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f",
           'stclass optimisation does not break + inside (?=); Bug 68564');
    }

    {
        use charnames ":full";
        # Delayed interpolation of \N'
        my $r1 = qr/\N{THAI CHARACTER SARA I}/;
        my $r2 = qr'\N{THAI CHARACTER SARA I}';
        my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";

        # Bug #56444
        ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
        ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\'';

        # Bug #62056
        ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';

        ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
        ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"';
        ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
        ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"';
    }

    {
        use charnames ":full";
        my $message = '[perl #74982] Period coming after \N{}';
        ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
        ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message);
        ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
        ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message);
    }

    SKIP: {
        ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org

        skip('Perl configured without Encode module', 1)
            unless $Config{extensions} =~ / Encode /;

        # Test case cut down by jhi
        # The here-doc body and its EOP terminator must stay at column 0.
        fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, {}, 'Segfault using HTML::Entities');
use Encode;
my $t = ord('A') == 193 ? "\xEA" : "\xE9";
Encode::_utf8_on($t);
$t =~ s/([^a])//ge;
EOP
    }

    {
        # pattern must be compiled late or we can break the test file
        my $message = '[perl #115050] repeated nothings in a trie can cause panic';
        my $pattern;
        $pattern = '[xyz]|||';
        ok("blah blah" =~ /$pattern/, $message);
        ok("blah blah" =~ /(?:$pattern)h/, $message);
        $pattern = '|||[xyz]';
        ok("blah blah" =~ /$pattern/, $message);
        ok("blah blah" =~ /(?:$pattern)h/, $message);
    }

    {
        # [perl #4289] First mention $& after a match
        local $::TODO = "these tests fail without Copy-on-Write enabled"
            if $Config{ccflags} =~ /PERL_NO_COW/;
        # The match variable is named only inside eval q|...|, so it is
        # first mentioned after the match has already run.
        fresh_perl_is(
            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"',
            "b\n", {}, '$& first mentioned after match');
        fresh_perl_is(
            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"',
            "a\n", {}, '$` first mentioned after match');
        fresh_perl_is(
            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"',
            "c\n", {}, '$\' first mentioned after match');
    }

    {
        # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t
        # this tests some related failures
        #
        # The tests in the block *only* fail when run on 32-bit systems
        # with a malloc that allocates above the 2GB line.  On the system
        # in the report above that only happened in a thread.
        my $s = "\x{1ff}" . "f" x 32;
        ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
    }

    {
        # RT #129012 heap-buffer-overflow Perl_fbm_instr.
        # This test is unlikely to not pass, but it used to fail
        # ASAN/valgrind

        my $s ="\x{100}0000000";
        ok($s !~ /00000?\x80\x80\x80/, "RT #129012");
    }

    {
        # RT #129085 heap-buffer-overflow Perl_re_intuit_start
        # this did fail under ASAN, but didn't under valgrind
        my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}";
        ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085");
    }
    {
        # RT #130561: quantifiers whose maximum is below their minimum
        # ({8,0}, {1,0}) must not crash or break recursion.
        fresh_perl_is(
            'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"',
            "ok", {},  'RT #130561 - allowing impossible quantifier should not cause SEGVs');
        my $s= "foo";
        no warnings 'regexp';
        ok($s=~/(foo){1,0}|(?1)/,
            "RT #130561 - allowing impossible quantifier should not break recursion");
    }
    {
        # RT #133892 Coredump in Perl_re_intuit_start
        # Second match flips to checking floating substring before fixed
        # substring, which triggers a pathway that failed to check there
        # was a non-utf8 version of the string before trying to use it
        # resulting in a SEGV.
        # NOTE(review): the ok() below carries no description.
        my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 };
        ok($result == 0);
    }

} # End of sub run_tests

1;