xref: /openbsd/gnu/usr.bin/perl/t/re/pat_rt_report.t (revision 4bdff4be)
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t.  If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7sub run_tests;  # Forward declaration; the body appears further down the file.
8
9$| = 1;  # Autoflush the currently selected output handle.
10
11BEGIN {
12    chdir 't' if -d 't';  # Switch into t/ when that directory exists.
13    require './test.pl';  # Loaded via a path relative to the current directory.
14    set_up_inc( '../lib', '.' );  # NOTE(review): presumably supplied by test.pl to set @INC - confirm.
15    skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-");
16}
17
18use strict;
19use warnings;
20use 5.010;
21use Config;
22
23plan tests => 2510;  # Update this when adding/deleting tests.
24
25run_tests() unless caller;  # Run the tests only when this file is the main program (caller is empty).
26
27#
28# Tests start here.
29#
30sub run_tests {
31
32    like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/,
33	 "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)");
34
35    {
36        no warnings 'uninitialized';
37        ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)");
38    }
39
40    {
41        my $message = 'bug id 20001008.001 (#4407)';
42
43        my @x = ("stra\337e 138", "stra\337e 138");
44        for (@x) {
45            ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message);
46            ok(my ($latin) = /^(.+)(?:\s+\d)/, $message);
47            is($latin, "stra\337e", $message);
48	    ok($latin =~ s/stra\337e/straße/, $message);
49            #
50            # Previous code follows, but commented out - there were no tests.
51            #
52            # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
53            # use utf8; # needed for the raw UTF-8
54            # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
55        }
56    }
57
58    {
59        # First half of the bug.
60        my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)';
61        my $X = chr (1448);
62        ok(my ($Y) = $X =~ /(.*)/, $message);
63        is($Y, v1448, $message);
64        is(length $Y, 1, $message);
65
66        # Second half of the bug.
67        $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)';
68        $X = '';
69        $X =~ s/^/chr(1488)/e;
70        is(length $X, 1, $message);
71        is(ord $X, 1488, $message);
72    }
73
74    {
75        my $message = 'Repeated s///; Bug 20001108.001 (#4631)';
76        my $X = "Szab\x{f3},Bal\x{e1}zs";
77        my $Y = $X;
78        $Y =~ s/(B)/$1/ for 0 .. 3;
79        is($Y, $X, $message);
80        is($X, "Szab\x{f3},Bal\x{e1}zs", $message);
81    }
82
83    {
84        my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)';
85        my $x = "\x{100}A";
86        $x =~ s/A/B/;
87        is($x, "\x{100}B", $message);
88        is(length $x, 2, $message);
89    }
90
91    {
92        # The original bug report had 'no utf8' here but that was irrelevant.
93
94        my $message = "Don't dump core; Bug 20010306.008 (#5982)";
95        my $a = "a\x{1234}";
96        like($a, qr/\w/, $message);  # used to core dump.
97    }
98
99    {
100        my $message = '/g in scalar context; Bug 20010410.006 (#6796)';
101        for my $rx ('/(.*?)\{(.*?)\}/csg',
102		    '/(.*?)\{(.*?)\}/cg',
103		    '/(.*?)\{(.*?)\}/sg',
104		    '/(.*?)\{(.*?)\}/g',
105		    '/(.+?)\{(.+?)\}/csg',) {
106            my $i = 0;
107            my $input = "a{b}c{d}";
108            eval <<"            --";
109                while (eval \$input =~ $rx) {
110                    \$i ++;
111                }
112            --
113            is($i, 2, $message);
114        }
115    }
116
117    {
118        # Amazingly, form feed (\014) is the same in ASCII and EBCDIC.
119        for ("\n", "\t", "\014", "\r") {
120            unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_);
121        }
122        for (" ") {
123            like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)");
124        }
125    }
126
127    {
128        # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context
129
130        $_ = "ababacadaea";
131        my $a = join ":", /b./gc;
132        my $b = join ":", /a./gc;
133        my $c = pos;
134        is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)");
135    }
136
137    {
138        # [ID 20010407.006 (#6767)] matching utf8 return values from
139        # functions does not work
140
141        my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)';
142        package ID_20010407_006;
143        sub x {"a\x{1234}"}
144        my $x = x;
145        my $y;
146      ::ok($x =~ /(..)/, $message);
147        $y = $1;
148      ::ok(length ($y) == 2 && $y eq $x, $message);
149      ::ok(x =~ /(..)/, $message);
150        $y = $1;
151      ::ok(length ($y) == 2 && $y eq $x, $message);
152    }
153
154    {
155        # High bit bug -- japhy
156        my $x = "ab\200d";
157        ok $x =~ /.*?\200/, "High bit fine";
158    }
159
160    {
161        my $message = 'UTF-8 hash keys and /$/';
162        # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
163        #                                         /2002-01/msg01327.html
164
165        my $u = "a\x{100}";
166        my $v = substr ($u, 0, 1);
167        my $w = substr ($u, 1, 1);
168        my %u = ($u => $u, $v => $v, $w => $w);
169        for (keys %u) {
170            my $m1 =            /^\w*$/ ? 1 : 0;
171            my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
172            is($m1, $m2, $message);
173        }
174    }
175
176    {
177        my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)";
178
179        for my $char ("a", "\x{df}", "\x{100}") {
180            my $x = "$char b $char";
181            $x =~ s{($char)}{
182                  "c" =~ /c/;
183                  "x";
184            }ge;
185            is(substr ($x, 0, 1), substr ($x, -1, 1), $message);
186        }
187    }
188
189    {
190        my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)";
191
192        # Requires reuse of last successful pattern.
193        my $num = 123;
194        $num =~ /\d/;
195        for (0 .. 1) {
196            my $match = m?? + 0;
197            ok($match != $_, $message)
198                or diag(sprintf "'match one' %s on %s iteration" =>
199			$match ? 'succeeded' : 'failed',
200			$_     ? 'second'    : 'first');
201        }
202        $num =~ /(\d)/;
203        my $result = join "" => $num =~ //g;
204        is($result, $num, $message);
205    }
206
207    {
208        my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)';
209        for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
210            my ($type, $char) = @$_;
211            for my $len (32000, 32768, 33000) {
212                my  $s = $char . "f" x $len;
213                my  $r = $s =~ /$char([f]*)/gc;
214                ok($r, $message) or diag("<$type x $len>");
215                ok(!$r || pos ($s) == $len + 1, $message)
216		    or diag("<$type x $len>; pos = @{[pos $s]}");
217            }
218        }
219    }
220
221    {
222        my $s = "\x{100}" x 5;
223        my $ok = $s =~ /(\x{100}{4})/;
224        my ($ord, $len) = (ord $1, length $1);
225        ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
226    }
227
228    {
229        my $message = 'UTF-8 matching; Bug 15397';
230        like("\x{100}", qr/\x{100}/, $message);
231        like("\x{100}", qr/(\x{100})/, $message);
232        like("\x{100}", qr/(\x{100}){1}/, $message);
233        like("\x{100}\x{100}", qr/(\x{100}){2}/, $message);
234        like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message);
235    }
236
237    {
238        my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471';
239        local $_       = 'CD';
240        ok(/(AB)*?CD/ && !defined $1, $message);
241        ok(/(AB)*CD/  && !defined $1, $message);
242    }
243
244    {
245        my $message = "Caching shouldn't prevent match; Bug 3547";
246        my $pattern = "^(b+?|a){1,2}c";
247        ok("bac"    =~ /$pattern/ && $1 eq 'a', $message);
248        ok("bbac"   =~ /$pattern/ && $1 eq 'a', $message);
249        ok("bbbac"  =~ /$pattern/ && $1 eq 'a', $message);
250        ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message);
251    }
252
253    {
254        ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232');
255        is($1, "\x{100}",  '$1 is UTF-8; Bug 18232');
256        { 'a' =~ /./; }
257        is($1, "\x{100}",  '$1 is still UTF-8; Bug 18232');
258        isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232');
259    }
260
261    {
262        my $message = "Optimizer doesn't prematurely reject match; Bug 19767";
263        use utf8;
264
265        my $attr = 'Name-1';
266        my $NormalChar      = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
267        my $NormalWord      = qr /${NormalChar}+?/;
268        my $PredNameHyphen  = qr /^${NormalWord}(\-${NormalWord})*?$/;
269
270        $attr =~ /^$/;
271        like($attr, $PredNameHyphen, $message);  # Original test.
272
273        "a" =~ m/[b]/;
274        like("0", qr/\p{N}+\z/, $message);         # Variant.
275    }
276
277    {
278        my $message = "(??{ }) doesn't return stale values; Bug 20683";
279        our $p = 1;
280        foreach (1, 2, 3, 4) {
281            $p ++ if /(??{ $p })/
282        }
283        is($p, 5, $message);
284
285        {
286            package P;
287            $a = 1;
288            sub TIESCALAR {bless []}
289            sub FETCH     {$a ++}
290        }
291        tie $p, "P";
292        foreach (1, 2, 3, 4) {
293            /(??{ $p })/
294        }
295        is($p, 5, $message);
296    }
297
298    {
299        # Subject: Odd regexp behavior
300        # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
301        # Date: Wed, 26 Feb 2003 16:53:12 +0000
302        # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
303        # To: perl-unicode@perl.org
304
305        my $message = 'Markus Kuhn 2003-02-26';
306
307        my $x = "\x{2019}\nk";
308        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
309        is($x, "\x{2019} k", $message);
310
311        $x = "b\nk";
312        ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
313        is($x, "b k", $message);
314
315        like("\x{2019}", qr/\S/, $message);
316    }
317
318    {
319        my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411";
320        our $i;
321        is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message);
322        no warnings 'syntax';
323        @_ = split /(?{'WOW'})/, 'abc';
324        local $" = "|";
325        is("@_", "a|b|c", $message);
326    }
327
328    is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split');
329
330    {
331        $_ = "code:   'x' { '...' }\n"; study;
332        my @x; push @x, $& while m/'[^\']*'/gx;
333        local $" = ":";
334        is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757");
335    }
336
337    {
338        sub func ($) {
339            ok("a\nb" !~ /^b/,  "Propagated modifier; $_[0]; Bug 22354");
340            ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354");
341        }
342        func "standalone";
343        $_ = "x"; s/x/func "in subst"/e;
344        $_ = "x"; s/x/func "in multiline subst"/em;
345        $_ = "x"; /x(?{func "in regexp"})/;
346        $_ = "x"; /x(?{func "in multiline regexp"})/m;
347    }
348
349    {
350        $_    = "abcdef\n";
351        my @x = m/./g;
352        is("abcde", $`, 'Global match sets $`; Bug 19049');
353    }
354
355    {
356        # [perl #23769] Unicode regex broken on simple example
357        # regrepeat() didn't handle UTF-8 EXACT case right.
358
359        my $Mess       = 'regrepeat() handles UTF-8 EXACT case right';
360        my $message = "$Mess; Bug 23769";
361
362        my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
363
364        like($s, qr/\x{a0}/, $message);
365        like($s, qr/\x{a0}+/, $message);
366        like($s, qr/\x{a0}\x{a0}/, $message);
367
368        $message = "$Mess (easy variant); Bug 23769";
369        ok("aaa\x{100}" =~ /(a+)/, $message);
370        is($1, "aaa", $message);
371
372        $message = "$Mess (easy invariant); Bug 23769";
373        ok("aaa\x{100}     " =~ /(a+?)/, $message);
374        is($1, "a", $message);
375
376        $message = "$Mess (regrepeat variant); Bug 23769";
377        ok("\xa0\xa0\xa0\x{100}    " =~ /(\xa0+?)/, $message);
378        is($1, "\xa0", $message);
379
380        $message = "$Mess (regrepeat invariant); Bug 23769";
381        ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message);
382        is($1, "\xa0\xa0\xa0", $message);
383
384        $message = "$Mess (hard variant); Bug 23769";
385        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message);
386        is($1, "\xa0\xa1", $message);
387
388        $message = "$Mess (hard invariant); Bug 23769";
389        ok("ababab\x{100}  " =~ /((?:ab)+)/, $message);
390        is($1, 'ababab', $message);
391
392        ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message);
393        is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message);
394
395        ok("ababab\x{100}  " =~ /((?:ab)+?)/, $message);
396        is($1, "ab", $message);
397
398        $message = "Don't match first byte of UTF-8 representation; Bug 23769";
399        unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message);
400        unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message);
401        unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message);
402    }
403
404    {
405        # perl panic: pp_match start/end pointers
406
407        is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc",
408	   'Captures can move backwards in string; Bug 25269');
409    }
410
411    {
412        # \cA not recognized in character classes
413        like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940');
414        like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940');
415        like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940');
416        like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940');
417        like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940');
418        like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940');
419        like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940');
420        unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940');
421    }
422
423    {
424        # perl #28532: optional zero-width match at end of string is ignored
425
426        ok("abc" =~ /^abc(\z)?/ && defined($1),
427           'Optional zero-width match at end of string; Bug 28532');
428        ok("abc" =~ /^abc(\z)??/ && !defined($1),
429           'Optional zero-width match at end of string; Bug 28532');
430    }
431
432    {
433        my $utf8 = "\xe9\x{100}"; chop $utf8;
434        my $latin1 = "\xe9";
435
436        like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207");
437        like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207");
438        like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207");
439        like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207");
440
441        like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207");
442        like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207");
443        like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207");
444        like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207");
445    }
446
447    {
448        my $s = "abcd";
449        $s =~ /(..)(..)/g;
450        $s = $1;
451        $s = $2;
452        is($2, 'cd',
453	   "Assigning to original string does not corrupt match vars; Bug 37038");
454    }
455
456    {
457        {
458            package wooosh;
459            sub gloople {"!"}
460        }
461        my $aeek = bless {} => 'wooosh';
462        is(do {$aeek -> gloople () =~ /(.)/g}, 1,
463	   "//g match against return value of sub [change e26a497577f3ce7b]");
464
465        sub gloople {"!"}
466        is(do{gloople () =~ /(.)/g}, 1,
467	   "change e26a497577f3ce7b didn't affect sub calls for some reason");
468    }
469
470    {
471        # [perl #78680]
472        # See changes 26925-26928, which reverted change 26410
473        {
474            package lv;
475            our $var = "abc";
476            sub variable : lvalue {$var}
477        }
478        my $o = bless [] => 'lv';
479        my $f = "";
480        my $r = eval {
481            for (1 .. 2) {
482                $f .= $1 if $o -> variable =~ /(.)/g;
483            }
484            1;
485        };
486        if ($r) {
487            is($f, "ab", "pos() retained between calls");
488        }
489        else {
490            ok 0, "Code failed: $@";
491        }
492
493        our $var = "abc";
494        sub variable : lvalue {$var}
495        my $g = "";
496        my $s = eval {
497            for (1 .. 2) {
498                $g .= $1 if variable =~ /(.)/g;
499            }
500            1;
501        };
502        if ($s) {
503            is($g, "ab", "pos() retained between calls");
504        }
505        else {
506            ok 0, "Code failed: $@";
507        }
508    }
509
510  SKIP:
511    {
512        skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC;
513        fresh_perl_like(
514            'no warnings "utf8";
515             $_ = pack "U0C2", 0xa2, 0xf8;  # Ill-formed UTF-8
516             my $ret = 0;
517             do {!($ret = s/[a\0]+//g)}',
518             qr/Malformed UTF-8/,
519             {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836");
520    }
521
522    {
523        # chr(65535) should be allowed in regexes
524
525        no warnings 'utf8'; # To allow non-characters
526        my ($c, $r, $s);
527
528        $c = chr 0xffff;
529        $c =~ s/$c//g;
530        is($c, "", "U+FFFF, parsed as atom; Bug 38293");
531
532        $c = chr 0xffff;
533        $r = "\\$c";
534        $c =~ s/$r//g;
535        is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293");
536
537        $c = chr 0xffff;
538        $c =~ s/[$c]//g;
539        is($c, "", "U+FFFF, parsed in class; Bug 38293");
540
541        $c = chr 0xffff;
542        $r = "[\\$c]";
543        $c =~ s/$r//g;
544        is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293");
545
546        $s = "A\x{ffff}B";
547        $s =~ s/\x{ffff}//i;
548        is($s, "AB", "U+FFFF, EXACTF; Bug 38293");
549
550        $s = "\x{ffff}A";
551        $s =~ s/\bA//;
552        is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293");
553
554        $s = "\x{ffff}!";
555        $s =~ s/\B!//;
556        is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293");
557    }
558
559    {
560
561        # The printing characters
562        my @chars = ("A" .. "Z");
563        my $delim = ",";
564        my $size = 32771 - 4;
565        my $str = '';
566
567        # Create some random junk. Inefficient, but it works.
568        for (my $i = 0; $i < $size; $ i++) {
569            $str .= $chars [rand @chars];
570        }
571
572        $str .= ($delim x 4);
573        my $res;
574        my $matched;
575        ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583");
576        is($str, "", "Empty string; Bug 39583");
577        ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583');
578    }
579
580    {
581        like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940');
582        like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
583        like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940');
584        like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
585
586        like("X\0A", qr/X\c@?A/,  '\c@?; Bug 27940');
587        like("X\0A", qr/X\c@*A/,  '\c@*; Bug 27940');
588        like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940');
589        like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940');
590        like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940');
591
592        like("X\@A", qr/X@?A/,  '@?; Bug 27940');
593        like("X\@A", qr/X@*A/,  '@*; Bug 27940');
594        like("X\@A", qr/X@(A)/, '@(; Bug 27940');
595        like("X\@A", qr/X(@)A/, '@); Bug 27940');
596        like("X\@A", qr/X@|ZA/, '@|; Bug 27940');
597
598        local $" = ','; # non-whitespace and non-RE-specific
599        like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940');
600        like("A@+B", qr/A@{+}B/,  'Interpolation of @+ in /@{+}/; Bug 27940');
601        like("A@-B", qr/A@{-}B/,  'Interpolation of @- in /@{-}/; Bug 27940');
602        like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940');
603        like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940');
604    }
605
606    {
607        my $s = 'foo bar baz';
608        my (@k, @v, @fetch, $res);
609        my $count = 0;
610        my @names = qw ($+{A} $+{B} $+{C});
611        if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
612            while (my ($k, $v) = each (%+)) {
613                $count++;
614            }
615            @k = sort keys   (%+);
616            @v = sort values (%+);
617            $res = 1;
618            push @fetch,
619                ["$+{A}", "$1"],
620                ["$+{B}", "$2"],
621                ["$+{C}", "$3"],
622            ;
623        }
624        foreach (0 .. 2) {
625            if ($fetch [$_]) {
626                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
627            } else {
628                ok 0, $names[$_];
629            }
630        }
631        is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496");
632        is($count, 3, "Got 3 keys in %+ via each; Bug 50496");
633        is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496");
634        is("@k", "A B C", "Got expected keys; Bug 50496");
635        is("@v", "bar baz foo", "Got expected values; Bug 50496");
636        eval '
637            no warnings "uninitialized";
638            print for $+ {this_key_doesnt_exist};
639        ';
640        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
641    }
642
643    {
644        #
645        # Almost the same as the block above, except that the capture is nested.
646        #
647
648        my $s = 'foo bar baz';
649        my (@k, @v, @fetch, $res);
650        my $count = 0;
651        my @names = qw ($+{A} $+{B} $+{C} $+{D});
652        if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
653            while (my ($k,$v) = each(%+)) {
654                $count++;
655            }
656            @k = sort keys   (%+);
657            @v = sort values (%+);
658            $res = 1;
659            push @fetch,
660                ["$+{A}", "$2"],
661                ["$+{B}", "$3"],
662                ["$+{C}", "$4"],
663                ["$+{D}", "$1"],
664            ;
665        }
666        foreach (0 .. 3) {
667            if ($fetch [$_]) {
668                is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
669            } else {
670                ok 0, $names [$_];
671            }
672        }
673        is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496");
674        is($count, 4, "Got 4 keys in %+ via each; Bug 50496");
675        is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496");
676        is("@k", "A B C D", "Got expected keys; Bug 50496");
677        is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496");
678        eval '
679            no warnings "uninitialized";
680            print for $+ {this_key_doesnt_exist};
681        ';
682        is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
683    }
684
685    {
686        my $str = 'abc';
687        my $count = 0;
688        my $mval = 0;
689        my $pval = 0;
690        while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
691        is($mval,  0, '@- should be empty; Bug 36046');
692        is($pval,  0, '@+ should be empty; Bug 36046');
693        is($count, 1, 'Should have matched once only; Bug 36046');
694    }
695
696    {
697        my $message = '/m in precompiled regexp; Bug 40684';
698        my $s = "abc\ndef";
699        my $rex = qr'^abc$'m;
700        ok($s =~ m/$rex/, $message);
701        ok($s =~ m/^abc$/m, $message);
702    }
703
704    {
705        my $message = '(?: ... )? should not lose $^R; Bug 36909';
706        $^R = 'Nothing';
707        {
708            local $^R = "Bad";
709            ok('x foofoo y' =~ m {
710                      (foo) # $^R correctly set
711                      (?{ "last regexp code result" })
712            }x, $message);
713            is($^R, 'last regexp code result', $message);
714        }
715        is($^R, 'Nothing', $message);
716
717        {
718            local $^R = "Bad";
719
720            ok('x foofoo y' =~ m {
721                      (?:foo|bar)+ # $^R correctly set
722                      (?{ "last regexp code result" })
723            }x, $message);
724            is($^R, 'last regexp code result', $message);
725        }
726        is($^R, 'Nothing', $message);
727
728        {
729            local $^R = "Bad";
730            ok('x foofoo y' =~ m {
731                      (foo|bar)\1+ # $^R undefined
732                      (?{ "last regexp code result" })
733            }x, $message);
734            is($^R, 'last regexp code result', $message);
735        }
736        is($^R, 'Nothing', $message);
737
738        {
739            local $^R = "Bad";
740            ok('x foofoo y' =~ m {
741                      (foo|bar)\1 # This time without the +
742                      (?{"last regexp code result"})
743            }x, $message);
744            is($^R, 'last regexp code result', $message);
745        }
746        is($^R, 'Nothing', $message);
747    }
748
749    {
750        my $message = 'Match is linear, not quadratic; Bug 22395';
751        our $count;
752        for my $l (10, 100, 1000) {
753            $count = 0;
754            ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
755            local $::TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
756            is($count, $l + 1, $message);
757        }
758    }
759
760    {
761        my $message = '@-/@+ should not have undefined values; Bug 22614';
762        local $_ = 'ab';
763        our @len = ();
764        /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
765        is("@len", "2 2 2", $message);
766    }
767
768    {
769        my $message = '$& set on s///; Bug 18209';
770        my $text = ' word1 word2 word3 word4 word5 word6 ';
771
772        my @words = ('word1', 'word3', 'word5');
773        my $count;
774        foreach my $word (@words) {
775            $text =~ s/$word\s//gi; # Leave a space to separate words
776                                    # in the resultant str.
777            # The following block is not working.
778            if ($&) {
779                $count ++;
780            }
781            # End bad block
782        }
783        is($count, 3, $message);
784        is($text, ' word2 word4 word6 ', $message);
785    }
786
787    {
788        # RT#6893
789
790        local $_ = qq (A\nB\nC\n);
791        my @res;
792        while (m#(\G|\n)([^\n]*)\n#gsx) {
793            push @res, "$2";
794            last if @res > 3;
795        }
796        is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893");
797    }
798
799    {
800        # No optimizer bug
801        my @tails  = ('', '(?(1))', '(|)', '()?');
802        my @quants = ('*','+');
803        my $doit = sub {
804            my $pats = shift;
805            for (@_) {
806                for my $pat (@$pats) {
807                    for my $quant (@quants) {
808                        for my $tail (@tails) {
809                            my $re = "($pat$quant\$)$tail";
810                            ok(/$re/  && $1 eq $_, "'$_' =~ /$re/; Bug 41010");
811                            ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010");
812                        }
813                    }
814                }
815            }
816        };
817
818        my @dpats = ('\d',
819                     '[1234567890]',
820                     '(1|[23]|4|[56]|[78]|[90])',
821                     '(?:1|[23]|4|[56]|[78]|[90])',
822                     '(1|2|3|4|5|6|7|8|9|0)',
823                     '(?:1|2|3|4|5|6|7|8|9|0)');
824        my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
825        my @sstrs = ('  ');
826        my @dstrs = ('12345');
827        $doit -> (\@spats, @sstrs);
828        $doit -> (\@dpats, @dstrs);
829    }
830
831    {
832        # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
833
834        my $utf_8 = "\xd6schel";
835        utf8::upgrade ($utf_8);
836        $utf_8 =~ m {(\xd6|&Ouml;)schel};
837        is($1, "\xd6", "Upgrade error; Bug 45605");
838    }
839
840    {
841        # Regardless of utf8ness any character matches itself when
842        # doing a case insensitive match. See also [perl #36207]
843
844        for my $o (0 .. 255) {
845            my @ch = (chr ($o), chr ($o));
846            utf8::upgrade ($ch [1]);
847            for my $u_str (0, 1) {
848                for my $u_pat (0, 1) {
849                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i,
850			 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
851                    like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i,
852			 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
853                }
854            }
855        }
856    }
857
858    {
859         my $message = '$REGMARK in replacement; Bug 49190';
860         our $REGMARK;
861         local $_ = "A";
862         ok(s/(*:B)A/$REGMARK/, $message);
863         is($_, "B", $message);
864         $_ = "CCCCBAA";
865         ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message);
866         is($_, "ZYX", $message);
867         # Use a longer name to force reallocation of $REGMARK.
868         $_ = "CCCCBAA";
869         ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message);
870         is($_, "ZYYYYYYYYYYYYYYYYX", $message);
871    }
872
873    {
874        my $message = 'Substitution evaluation in list context; Bug 52658';
875        my $reg = '../xxx/';
876        my @te  = ($reg =~ m{^(/?(?:\.\./)*)},
877                   $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
878        is($reg, '../bbb/', $message);
879        is($te [0], '../', $message);
880    }
881
882    {
883        my $a = "xyzt" x 8192;
884        like($a, qr/\A(?>[a-z])*\z/,
885	     '(?>) does not cause wrongness on long string; Bug 60034');
886        my $b = $a . chr 256;
887        chop $b;
888	is($a, $b, 'Bug 60034');
889        like($b, qr/\A(?>[a-z])*\z/,
890	     '(?>) does not cause wrongness on long string with UTF-8; Bug 60034');
891    }
892
893    #
894    # Keep the following tests last -- they may crash perl
895    #
896    print "# Tests that follow may crash perl\n";
897    {
898
899        my $message = 'Pattern in a loop, failure should not ' .
900                         'affect previous success; Bug 19049/38869';
901        my @list = (
902            'ab cdef',             # Matches regex
903            ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
904        );
905        my $y;
906        my $x;
907        foreach (@list) {
908            m/ab(.+)cd/i; # The ignore-case seems to be important
909            $y = $1;      # Use $1, which might not be from the last match!
910            $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
911        }
912        is($y, ' ', $message);
913        is($x, 'ab cd', $message);
914    }
915
916    SKIP: {
917        skip("Can run out of memory on os390", 1) if $^O eq 'os390';
918        ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274");
919    }
920    {
921        ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
922            "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274");
923    }
924
925    {
926        # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
927
928        local ${^UTF8CACHE} = -1;
929        my $message = "Shouldn't panic; Bug 45337";
930        my $s = "[a]a{2}";
931        utf8::upgrade $s;
932        like("aaa", qr/$s/, $message);
933    }
934    {
935	my $message = "Check if tree logic breaks \$^R; Bug 57042";
936	my $cond_re = qr/\s*
937	    \s* (?:
938		   \( \s* A  (?{1})
939		 | \( \s* B  (?{2})
940	       )
941	   /x;
942	my @res;
943	for my $line ("(A)","(B)") {
944	   if ($line =~ m/$cond_re/) {
945	       push @res, $^R ? "#$^R" : "UNDEF";
946	   }
947	}
948	is("@res","#1 #2", $message);
949    }
950    {   # A (??{ }) that returns a qr// which itself contains (??{ }) must still set captures.
951	no warnings 'closure';
952	my $re = qr/A(??{"1"})/;
953	ok("A1B" =~ m/^((??{ $re }))((??{"B"}))$/, 'nested (??{ }) inside capture groups matches');
954	is($1, "A1", '$1 holds the text matched by the nested (??{ $re })');
955	is($2, "B", '$2 holds the text matched by (??{"B"})');
956    }
957
958    # This only works under -DEBUGGING because it relies on an assert().
959    {
960	# Check capture offset re-entrancy of utf8 code.
961
962        sub fswash { $_[0] =~ s/([>X])//g; }  # deletes every '>' and 'X' from the caller's scalar in place (via the $_[0] alias)
963
964        my $k1 = "." x 4 . ">>";
965        fswash($k1);
966
967        my $k2 = "\x{f1}\x{2022}";
968        $k2 =~ s/([\360-\362])/>/g;
969        fswash($k2);
970
971        is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508");
972    }
973
974    {
975	# minimal CURLYM limited to 32767 matches
976	my @pat = (
977	    qr{a(x|y)*b},	# CURLYM
978	    qr{a(x|y)*?b},	# .. with minmod
979	    qr{a([wx]|[yz])*b},	# .. and without tries
980	    qr{a([wx]|[yz])*?b},
981	);
982	my $len = 32768;
983	my $s = join '', 'a', 'x' x $len, 'b';
984	for my $pat (@pat) {
985	    like($s, $pat, "$pat; Bug 65372");
986	}
987    }
988
989    {   # Expected (still TODO): $2 is the same whether the innermost group is (?:A) or (A).
990        local $::TODO = "[perl #38133]";
991
992        "A" =~ /(((?:A))?)+/;
993        my $first = $2;
994
995        "A" =~ /(((A))?)+/;
996        my $second = $2;
997
998        is($first, $second, '$2 is the same with (?:A) and (A) as innermost group; Bug 38133');
999    }
1000
1001    {
1002       my $message
1003        = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998';
1004
1005       # Catch warnings:
1006       my $w;
1007       local $SIG{__WARN__} = sub { $w .= shift };
1008
1009       # This bug can be reduced to
1010       qq{\x{30ab}} =~ /\xab|\xa9/;
1011       # but it's nice to have a more 'real-world' test. The original test
1012       # case from the RT ticket follows:
1013
1014       my %conv = (
1015                   "\xab"     => "&lt;",
1016                   "\xa9"     => "(c)",
1017                  );
1018       my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1019       $conv_rx = qr{$conv_rx};
1020
1021       my $x
1022        = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1023        . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1024        . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1025        . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1026        . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1027        . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1028        . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1029        . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1030
1031       $x =~ s{$conv_rx}{$conv{$1}}eg;
1032
1033       is($w, undef, $message);
1034    }
1035
1036    {
1037        # stclass optimisation must not break + inside a lookahead (?=...); Bug 68564
1038
1039        is(join("-", "   abc   def  " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f",
1040	   'stclass optimisation does not break + inside (?=); Bug 68564');
1041    }
1042
1043    {
1044        use charnames ":full";
1045        # Delayed interpolation of \N'
1046        my $r1 = qr/\N{THAI CHARACTER SARA I}/;
1047        my $r2 = qr'\N{THAI CHARACTER SARA I}';
1048        my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1049
1050        # Bug #56444
1051        ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
1052        ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\'';
1053
1054        # Bug #62056
1055        ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1056
1057        ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
1058        ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"';
1059        ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
1060        ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"';
1061    }
1062
1063    {
1064        use charnames ":full";
1065        my $message = '[perl #74982] Period coming after \N{}';
1066        ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
1067        ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message);
1068        ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
1069        ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message);
1070    }
1071
1072SKIP: {
1073    ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org
1074
1075    skip('Perl configured without Encode module', 1)
1076	unless $Config{extensions} =~ / Encode /;
1077
1078    # Test case cut down by jhi
1079    fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, {}, 'Segfault using HTML::Entities');
1080use Encode;
1081my $t = ord('A') == 193 ? "\xEA" : "\xE9";
1082Encode::_utf8_on($t);
1083$t =~ s/([^a])//ge;
1084EOP
1085    }
1086
1087    {
1088        # pattern must be compiled late or we can break the test file
1089        my $message = '[perl #115050] repeated nothings in a trie can cause panic';
1090        my $pattern;
1091        $pattern = '[xyz]|||';
1092        ok("blah blah" =~ /$pattern/, $message);
1093        ok("blah blah" =~ /(?:$pattern)h/, $message);
1094        $pattern = '|||[xyz]';
1095        ok("blah blah" =~ /$pattern/, $message);
1096        ok("blah blah" =~ /(?:$pattern)h/, $message);
1097    }
1098
1099    {
1100        # [perl #4289] First mention $& after a match
1101	local $::TODO = "these tests fail without Copy-on-Write enabled"
1102	    if $Config{ccflags} =~ /PERL_NO_COW/;
1103        fresh_perl_is(
1104            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"',
1105            "b\n", {}, '$& first mentioned after match');
1106        fresh_perl_is(
1107            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"',
1108            "a\n", {}, '$` first mentioned after match');
1109        fresh_perl_is(
1110            '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"',
1111            "c\n", {}, '$\' first mentioned after match');
1112    }
1113
1114    {
1115	# [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t
1116	# this tests some related failures
1117	#
1118	# The tests in the block *only* fail when run on 32-bit systems
1119	# with a malloc that allocates above the 2GB line.  On the system
1120	# in the report above that only happened in a thread.
1121	my $s = "\x{1ff}" . "f" x 32;
1122	ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
1123    }
1124
1125    {
1126        # RT #129012 heap-buffer-overflow Perl_fbm_instr.
1127        # This test is unlikely to not pass, but it used to fail
1128        # ASAN/valgrind
1129
1130        my $s ="\x{100}0000000";
1131        ok($s !~ /00000?\x80\x80\x80/, "RT #129012");
1132    }
1133
1134    {
1135        # RT #129085 heap-buffer-overflow Perl_re_intuit_start
1136        # this did fail under ASAN, but didn't under valgrind
1137        my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}";
1138        ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085");
1139    }
1140    {
1141        # RT #130561: impossible quantifiers such as {8,0} and {1,0} must not SEGV or break recursion
1142        fresh_perl_is(
1143            'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"',
1144            "ok", {},  'RT #130561 - allowing impossible quantifier should not cause SEGVs');
1145        my $s= "foo";
1146        no warnings 'regexp';
1147        ok($s=~/(foo){1,0}|(?1)/,
1148            "RT #130561 - allowing impossible quantifier should not break recursion");
1149    }
1150	{
1151		# RT #133892 Coredump in Perl_re_intuit_start
1152		# Second match flips to checking floating substring before fixed
1153		# substring, which triggers a pathway that failed to check there
1154		# was a non-utf8 version of the string before trying to use it
1155		# resulting in a SEGV.
1156		my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 };
1157		is($result, 0, 'RT #133892 - neither string matches, and the second match does not SEGV');
1158	}
1159
1160} # End of sub run_tests
1161
11621;
1163