xref: /openbsd/gnu/usr.bin/perl/t/re/pat_psycho.t (revision 3cab2bb3)
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t.  If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6#
7# this file includes test that my burn a lot of CPU or otherwise be heavy
8# on resources. Set env var $PERL_SKIP_PSYCHO_TEST to skip this file
9
10use strict;
11use warnings;
12use 5.010;
13
14
15sub run_tests;
16
17$| = 1;
18
19
20BEGIN {
21    chdir 't' if -d 't';
22    require './test.pl';
23    set_up_inc('../lib', '.');
24    if ($^O eq 'dec_osf') {
25        skip_all("$^O cannot handle this test");
26    }
27    my $time_out_factor = $ENV{PERL_TEST_TIME_OUT_FACTOR} || 1;
28    $time_out_factor = 1 if $time_out_factor < 1;
29
30    watchdog(5 * 60 * $time_out_factor);
31}
32
33
34skip_all('$PERL_SKIP_PSYCHO_TEST set') if $ENV{PERL_SKIP_PSYCHO_TEST};
35
36plan tests => 15;  # Update this when adding/deleting tests.
37
38run_tests() unless caller;
39
40#
41# Tests start here.
42#
43sub run_tests {
44    print "# Set PERL_SKIP_PSYCHO_TEST to skip these tests\n";
45
46    {
47
48	# stress test tries
49
50        my @normal = qw [the are some normal words];
51
52        local $" = "|";
53
54	note "setting up trie psycho vars ...";
55        my @psycho = (@normal, map chr $_, 255 .. 20000);
56        my $psycho1 = "@psycho";
57        for (my $i = @psycho; -- $i;) {
58            my $j = int rand (1 + $i);
59            @psycho [$i, $j] = @psycho [$j, $i];
60        }
61        my $psycho2 = "@psycho";
62
63        foreach my $word (@normal) {
64            ok $word =~ /($psycho1)/ && $1 eq $word, qq{"$word" =~ /\$psycho1/};
65            ok $word =~ /($psycho2)/ && $1 eq $word, qq{"$word" =~ /\$psycho1/};
66        }
67    }
68
69
70    {
71        # stress test CURLYX/WHILEM.
72        #
73        # This test includes varying levels of nesting, and according to
74        # profiling done against build 28905, exercises every code line in the
75        # CURLYX and WHILEM blocks, except those related to LONGJMP, the
76        # super-linear cache and warnings. It executes about 0.5M regexes
77
78        no warnings 'regexp';   # Silence "has useless greediness modifier"
79        my $r = qr/^
80                    (?:
81                        ( (?:a|z+)+ )
82                        (?:
83                            ( (?:b|z+){3,}? )
84                            (
85                                (?:
86                                    (?:
87                                        (?:c|z+){1,1}?z
88                                    )?
89                                    (?:c|z+){1,1}
90                                )*
91                            )
92                            (?:z*){2,}
93                            ( (?:z+|d)+ )
94                            (?:
95                                ( (?:e|z+)+ )
96                            )*
97                            ( (?:f|z+)+ )
98                        )*
99                        ( (?:z+|g)+ )
100                        (?:
101                            ( (?:h|z+)+ )
102                        )*
103                        ( (?:i|z+)+ )
104                    )+
105                    ( (?:j|z+)+ )
106                    (?:
107                        ( (?:k|z+)+ )
108                    )*
109                    ( (?:l|z+)+ )
110              $/x;
111        use warnings 'regexp';
112
113        my $ok = 1;
114        my $msg = "CURLYX stress test";
115        OUTER:
116          for my $a ("x","a","aa") {
117            for my $b ("x","bbb","bbbb") {
118              my $bs = $a.$b;
119              for my $c ("x","c","cc") {
120                my $cs = $bs.$c;
121                for my $d ("x","d","dd") {
122                  my $ds = $cs.$d;
123                  for my $e ("x","e","ee") {
124                    my $es = $ds.$e;
125                    for my $f ("x","f","ff") {
126                      my $fs = $es.$f;
127                      for my $g ("x","g","gg") {
128                        my $gs = $fs.$g;
129                        for my $h ("x","h","hh") {
130                          my $hs = $gs.$h;
131                          for my $i ("x","i","ii") {
132                            my $is = $hs.$i;
133                            for my $j ("x","j","jj") {
134                              my $js = $is.$j;
135                              for my $k ("x","k","kk") {
136                                my $ks = $js.$k;
137                                for my $l ("x","l","ll") {
138                                  my $ls = $ks.$l;
139                                  if ($ls =~ $r) {
140                                    if ($ls =~ /x/) {
141                                      $msg .= ": unexpected match for [$ls]";
142                                      $ok = 0;
143                                      last OUTER;
144                                    }
145                                    my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12";
146                                    unless ($ls eq $cap) {
147                                      $msg .= ": capture: [$ls], got [$cap]";
148                                      $ok = 0;
149                                      last OUTER;
150                                    }
151                                  }
152                                  else {
153                                    unless ($ls =~ /x/) {
154                                      $msg = ": failed for [$ls]";
155                                      $ok = 0;
156                                      last OUTER;
157                                    }
158                                  }
159                                }
160                              }
161                            }
162                          }
163                        }
164                      }
165                    }
166                  }
167                }
168              }
169            }
170        }
171        ok($ok, $msg);
172    }
173
174
175    {
176	# these bits of test code used to run quadratically. If we break
177	# anything, they'll start to take minutes to run, rather than
178	# seconds. We don't actually measure times or set alarms, since
179	# that tends to be very fragile and prone to false positives.
180	# Instead, just hope that if someone is messing with
181	# performance-related code, they'll re-run the test suite and
182	# notice it suddenly takes a lot longer.
183
184	my $x;
185
186	$x = 'x' x 1_000_000;
187	1 while $x =~ /(.)/g;
188	pass "ascii =~ /(.)/";
189
190	{
191	    local ${^UTF8CACHE} = 1; # defeat debugging
192	    $x = "\x{100}" x 1_000_000;
193	    1 while $x =~ /(.)/g;
194	    pass "utf8 =~ /(.)/";
195	}
196
197	# run these in separate processes, since they set $&
198
199        fresh_perl_is(<<'EOF', "ok\n", {}, 'ascii =~ /(.)/, mention $&');
200$&;
201$x = 'x' x 1_000_000;
2021 while $x =~ /(.)/g;
203print "ok\n";
204EOF
205
206        fresh_perl_is(<<'EOF', "ok\n", {}, 'utf8 =~ /(.)/, mention $&');
207$&;
208local ${^UTF8CACHE} = 1; # defeat debugging
209$x = "\x{100}" x 1_000_000;
2101 while $x =~ /(.)/g;
211print "ok\n";
212EOF
213
214
215    }
216} # End of sub run_tests
217
2181;
219