1--
2-- Regular expression tests
3--
4
5-- Don't want to have to double backslashes in regexes
-- With this setting on, a backslash inside an ordinary '...' literal is an
-- ordinary character, so escapes such as \1 and \w in the patterns below
-- are passed to the regex engine exactly as written.
6set standard_conforming_strings = on;
7
8-- Test simple quantified backrefs
-- The pattern captures a single character from [bc] and then requires zero
-- or more repetitions of that same captured character up to end of string.
-- The column alias (t or f) names the boolean result each query should give.
9select 'bbbbb' ~ '^([bc])\1*$' as t;
10select 'ccc' ~ '^([bc])\1*$' as t;
11select 'xxx' ~ '^([bc])\1*$' as f;
12select 'bbc' ~ '^([bc])\1*$' as f;
13select 'b' ~ '^([bc])\1*$' as t;
14
15-- Test quantified backref within a larger expression
-- Here the backref sits inside its own quantified group: the first word must
-- be followed by one or more " <same word>" repetitions.  The same three
-- inputs are run twice, once capturing with \w+ and once with .+ ; the
-- column alias (t or f) names the expected result.
16select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
17select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
18select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
19select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
20select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
21select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
22
23-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
-- All three patterns nest capturing parentheses inside another capturing
-- group together with a + quantifier.  substring(string from pattern)
-- returns the text matched by the first parenthesized subexpression.
24select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
25select substring('a' from '((a))+');
26select substring('a' from '((a)+)');
27
28-- Test regexp_match()
-- Covers: an empty pattern, a pattern with no capture groups, a pattern
-- that does not match (checked via "is null"), capture groups combined
-- with the case-insensitive flag 'i', and a flags string containing 'g'.
29select regexp_match('abc', '');
30select regexp_match('abc', 'bc');
31select regexp_match('abc', 'd') is null;
32select regexp_match('abc', '(B)(c)', 'i');
-- The 'g' (global) flag is not accepted by regexp_match(), hence the
-- expected error on the next statement.
33select regexp_match('abc', 'Bd', 'ig'); -- error
34
35-- Test lookahead constraints
-- (?=re) is a positive lookahead and (?!re) a negative lookahead; both are
-- zero-width, so the following b* / c* still has to consume the text that
-- the constraint inspected.  Each pattern is run against one input that has
-- the looked-for character and one that lacks it.  Without the 'g' flag,
-- regexp_matches() yields at most one row, and no rows when nothing matches.
36select regexp_matches('ab', 'a(?=b)b*');
37select regexp_matches('a', 'a(?=b)b*');
38select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
39select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
40select regexp_matches('ab', 'a(?!b)b*');
41select regexp_matches('a', 'a(?!b)b*');
42select regexp_matches('b', '(?=b)b');
43select regexp_matches('a', '(?=b)b');
44
45-- Test lookbehind constraints
-- (?<=re) is a positive lookbehind and (?<!re) a negative lookbehind; both
-- are zero-width and examine the text immediately before the current
-- position.  The patterns place the constraint at the start, middle and end
-- of the expression, including directly after a quantified atom.
46select regexp_matches('abb', '(?<=a)b*');
47select regexp_matches('a', 'a(?<=a)b*');
48select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
49select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
50select regexp_matches('ab', 'a*(?<!a)b*');
51select regexp_matches('ab', 'a*(?<!a)b+');
52select regexp_matches('b', 'a*(?<!a)b+');
53select regexp_matches('a', 'a(?<!a)b*');
54select regexp_matches('b', '(?<=b)b');
-- The last three use lookbehinds of one, three and two characters against
-- the same input, where the only 'b' is preceded by "foo".
55select regexp_matches('foobar', '(?<=f)b+');
56select regexp_matches('foobar', '(?<=foo)b+');
57select regexp_matches('foobar', '(?<=oo)b+');
58
59-- Test optimization of single-chr-or-bracket-expression lookaround constraints
-- Every constraint below contains only the bracket expression [xy].
-- Positive and negative lookahead come first, then positive and negative
-- lookbehind; each is run on an input where the adjacent character is in
-- [xy] and on one where it is not.
60select 'xz' ~ 'x(?=[xy])';
61select 'xy' ~ 'x(?=[xy])';
62select 'xz' ~ 'x(?![xy])';
63select 'xy' ~ 'x(?![xy])';
-- Negative lookahead evaluated at end of string (nothing follows the x).
64select 'x'  ~ 'x(?![xy])';
65select 'xyy' ~ '(?<=[xy])yy+';
66select 'zyy' ~ '(?<=[xy])yy+';
67select 'xyy' ~ '(?<![xy])yy+';
68select 'zyy' ~ '(?<![xy])yy+';
69
70-- Test conversion of regex patterns to indexable conditions
-- "costs off" omits cost and row estimates from the plan output, so only
-- the plan shape and its conditions are shown.  The first pattern is
-- unanchored; the rest begin with ^ plus a literal prefix, and differ in
-- what follows: $ anchor, quantifiers, capture groups, an optional group,
-- and an alternation containing a lookahead.
71explain (costs off) select * from pg_proc where proname ~ 'abc';
72explain (costs off) select * from pg_proc where proname ~ '^abc';
73explain (costs off) select * from pg_proc where proname ~ '^abc$';
74explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
75explain (costs off) select * from pg_proc where proname ~ '^abc+d';
76explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
77explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
78explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
79explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)';
80
81-- Test for infinite loop in pullback() (CVE-2007-4772)
-- The pattern is a starred group whose two alternatives are both zero-width
-- anchors ($ and ^), so the group can iterate without consuming input.
82select 'a' ~ '($|^)*';
83
84-- These cases expose a bug in the original fix for CVE-2007-4772
-- Both patterns put a +-quantified group containing only anchors next to
-- another anchor of the same kind (^ in the first, $ in the second).
85select 'a' ~ '(^)+^';
86select 'a' ~ '$($$)+';
87
88-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
-- The first two quantify a group that holds both anchors back to back.
89select 'a' ~ '($^)+';
90select 'a' ~ '(^$)*';
-- The remainder quantify a group made of ^ followed by one or more negative
-- lookaheads.  The three-lookahead pattern is run against inputs starting
-- with aa, bb, cc (each excluded by one lookahead) and dd (excluded by none).
91select 'aa bb cc' ~ '(^(?!aa))+';
92select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
93select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
94select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
95select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
96
97-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
-- Both patterns nest six levels of quantified groups that can each match
-- the empty string: the first via stacked * quantifiers, the second via an
-- empty alternative ("|") at every level.
98select 'a' ~ '((((((a)*)*)*)*)*)*';
99select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
100
101-- These cases used to give too-many-states failures
-- The patterns quantify zero-width constraints or nest groups with empty
-- alternatives.  Constraint escapes used here: \m matches only at the
-- beginning of a word, \M only at the end of a word, and \Y only at a point
-- that is neither the beginning nor the end of a word.
102select 'x' ~ 'abcd(\m)+xyz';
103select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
104select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
105select 'x' ~ 'xyz(\Y\Y)+';
106select 'x' ~ 'x|(?:\M)+';
107
108-- This generates O(N) states but O(N^2) arcs, so it causes problems
109-- if arc count is not constrained
-- repeat() builds the pattern by concatenating 1000 copies of 'x*y*z*',
-- i.e. 3000 consecutive starred single-character atoms.
110select 'x' ~ repeat('x*y*z*', 1000);
111
112-- Test backref in combination with non-greedy quantifier
113-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
-- The pattern captures one word character, skips as little as possible
-- (.*?), and then requires the captured character again.  The first query
-- expects a match (alias t); the second adds a capture around the tail and
-- uses the 'g' flag so that every match in the string is returned.
114select 'Programmer' ~ '(\w).*?\1' as t;
115select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
116
117-- Test for proper matching of non-greedy iteration (bug #11478)
-- Three slash-separated components are captured with non-greedy [^/]+?
-- groups inside a fully anchored pattern; the third component is wrapped in
-- an optional non-capturing group.  The flags argument is an empty string.
118select regexp_matches('foo/bar/baz',
119                      '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
120
121-- Test that greediness can be overridden by outer quantifier
-- {1,1} and {1,1}? both repeat exactly once and differ only in greediness
-- (greedy vs. non-greedy).  The first four queries wrap them around a greedy
-- inner group (l*); the last four repeat the same sequence with a non-greedy
-- inner group (l*?).  The input and the rest of the pattern stay fixed so
-- that only the split between the three captures can change.
122select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
123select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
124select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
125select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
126select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
127select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
128select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
129select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');
130
131-- Test for infinite loop in cfindloop with zero-length possible match
132-- but no actual match (can only happen in the presence of backrefs)
-- Every pattern pairs an empty capture group () with a backreference \1 to
-- it, either in separate alternation branches or after a quantified group.
133select 'a' ~ '$()|^\1';
134select 'a' ~ '.. ()|\1';
135select 'a' ~ '()*\1';
136select 'a' ~ '()+\1';
137
138-- Test incorrect removal of capture groups within {0}
-- A capture group quantified with {0} is never entered, so a later backref
-- to it (or to a group nested inside it) has nothing to match; those two
-- queries expect false (alias f).  In the third query the backref is itself
-- under {0}, so it is never evaluated and the match is expected (alias t).
139select 'xxx' ~ '(.){0}(\1)' as f;
140select 'xxx' ~ '((.)){0}(\2)' as f;
141select 'xyz' ~ '((.)){0}(\2){0}' as t;
142
143-- Test ancient oversight in when to apply zaptreesubs
-- In each pattern a backref is reached by a path on which its capture group
-- was not matched: \1 in the second top-level branch of the first pattern,
-- and \2 after the ".." alternative of the second.  Both queries expect
-- false (alias f).
144select 'abcdef' ~ '^(.)\1|\1.' as f;
145select 'abadef' ~ '^((.)\2|..)\2' as f;
146
147-- Add coverage for some cases in checkmatchall
-- Every pattern is built solely from "." atoms plus alternation and
-- quantifiers, so what it accepts depends only on the length of the text:
-- one or three characters, any length, an even length, exactly 260
-- characters (via repeat()), and up to 99 characters.
148select regexp_match('xy', '.|...');
149select regexp_match('xyz', '.|...');
150select regexp_match('xy', '.*');
151select regexp_match('fooba', '(?:..)*');
152select regexp_match('xyz', repeat('.', 260));
153select regexp_match('foo', '(?:.|){99}');
154
155-- Error conditions
-- Each statement in this section is expected to raise an error rather than
-- return a result.  The first two place a backreference inside a lookahead
-- (bare, then wrapped in its own capture group).
-- NOTE(review): "LACON" presumably abbreviates lookaround/lookahead
-- constraint -- confirm against the regex engine sources.
156select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
157select 'xyz' ~ 'x(\w)(?=(\1))';
-- \x introduces a hexadecimal character escape; the value given here is
-- 0x7fffffff.
158select 'a' ~ '\x7fffffff';  -- invalid chr code
159