--
-- Regular expression tests
--

-- Don't want to have to double backslashes in regexes
set standard_conforming_strings = on;

-- Test simple quantified backrefs
select 'bbbbb' ~ '^([bc])\1*$' as t;
select 'ccc' ~ '^([bc])\1*$' as t;
select 'xxx' ~ '^([bc])\1*$' as f;
select 'bbc' ~ '^([bc])\1*$' as f;
select 'b' ~ '^([bc])\1*$' as t;

-- Test quantified backref within a larger expression
select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
select 'abc abc abd' ~ '^(.+)( \1)+$' as f;

-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
select substring('a' from '((a))+');
select substring('a' from '((a)+)');

-- Test regexp_match()
select regexp_match('abc', '');
select regexp_match('abc', 'bc');
select regexp_match('abc', 'd') is null;
select regexp_match('abc', '(B)(c)', 'i');
select regexp_match('abc', 'Bd', 'ig'); -- error

-- Test lookahead constraints
select regexp_matches('ab', 'a(?=b)b*');
select regexp_matches('a', 'a(?=b)b*');
select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
select regexp_matches('ab', 'a(?!b)b*');
select regexp_matches('a', 'a(?!b)b*');
select regexp_matches('b', '(?=b)b');
select regexp_matches('a', '(?=b)b');

-- Test lookbehind constraints
select regexp_matches('abb', '(?<=a)b*');
select regexp_matches('a', 'a(?<=a)b*');
select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
select regexp_matches('ab', 'a*(?<!a)b*');
select regexp_matches('ab', 'a*(?<!a)b+');
select regexp_matches('b', 'a*(?<!a)b+');
select regexp_matches('a', 'a(?<!a)b*');
select regexp_matches('b', '(?<=b)b');
select regexp_matches('foobar', '(?<=f)b+');
select regexp_matches('foobar', '(?<=foo)b+');
select regexp_matches('foobar', '(?<=oo)b+');

-- Test optimization of single-chr-or-bracket-expression lookaround constraints
select 'xz' ~ 'x(?=[xy])';
select 'xy' ~ 'x(?=[xy])';
select 'xz' ~ 'x(?![xy])';
select 'xy' ~ 'x(?![xy])';
select 'x' ~ 'x(?![xy])';
select 'xyy' ~ '(?<=[xy])yy+';
select 'zyy' ~ '(?<=[xy])yy+';
select 'xyy' ~ '(?<![xy])yy+';
select 'zyy' ~ '(?<![xy])yy+';

-- Test conversion of regex patterns to indexable conditions
explain (costs off) select * from pg_proc where proname ~ 'abc';
explain (costs off) select * from pg_proc where proname ~ '^abc';
explain (costs off) select * from pg_proc where proname ~ '^abc$';
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)';

-- Test for infinite loop in pullback() (CVE-2007-4772)
select 'a' ~ '($|^)*';

-- These cases expose a bug in the original fix for CVE-2007-4772
select 'a' ~ '(^)+^';
select 'a' ~ '$($$)+';

-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
select 'a' ~ '($^)+';
select 'a' ~ '(^$)*';
select 'aa bb cc' ~ '(^(?!aa))+';
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';

-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
select 'a' ~ '((((((a)*)*)*)*)*)*';
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';

-- These cases used to give too-many-states failures
select 'x' ~ 'abcd(\m)+xyz';
select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
select 'x' ~ 'xyz(\Y\Y)+';
select 'x' ~ 'x|(?:\M)+';

-- This generates O(N) states but O(N^2) arcs, so it causes problems
-- if arc count is not constrained
select 'x' ~ repeat('x*y*z*', 1000);

-- Test backref in combination with non-greedy quantifier
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
select 'Programmer' ~ '(\w).*?\1' as t;
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');

-- Test for proper matching of non-greedy iteration (bug #11478)
select regexp_matches('foo/bar/baz',
                      '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');

-- Test that greediness can be overridden by outer quantifier
select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');

-- Test for infinite loop in cfindloop with zero-length possible match
-- but no actual match (can only happen in the presence of backrefs)
select 'a' ~ '$()|^\1';
select 'a' ~ '.. ()|\1';
select 'a' ~ '()*\1';
select 'a' ~ '()+\1';

-- Test incorrect removal of capture groups within {0}
select 'xxx' ~ '(.){0}(\1)' as f;
select 'xxx' ~ '((.)){0}(\2)' as f;
select 'xyz' ~ '((.)){0}(\2){0}' as t;

-- Test ancient oversight in when to apply zaptreesubs
select 'abcdef' ~ '^(.)\1|\1.' as f;
select 'abadef' ~ '^((.)\2|..)\2' as f;

-- Add coverage for some cases in checkmatchall
select regexp_match('xy', '.|...');
select regexp_match('xyz', '.|...');
select regexp_match('xy', '.*');
select regexp_match('fooba', '(?:..)*');
select regexp_match('xyz', repeat('.', 260));
select regexp_match('foo', '(?:.|){99}');

-- Error conditions
select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs
select 'xyz' ~ 'x(\w)(?=(\1))';
select 'a' ~ '\x7fffffff'; -- invalid chr code