--
-- Regular expression tests
--
-- Where a result column is aliased "t" or "f", the alias appears to record
-- the expected boolean outcome of the match (verify against expected output).
--

-- Don't want to have to double backslashes in regexes
set standard_conforming_strings = on;

-- Test simple quantified backrefs
select 'bbbbb' ~ '^([bc])\1*$' as t;
select 'ccc' ~ '^([bc])\1*$' as t;
select 'xxx' ~ '^([bc])\1*$' as f;
select 'bbc' ~ '^([bc])\1*$' as f;
select 'b' ~ '^([bc])\1*$' as t;

-- Test quantified backref within a larger expression
select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
select 'abc abc abd' ~ '^(.+)( \1)+$' as f;

-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
select substring('a' from '((a))+');
select substring('a' from '((a)+)');

-- Test lookahead constraints
select regexp_matches('ab', 'a(?=b)b*');
select regexp_matches('a', 'a(?=b)b*');
select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
select regexp_matches('ab', 'a(?!b)b*');
select regexp_matches('a', 'a(?!b)b*');
select regexp_matches('b', '(?=b)b');
select regexp_matches('a', '(?=b)b');

-- Test lookbehind constraints
select regexp_matches('abb', '(?<=a)b*');
select regexp_matches('a', 'a(?<=a)b*');
select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
select regexp_matches('ab', 'a*(?<!a)b*');
select regexp_matches('ab', 'a*(?<!a)b+');
select regexp_matches('b', 'a*(?<!a)b+');
select regexp_matches('a', 'a(?<!a)b*');
select regexp_matches('b', '(?<=b)b');
select regexp_matches('foobar', '(?<=f)b+');
select regexp_matches('foobar', '(?<=foo)b+');
select regexp_matches('foobar', '(?<=oo)b+');

-- Test optimization of single-chr-or-bracket-expression lookaround constraints
select 'xz' ~ 'x(?=[xy])';
select 'xy' ~ 'x(?=[xy])';
select 'xz' ~ 'x(?![xy])';
select 'xy' ~ 'x(?![xy])';
select 'x' ~ 'x(?![xy])';
select 'xyy' ~ '(?<=[xy])yy+';
select 'zyy' ~ '(?<=[xy])yy+';
select 'xyy' ~ '(?<![xy])yy+';
select 'zyy' ~ '(?<![xy])yy+';

-- Test conversion of regex patterns to indexable conditions
explain (costs off) select * from pg_proc where proname ~ 'abc';
explain (costs off) select * from pg_proc where proname ~ '^abc';
explain (costs off) select * from pg_proc where proname ~ '^abc$';
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)';

-- Test for infinite loop in pullback() (CVE-2007-4772)
select 'a' ~ '($|^)*';

-- These cases expose a bug in the original fix for CVE-2007-4772
select 'a' ~ '(^)+^';
select 'a' ~ '$($$)+';

-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
select 'a' ~ '($^)+';
select 'a' ~ '(^$)*';
select 'aa bb cc' ~ '(^(?!aa))+';
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';

-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
select 'a' ~ '((((((a)*)*)*)*)*)*';
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';

-- These cases used to give too-many-states failures
select 'x' ~ 'abcd(\m)+xyz';
select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)';
select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$';
select 'x' ~ 'xyz(\Y\Y)+';
select 'x' ~ 'x|(?:\M)+';

-- This generates O(N) states but O(N^2) arcs, so it causes problems
-- if arc count is not constrained
select 'x' ~ repeat('x*y*z*', 1000);

-- Test backref in combination with non-greedy quantifier
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
select 'Programmer' ~ '(\w).*?\1' as t;
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');

-- Test for proper matching of non-greedy iteration (bug #11478)
select regexp_matches('foo/bar/baz',
                      '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');

-- Test that greediness can be overridden by outer quantifier
select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$');
select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$');
select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$');

-- Test for infinite loop in cfindloop with zero-length possible match
-- but no actual match (can only happen in the presence of backrefs)
select 'a' ~ '$()|^\1';
select 'a' ~ '.. ()|\1';
select 'a' ~ '()*\1';
select 'a' ~ '()+\1';

-- Test incorrect removal of capture groups within {0}
select 'xxx' ~ '(.){0}(\1)' as f;
select 'xxx' ~ '((.)){0}(\2)' as f;
select 'xyz' ~ '((.)){0}(\2){0}' as t;

-- Test ancient oversight in when to apply zaptreesubs
select 'abcdef' ~ '^(.)\1|\1.' as f;
select 'abadef' ~ '^((.)\2|..)\2' as f;

-- Error conditions
select 'xyz' ~ 'x(\w)(?=\1)';  -- no backrefs in LACONs
select 'xyz' ~ 'x(\w)(?=(\1))';  -- same, with the backref wrapped in a capture group
select 'a' ~ '\x7fffffff';  -- invalid chr code