-- -- Regular expression tests -- -- Don't want to have to double backslashes in regexes set standard_conforming_strings = on; -- Test simple quantified backrefs select 'bbbbb' ~ '^([bc])\1*$' as t; t --- t (1 row) select 'ccc' ~ '^([bc])\1*$' as t; t --- t (1 row) select 'xxx' ~ '^([bc])\1*$' as f; f --- f (1 row) select 'bbc' ~ '^([bc])\1*$' as f; f --- f (1 row) select 'b' ~ '^([bc])\1*$' as t; t --- t (1 row) -- Test quantified backref within a larger expression select 'abc abc abc' ~ '^(\w+)( \1)+$' as t; t --- t (1 row) select 'abc abd abc' ~ '^(\w+)( \1)+$' as f; f --- f (1 row) select 'abc abc abd' ~ '^(\w+)( \1)+$' as f; f --- f (1 row) select 'abc abc abc' ~ '^(.+)( \1)+$' as t; t --- t (1 row) select 'abc abd abc' ~ '^(.+)( \1)+$' as f; f --- f (1 row) select 'abc abc abd' ~ '^(.+)( \1)+$' as f; f --- f (1 row) -- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)'); substring ----------- foo (1 row) select substring('a' from '((a))+'); substring ----------- a (1 row) select substring('a' from '((a)+)'); substring ----------- a (1 row) -- Test regexp_match() select regexp_match('abc', ''); regexp_match -------------- {""} (1 row) select regexp_match('abc', 'bc'); regexp_match -------------- {bc} (1 row) select regexp_match('abc', 'd') is null; ?column? ---------- t (1 row) select regexp_match('abc', '(B)(c)', 'i'); regexp_match -------------- {b,c} (1 row) select regexp_match('abc', 'Bd', 'ig'); -- error ERROR: regexp_match() does not support the "global" option HINT: Use the regexp_matches function instead. -- Test lookahead constraints select regexp_matches('ab', 'a(?=b)b*'); regexp_matches ---------------- {ab} (1 row) select regexp_matches('a', 'a(?=b)b*'); regexp_matches ---------------- (0 rows) select regexp_matches('abc', 'a(?=b)b*(?=c)c*'); regexp_matches ---------------- {abc} (1 row) select regexp_matches('ab', 'a(?=b)b*(?=c)c*'); regexp_matches ---------------- (0 rows) select regexp_matches('ab', 'a(?!b)b*'); regexp_matches ---------------- (0 rows) select regexp_matches('a', 'a(?!b)b*'); regexp_matches ---------------- {a} (1 row) select regexp_matches('b', '(?=b)b'); regexp_matches ---------------- {b} (1 row) select regexp_matches('a', '(?=b)b'); regexp_matches ---------------- (0 rows) -- Test lookbehind constraints select regexp_matches('abb', '(?<=a)b*'); regexp_matches ---------------- {bb} (1 row) select regexp_matches('a', 'a(?<=a)b*'); regexp_matches ---------------- {a} (1 row) select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*'); regexp_matches ---------------- {abc} (1 row) select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*'); regexp_matches ---------------- {ab} (1 row) select regexp_matches('ab', 'a*(?= 'abc'::text) AND (proname < 'abd'::text)) Filter: (proname ~ '^abc'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^abc$'; QUERY PLAN ------------------------------------------------------------ Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: (proname = 'abc'::text) Filter: (proname ~ '^abc$'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^abcd*e'; QUERY PLAN ---------------------------------------------------------------------- Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: ((proname >= 'abc'::text) AND (proname < 'abd'::text)) Filter: (proname ~ '^abcd*e'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^abc+d'; QUERY PLAN ---------------------------------------------------------------------- Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: ((proname >= 'abc'::text) AND (proname < 'abd'::text)) Filter: (proname ~ '^abc+d'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)'; QUERY PLAN ---------------------------------------------------------------------------- Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: ((proname >= 'abcdef'::text) AND (proname < 'abcdeg'::text)) Filter: (proname ~ '^(abc)(def)'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^(abc)$'; QUERY PLAN ------------------------------------------------------------ Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: (proname = 'abc'::text) Filter: (proname ~ '^(abc)$'::text) (3 rows) explain (costs off) select * from pg_proc where proname ~ '^(abc)?d'; QUERY PLAN ---------------------------------------- Seq Scan on pg_proc Filter: (proname ~ '^(abc)?d'::text) (2 rows) explain (costs off) select * from pg_proc where proname ~ '^abcd(x|(?=\w\w)q)'; QUERY PLAN ------------------------------------------------------------------------ Index Scan using pg_proc_proname_args_nsp_index on pg_proc Index Cond: ((proname >= 'abcd'::text) AND (proname < 'abce'::text)) Filter: (proname ~ '^abcd(x|(?=\w\w)q)'::text) (3 rows) -- Test for infinite loop in pullback() (CVE-2007-4772) select 'a' ~ '($|^)*'; ?column? ---------- t (1 row) -- These cases expose a bug in the original fix for CVE-2007-4772 select 'a' ~ '(^)+^'; ?column? ---------- t (1 row) select 'a' ~ '$($$)+'; ?column? ---------- t (1 row) -- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix select 'a' ~ '($^)+'; ?column? ---------- f (1 row) select 'a' ~ '(^$)*'; ?column? ---------- t (1 row) select 'aa bb cc' ~ '(^(?!aa))+'; ?column? ---------- f (1 row) select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+'; ?column? ---------- f (1 row) select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+'; ?column? ---------- f (1 row) select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+'; ?column? ---------- f (1 row) select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+'; ?column? ---------- t (1 row) -- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683) select 'a' ~ '((((((a)*)*)*)*)*)*'; ?column? ---------- t (1 row) select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)'; ?column? ---------- t (1 row) -- These cases used to give too-many-states failures select 'x' ~ 'abcd(\m)+xyz'; ?column? ---------- f (1 row) select 'a' ~ '^abcd*(((((^(a c(e?d)a+|)+|)+|)+|)+|a)+|)'; ?column? ---------- f (1 row) select 'x' ~ 'a^(^)bcd*xy(((((($a+|)+|)+|)+$|)+|)+|)^$'; ?column? ---------- f (1 row) select 'x' ~ 'xyz(\Y\Y)+'; ?column? ---------- f (1 row) select 'x' ~ 'x|(?:\M)+'; ?column? ---------- t (1 row) -- This generates O(N) states but O(N^2) arcs, so it causes problems -- if arc count is not constrained select 'x' ~ repeat('x*y*z*', 1000); ERROR: invalid regular expression: regular expression is too complex -- Test backref in combination with non-greedy quantifier -- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0 select 'Programmer' ~ '(\w).*?\1' as t; t --- t (1 row) select regexp_matches('Programmer', '(\w)(.*?\1)', 'g'); regexp_matches ---------------- {r,ogr} {m,m} (2 rows) -- Test for proper matching of non-greedy iteration (bug #11478) select regexp_matches('foo/bar/baz', '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', ''); regexp_matches ---------------- {foo,bar,baz} (1 row) -- Test that greediness can be overridden by outer quantifier select regexp_matches('llmmmfff', '^(l*)(.*)(f*)$'); regexp_matches ---------------- {ll,mmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*){1,1}(.*)(f*)$'); regexp_matches ---------------- {ll,mmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*){1,1}?(.*)(f*)$'); regexp_matches ------------------ {"",llmmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*){1,1}?(.*){1,1}?(f*)$'); regexp_matches ---------------- {"",llmmm,fff} (1 row) select regexp_matches('llmmmfff', '^(l*?)(.*)(f*)$'); regexp_matches ------------------ {"",llmmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*?){1,1}(.*)(f*)$'); regexp_matches ---------------- {ll,mmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*)(f*)$'); regexp_matches ------------------ {"",llmmmfff,""} (1 row) select regexp_matches('llmmmfff', '^(l*?){1,1}?(.*){1,1}?(f*)$'); regexp_matches ---------------- {"",llmmm,fff} (1 row) -- Test for infinite loop in cfindloop with zero-length possible match -- but no actual match (can only happen in the presence of backrefs) select 'a' ~ '$()|^\1'; ?column? ---------- f (1 row) select 'a' ~ '.. ()|\1'; ?column? ---------- f (1 row) select 'a' ~ '()*\1'; ?column? ---------- t (1 row) select 'a' ~ '()+\1'; ?column? ---------- t (1 row) -- Test incorrect removal of capture groups within {0} select 'xxx' ~ '(.){0}(\1)' as f; f --- f (1 row) select 'xxx' ~ '((.)){0}(\2)' as f; f --- f (1 row) select 'xyz' ~ '((.)){0}(\2){0}' as t; t --- t (1 row) -- Test ancient oversight in when to apply zaptreesubs select 'abcdef' ~ '^(.)\1|\1.' as f; f --- f (1 row) select 'abadef' ~ '^((.)\2|..)\2' as f; f --- f (1 row) -- Add coverage for some cases in checkmatchall select regexp_match('xy', '.|...'); regexp_match -------------- {x} (1 row) select regexp_match('xyz', '.|...'); regexp_match -------------- {xyz} (1 row) select regexp_match('xy', '.*'); regexp_match -------------- {xy} (1 row) select regexp_match('fooba', '(?:..)*'); regexp_match -------------- {foob} (1 row) select regexp_match('xyz', repeat('.', 260)); regexp_match -------------- (1 row) select regexp_match('foo', '(?:.|){99}'); regexp_match -------------- {foo} (1 row) -- Error conditions select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs ERROR: invalid regular expression: invalid backreference number select 'xyz' ~ 'x(\w)(?=(\1))'; ERROR: invalid regular expression: invalid backreference number select 'a' ~ '\x7fffffff'; -- invalid chr code ERROR: invalid regular expression: invalid escape \ sequence