mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-02 02:11:16 +02:00
48789c5d23
It's possible to construct regular expressions that contain loops of constraint arcs (that is, ^ $ AHEAD BEHIND or LACON arcs). There's no use in fully traversing such a loop at execution, since you'd just end up in the same NFA state without having consumed any input. Worse, such a loop leads to infinite looping in the pullback/pushfwd stage of compilation, because we keep pushing or pulling the same constraints around the loop in a vain attempt to move them to the pre or post state. Such looping was previously recognized in CVE-2007-4772; but the fix only handled the case of trivial single-state loops (that is, a constraint arc leading back to its source state) ... and not only that, it was incorrect even for that case, because it broke the admittedly-not-very-clearly-stated API contract of the pull() and push() subroutines. The first two regression test cases added by this commit exhibit patterns that result in assertion failures because of that (though there seem to be no ill effects in non-assert builds). The other new test cases exhibit multi-state constraint loops; in an unpatched build they will run until the NFA state-count limit is exceeded.

To fix, remove the code added for CVE-2007-4772, and instead create a general-purpose constraint-loop-breaking phase of regex compilation that executes before we do pullback/pushfwd. Since we never need to traverse a constraint loop fully, we can just break the loop at any chosen spot, if we add clone states that can replicate any sequence of arc transitions that would've traversed just part of the loop.

Also add some commentary clarifying why we have to have all these machinations in the first place.

This class of problems has been known for some time --- we had a report from Marc Mamin about two years ago, for example, and there are related complaints in the Tcl bug tracker. I had discussed a fix of this kind off-list with Henry Spencer, but didn't get around to doing something about it until the issue was rediscovered by Greg Stark recently.

Back-patch to all supported branches.
281 lines
5.8 KiB
Plaintext
281 lines
5.8 KiB
Plaintext
--
|
|
-- Regular expression tests
|
|
--
|
|
-- Don't want to have to double backslashes in regexes
|
|
set standard_conforming_strings = on;
|
|
-- Test simple quantified backrefs
|
|
select 'bbbbb' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'ccc' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'xxx' ~ '^([bc])\1*$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'bbc' ~ '^([bc])\1*$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'b' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
-- Test quantified backref within a larger expression
|
|
select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
|
|
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
|
substring
|
|
-----------
|
|
foo
|
|
(1 row)
|
|
|
|
select substring('a' from '((a))+');
|
|
substring
|
|
-----------
|
|
a
|
|
(1 row)
|
|
|
|
select substring('a' from '((a)+)');
|
|
substring
|
|
-----------
|
|
a
|
|
(1 row)
|
|
|
|
-- Test conversion of regex patterns to indexable conditions
|
|
explain (costs off) select * from pg_proc where proname ~ 'abc';
|
|
QUERY PLAN
|
|
-----------------------------------
|
|
Seq Scan on pg_proc
|
|
Filter: (proname ~ 'abc'::text)
|
|
(2 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abc'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc$';
|
|
QUERY PLAN
|
|
------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: (proname = 'abc'::name)
|
|
Filter: (proname ~ '^abc$'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abcd*e'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abc+d'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abcdef'::name) AND (proname < 'abcdeg'::name))
|
|
Filter: (proname ~ '^(abc)(def)'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
|
|
QUERY PLAN
|
|
------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: (proname = 'abc'::name)
|
|
Filter: (proname ~ '^(abc)$'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
|
|
QUERY PLAN
|
|
----------------------------------------
|
|
Seq Scan on pg_proc
|
|
Filter: (proname ~ '^(abc)?d'::text)
|
|
(2 rows)
|
|
|
|
-- Test for infinite loop in pullback() (CVE-2007-4772)
|
|
select 'a' ~ '($|^)*';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- These cases expose a bug in the original fix for CVE-2007-4772
|
|
select 'a' ~ '(^)+^';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
select 'a' ~ '$($$)+';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- More cases of infinite loop in pullback(), not fixed by CVE-2007-4772 fix
|
|
select 'a' ~ '($^)+';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'a' ~ '(^$)*';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
select 'aa bb cc' ~ '(^(?!aa))+';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'aa x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'bb x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'cc x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'dd x' ~ '(^(?!aa)(?!bb)(?!cc))+';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
|
|
select 'a' ~ '((((((a)*)*)*)*)*)*';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- Test backref in combination with non-greedy quantifier
|
|
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
|
|
select 'Programmer' ~ '(\w).*?\1' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
|
|
regexp_matches
|
|
----------------
|
|
{r,ogr}
|
|
{m,m}
|
|
(2 rows)
|
|
|
|
-- Test for proper matching of non-greedy iteration (bug #11478)
|
|
select regexp_matches('foo/bar/baz',
|
|
'^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
|
|
regexp_matches
|
|
----------------
|
|
{foo,bar,baz}
|
|
(1 row)
|
|
|
|
-- Test for infinite loop in cfindloop with zero-length possible match
|
|
-- but no actual match (can only happen in the presence of backrefs)
|
|
select 'a' ~ '$()|^\1';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'a' ~ '.. ()|\1';
|
|
?column?
|
|
----------
|
|
f
|
|
(1 row)
|
|
|
|
select 'a' ~ '()*\1';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
select 'a' ~ '()+\1';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|