mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-30 13:51:23 +02:00
3694b4d7e1
When the number of allowed iterations is limited (either a "?" quantifier
or a bound expression), the last sub-match has to reach to the end of the
target string. The previous coding here first tried the shortest possible
match (one character, usually) and then gave up and back-tracked if that
didn't work, typically leading to failure to match overall, as shown in
bug #11478 from Christoph Berg. The minimum change to fix that would be to
not decrement k before "goto backtrack"; but that would be a pretty stupid
solution, because we'd laboriously try each possible sub-match length
before finally discovering that only ending at the end can work. Instead,
force the sub-match endpoint limit up to the end for even the first
shortest() call if we cannot have any more sub-matches after this one.
Bug introduced in my rewrite that added the iterdissect logic, commit
173e29aa5d. The shortest-first search code
was too closely modeled on the longest-first code, which hasn't got this
issue since it tries a match reaching to the end to start with anyway.
Back-patch to all affected branches.
199 lines
4.7 KiB
Plaintext
199 lines
4.7 KiB
Plaintext
--
|
|
-- Regular expression tests
|
|
--
|
|
-- Don't want to have to double backslashes in regexes
|
|
set standard_conforming_strings = on;
|
|
-- Test simple quantified backrefs
|
|
select 'bbbbb' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'ccc' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'xxx' ~ '^([bc])\1*$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'bbc' ~ '^([bc])\1*$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'b' ~ '^([bc])\1*$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
-- Test quantified backref within a larger expression
|
|
select 'abc abc abc' ~ '^(\w+)( \1)+$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'abc abd abc' ~ '^(\w+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abd' ~ '^(\w+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abc' ~ '^(.+)( \1)+$' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select 'abc abd abc' ~ '^(.+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
|
|
f
|
|
---
|
|
f
|
|
(1 row)
|
|
|
|
-- Test some cases that crashed in 9.2beta1 due to pmatch[] array overrun
|
|
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
|
substring
|
|
-----------
|
|
foo
|
|
(1 row)
|
|
|
|
select substring('a' from '((a))+');
|
|
substring
|
|
-----------
|
|
a
|
|
(1 row)
|
|
|
|
select substring('a' from '((a)+)');
|
|
substring
|
|
-----------
|
|
a
|
|
(1 row)
|
|
|
|
-- Test conversion of regex patterns to indexable conditions
|
|
explain (costs off) select * from pg_proc where proname ~ 'abc';
|
|
QUERY PLAN
|
|
-----------------------------------
|
|
Seq Scan on pg_proc
|
|
Filter: (proname ~ 'abc'::text)
|
|
(2 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abc'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc$';
|
|
QUERY PLAN
|
|
------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: (proname = 'abc'::name)
|
|
Filter: (proname ~ '^abc$'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abcd*e'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
|
Filter: (proname ~ '^abc+d'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
|
|
QUERY PLAN
|
|
----------------------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: ((proname >= 'abcdef'::name) AND (proname < 'abcdeg'::name))
|
|
Filter: (proname ~ '^(abc)(def)'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
|
|
QUERY PLAN
|
|
------------------------------------------------------------
|
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
|
Index Cond: (proname = 'abc'::name)
|
|
Filter: (proname ~ '^(abc)$'::text)
|
|
(3 rows)
|
|
|
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
|
|
QUERY PLAN
|
|
----------------------------------------
|
|
Seq Scan on pg_proc
|
|
Filter: (proname ~ '^(abc)?d'::text)
|
|
(2 rows)
|
|
|
|
-- Test for infinite loop in pullback() (CVE-2007-4772)
|
|
select 'a' ~ '($|^)*';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
|
|
select 'a' ~ '((((((a)*)*)*)*)*)*';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
|
|
?column?
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
-- Test backref in combination with non-greedy quantifier
|
|
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
|
|
select 'Programmer' ~ '(\w).*?\1' as t;
|
|
t
|
|
---
|
|
t
|
|
(1 row)
|
|
|
|
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
|
|
regexp_matches
|
|
----------------
|
|
{r,ogr}
|
|
{m,m}
|
|
(2 rows)
|
|
|
|
-- Test for proper matching of non-greedy iteration (bug #11478)
|
|
select regexp_matches('foo/bar/baz',
|
|
'^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
|
|
regexp_matches
|
|
----------------
|
|
{foo,bar,baz}
|
|
(1 row)
|
|
|