Fix regex match failures for backrefs combined with non-greedy quantifiers.

An ancient logic error in cfindloop() could cause the regex engine to fail
to find matches that begin later than the start of the string.  This
function is only used when the regex pattern contains a back reference,
and so far as we can tell the error is only reachable if the pattern is
non-greedy (i.e. its first quantifier uses the ? modifier).  Furthermore,
the actual match must begin after some potential match that satisfies the
DFA but then fails the back-reference's match test.

Reported and fixed by Jeevan Chalke, with cosmetic adjustments by me.
This commit is contained in:
Tom Lane 2013-07-18 21:22:37 -04:00
parent 4cbe3ac3e8
commit e2bd904955
3 changed files with 31 additions and 9 deletions

View File

@@ -487,19 +487,21 @@ cfindloop(struct vars * v,
*coldp = cold;
return er;
}
if ((shorter) ? end == estop : end == begin)
{
/* no point in trying again */
*coldp = cold;
return REG_NOMATCH;
}
/* go around and try again */
/* try next shorter/longer match with same begin point */
if (shorter)
{
if (end == estop)
break; /* NOTE BREAK OUT */
estart = end + 1;
}
else
{
if (end == begin)
break; /* NOTE BREAK OUT */
estop = end - 1;
}
}
}
} /* end loop over endpoint positions */
} /* end loop over beginning positions */
} while (close < v->stop);
*coldp = cold;

View File

@@ -173,3 +173,18 @@ select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
t
(1 row)
-- Test backref in combination with non-greedy quantifier
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
select 'Programmer' ~ '(\w).*?\1' as t;
t
---
t
(1 row)
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
regexp_matches
----------------
{r,ogr}
{m,m}
(2 rows)

View File

@@ -41,3 +41,8 @@ select 'a' ~ '($|^)*';
-- Test for infinite loop in fixempties() (Tcl bugs 3604074, 3606683)
select 'a' ~ '((((((a)*)*)*)*)*)*';
select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
-- Test backref in combination with non-greedy quantifier
-- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
select 'Programmer' ~ '(\w).*?\1' as t;
select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');