Fix similar_escape() to convert parentheses to non-capturing style.

This is needed to avoid unwanted interference with SUBSTRING behavior,
as per bug #5257 from Roman Kononov.  Also, add some basic intelligence
about character classes (bracket expressions) since we now have several
behaviors that aren't appropriate inside a character class.

As with the previous patch in this area, I'm reluctant to back-patch
since it might affect applications that are relying on the prior
behavior.
This commit is contained in:
Tom Lane 2010-01-02 20:59:16 +00:00
parent 2b59274c09
commit e15d53e7a4
1 changed file with 26 additions and 5 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.85 2010/01/02 16:57:55 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.86 2010/01/02 20:59:16 tgl Exp $
* *
* Alistair Crooks added the code for the regex caching * Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance * agc - cached the regular expressions used - there's a good chance
@ -640,6 +640,7 @@ similar_escape(PG_FUNCTION_ARGS)
int plen, int plen,
elen; elen;
bool afterescape = false; bool afterescape = false;
bool incharclass = false;
int nquotes = 0; int nquotes = 0;
/* This function is not strict, so must test explicitly */ /* This function is not strict, so must test explicitly */
@ -682,10 +683,10 @@ similar_escape(PG_FUNCTION_ARGS)
*/ */
/* /*
* We need room for the prefix/postfix plus as many as 2 output bytes per * We need room for the prefix/postfix plus as many as 3 output bytes per
* input byte * input byte; since the input is at most 1GB this can't overflow
*/ */
result = (text *) palloc(VARHDRSZ + 6 + 2 * plen); result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
r = VARDATA(result); r = VARDATA(result);
*r++ = '^'; *r++ = '^';
@ -699,7 +700,7 @@ similar_escape(PG_FUNCTION_ARGS)
if (afterescape) if (afterescape)
{ {
if (pchar == '"') /* for SUBSTRING patterns */ if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
*r++ = ((nquotes++ % 2) == 0) ? '(' : ')'; *r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
else else
{ {
@ -713,6 +714,19 @@ similar_escape(PG_FUNCTION_ARGS)
/* SQL99 escape character; do not send to output */ /* SQL99 escape character; do not send to output */
afterescape = true; afterescape = true;
} }
else if (incharclass)
{
if (pchar == '\\')
*r++ = '\\';
*r++ = pchar;
if (pchar == ']')
incharclass = false;
}
else if (pchar == '[')
{
*r++ = pchar;
incharclass = true;
}
else if (pchar == '%') else if (pchar == '%')
{ {
*r++ = '.'; *r++ = '.';
@ -720,6 +734,13 @@ similar_escape(PG_FUNCTION_ARGS)
} }
else if (pchar == '_') else if (pchar == '_')
*r++ = '.'; *r++ = '.';
else if (pchar == '(')
{
/* convert to non-capturing parenthesis */
*r++ = '(';
*r++ = '?';
*r++ = ':';
}
else if (pchar == '\\' || pchar == '.' || else if (pchar == '\\' || pchar == '.' ||
pchar == '^' || pchar == '$') pchar == '^' || pchar == '$')
{ {