From e15d53e7a419dcff49e108c78c8f70be6b18598b Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 2 Jan 2010 20:59:16 +0000
Subject: [PATCH] Fix similar_escape() to convert parentheses to non-capturing style.

This is needed to avoid unwanted interference with SUBSTRING behavior,
as per bug #5257 from Roman Kononov.  Also, add some basic intelligence
about character classes (bracket expressions) since we now have several
behaviors that aren't appropriate inside a character class.

As with the previous patch in this area, I'm reluctant to back-patch
since it might affect applications that are relying on the prior behavior.
---
 src/backend/utils/adt/regexp.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index ca61d5637f..cbffcdb183 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.85 2010/01/02 16:57:55 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.86 2010/01/02 20:59:16 tgl Exp $
  *
  *		Alistair Crooks added the code for the regex caching
  *		agc - cached the regular expressions used - there's a good chance
@@ -640,6 +640,7 @@ similar_escape(PG_FUNCTION_ARGS)
 	int			plen,
 				elen;
 	bool		afterescape = false;
+	bool		incharclass = false;
 	int			nquotes = 0;
 
 	/* This function is not strict, so must test explicitly */
@@ -682,10 +683,10 @@ similar_escape(PG_FUNCTION_ARGS)
 	 */
 
 	/*
-	 * We need room for the prefix/postfix plus as many as 2 output bytes per
-	 * input byte
+	 * We need room for the prefix/postfix plus as many as 3 output bytes per
+	 * input byte; since the input is at most 1GB this can't overflow
 	 */
-	result = (text *) palloc(VARHDRSZ + 6 + 2 * plen);
+	result = (text *) palloc(VARHDRSZ + 6 + 3 * plen);
 	r = VARDATA(result);
 
 	*r++ = '^';
@@ -699,7 +700,7 @@ similar_escape(PG_FUNCTION_ARGS)
 
 		if (afterescape)
 		{
-			if (pchar == '"')	/* for SUBSTRING patterns */
+			if (pchar == '"' && !incharclass)	/* for SUBSTRING patterns */
 				*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
 			else
 			{
@@ -713,6 +714,19 @@ similar_escape(PG_FUNCTION_ARGS)
 			/* SQL99 escape character; do not send to output */
 			afterescape = true;
 		}
+		else if (incharclass)
+		{
+			if (pchar == '\\')
+				*r++ = '\\';
+			*r++ = pchar;
+			if (pchar == ']')
+				incharclass = false;
+		}
+		else if (pchar == '[')
+		{
+			*r++ = pchar;
+			incharclass = true;
+		}
 		else if (pchar == '%')
 		{
 			*r++ = '.';
@@ -720,6 +734,13 @@ similar_escape(PG_FUNCTION_ARGS)
 		}
 		else if (pchar == '_')
 			*r++ = '.';
+		else if (pchar == '(')
+		{
+			/* convert to non-capturing parenthesis */
+			*r++ = '(';
+			*r++ = '?';
+			*r++ = ':';
+		}
 		else if (pchar == '\\' || pchar == '.' || pchar == '^' ||
 				 pchar == '$')
 		{