Fix LIKE's special-case code for % followed by _. I'm not entirely sure that

this case is worth a special code path, but a special code path that gets
the boundary condition wrong is definitely no good.  Per bug #4821 from
Andrew Gierth.

In passing, clean up some minor code formatting issues (excess parentheses
and blank lines in odd places).

Back-patch to 8.3, where the bug was introduced.
This commit is contained in:
Tom Lane 2009-05-24 18:10:38 +00:00
parent c6c458e24d
commit fc2660fc25
3 changed files with 95 additions and 61 deletions

View File

@ -9,7 +9,7 @@
* (UTF8 is a special case because we can use a much more efficient version
* of NextChar than can be used for general multi-byte encodings.)
*
* Before the inclusion, we need to define following macros:
* Before the inclusion, we need to define the following macros:
*
* NextChar
* MatchText - to name of function wanted
@ -19,47 +19,46 @@
* Copyright (c) 1996-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.24 2009/01/01 17:23:49 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.25 2009/05/24 18:10:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@bbn.com>.
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
**
** This code was shamelessly stolen from the "pql" code by myself and
** slightly modified :)
**
** All references to the word "star" were replaced by "percent"
** All references to the word "wild" were replaced by "like"
**
** All the nice shell RE matching stuff was replaced by just "_" and "%"
**
** As I don't have a copy of the SQL standard handy I wasn't sure whether
** to leave in the '\' escape character handling.
**
** Keith Parks. <keith@mtcc.demon.co.uk>
**
** SQL92 lets you specify the escape character by saying
** LIKE <pattern> ESCAPE <escape character>. We are a small operation
** so we force you to use '\'. - ay 7/95
**
** Now we have the like_escape() function that converts patterns with
** any specified escape character (or none at all) to the internal
** default escape character, which is still '\'. - tgl 9/2000
**
** The code is rewritten to avoid requiring null-terminated strings,
** which in turn allows us to leave out some memcpy() operations.
** This code should be faster and take less memory, but no promises...
** - thomas 2000-08-06
**
*/
* Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
* Rich $alz is now <rsalz@bbn.com>.
* Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
*
* This code was shamelessly stolen from the "pql" code by myself and
* slightly modified :)
*
* All references to the word "star" were replaced by "percent"
* All references to the word "wild" were replaced by "like"
*
* All the nice shell RE matching stuff was replaced by just "_" and "%"
*
* As I don't have a copy of the SQL standard handy I wasn't sure whether
* to leave in the '\' escape character handling.
*
* Keith Parks. <keith@mtcc.demon.co.uk>
*
* SQL92 lets you specify the escape character by saying
* LIKE <pattern> ESCAPE <escape character>. We are a small operation
* so we force you to use '\'. - ay 7/95
*
* Now we have the like_escape() function that converts patterns with
* any specified escape character (or none at all) to the internal
* default escape character, which is still '\'. - tgl 9/2000
*
* The code is rewritten to avoid requiring null-terminated strings,
* which in turn allows us to leave out some memcpy() operations.
* This code should be faster and take less memory, but no promises...
* - thomas 2000-08-06
*/
/*--------------------
* Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
* Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
*
* LIKE_TRUE: they match
* LIKE_FALSE: they don't match
@ -80,19 +79,18 @@ static int
MatchText(char *t, int tlen, char *p, int plen)
{
/* Fast path for match-everything pattern */
if ((plen == 1) && (*p == '%'))
if (plen == 1 && *p == '%')
return LIKE_TRUE;
/*
* In this loop, we advance by char when matching wildcards (and thus on
* recursive entry to this function we are properly char-synced). On other
* occasions it is safe to advance by byte, as the text and pattern will
* be in lockstep. This allows us to perform all comparisons between the
* be in lockstep. This allows us to perform all comparisons between the
* text and pattern on a byte by byte basis, even for multi-byte
* encodings.
*/
while ((tlen > 0) && (plen > 0))
while (tlen > 0 && plen > 0)
{
if (*p == '\\')
{
@ -116,7 +114,7 @@ MatchText(char *t, int tlen, char *p, int plen)
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while ((plen > 0) && (*p == '%'))
while (plen > 0 && *p == '%')
NextByte(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0)
@ -127,22 +125,24 @@ MatchText(char *t, int tlen, char *p, int plen)
* rest of the pattern.
*/
if (*p == '_')
{
/* %_ is the same as _% - avoid matching _ repeatedly */
NextChar(t, tlen);
NextByte(p, plen);
if (tlen <= 0)
do
{
return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
}
else if (plen <= 0)
{
return LIKE_FALSE;
}
NextChar(t, tlen);
NextByte(p, plen);
} while (tlen > 0 && plen > 0 && *p == '_');
/*
* If we are at the end of the pattern, succeed: % followed
* by n _'s matches any string of at least n characters, and
* we have now found there are at least n characters.
*/
if (plen <= 0)
return LIKE_TRUE;
/* Look for a place that matches the rest of the pattern */
while (tlen > 0)
{
int matched = MatchText(t, tlen, p, plen);
@ -155,7 +155,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else
{
char firstpat = TCHAR(*p);
if (*p == '\\')
@ -180,7 +179,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
NextChar(t, tlen);
}
}
@ -192,20 +190,20 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else if (*p == '_')
{
/* _ matches any single character, and we know there is one */
NextChar(t, tlen);
NextByte(p, plen);
continue;
}
else if (TCHAR(*t) != TCHAR(*p))
else if (TCHAR(*p) != TCHAR(*t))
{
/*
* Not the single-character wildcard and no explicit match? Then
* time to quit...
*/
/* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
}
/*
* Pattern and text match, so advance.
*
* It is safe to use NextByte instead of NextChar here, even for
* multi-byte character sets, because we are not following immediately
* after a wildcard character. If we are in the middle of a multibyte
@ -222,9 +220,8 @@ MatchText(char *t, int tlen, char *p, int plen)
if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */
/* End of input string. Do we have matching pattern remaining? */
while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of
* pattern */
/* End of text string. Do we have matching pattern remaining? */
while (plen > 0 && *p == '%') /* allow multiple %'s at end of pattern */
NextByte(p, plen);
if (plen <= 0)
@ -354,5 +351,4 @@ do_like_escape(text *pat, text *esc)
#ifdef MATCH_LOWER
#undef MATCH_LOWER
#endif

View File

@ -902,6 +902,33 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
f
(1 row)
--
-- test %/_ combination cases, cf bug #4821
--
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
t | t | f
---+---+---
t | t | f
(1 row)
--
-- test implicit type conversion
--

View File

@ -279,6 +279,17 @@ SELECT 'hawkeye' NOT ILIKE 'H%Eye' AS "false";
SELECT 'Hawkeye' ILIKE 'h%' AS "true";
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
--
-- test %/_ combination cases, cf bug #4821
--
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
--
-- test implicit type conversion
--