Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.

Extraction of trigrams did not process LIKE escape sequences properly, so
trigrams near escapes could be misidentified, which in turn could produce
incorrect index search results.

Fujii Masao
This commit is contained in:
Tom Lane 2012-08-20 13:24:52 -04:00
parent 51fed14d73
commit b2a01b9ad1
3 changed files with 39 additions and 19 deletions

View File

@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
abcdef
(1 row)
select * from test2 where t like E'%\\bcd%';
t
--------
abcdef
(1 row)
select * from test2 where t ilike '%BCD%';
t
--------
@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
abcdef
(1 row)
select * from test2 where t like E'%\\bcd%';
t
--------
abcdef
(1 row)
select * from test2 where t ilike '%BCD%';
t
--------

View File

@ -49,6 +49,7 @@ explain (costs off)
select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%';
select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%';
drop index test2_idx_gin;
@ -60,5 +61,6 @@ explain (costs off)
select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%';
select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%';

View File

@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
const char *beginword = str;
const char *endword;
char *s = buf;
bool in_wildcard_meta = false;
bool in_leading_wildcard_meta = false;
bool in_trailing_wildcard_meta = false;
bool in_escape = false;
int clen;
/*
* Find the first word character remembering whether last character was
* wildcard meta-character.
* Find the first word character, remembering whether the preceding
* character was a wildcard meta-character. Note that the in_escape state
* persists from this loop to the next one, since we may exit at an escaped
* word character with in_escape still set.
*/
while (beginword - str < lenstr)
{
if (in_escape)
{
in_escape = false;
in_wildcard_meta = false;
if (iswordchr(beginword))
break;
in_escape = false;
in_leading_wildcard_meta = false;
}
else
{
if (ISESCAPECHAR(beginword))
in_escape = true;
else if (ISWILDCARDCHAR(beginword))
in_wildcard_meta = true;
in_leading_wildcard_meta = true;
else if (iswordchr(beginword))
break;
else
in_wildcard_meta = false;
in_leading_wildcard_meta = false;
}
beginword += pg_mblen(beginword);
}
@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
return NULL;
/*
* Add left padding spaces if last character wasn't wildcard
* Add left padding spaces if the preceding character wasn't a wildcard
* meta-character.
*/
*charlen = 0;
if (!in_wildcard_meta)
if (!in_leading_wildcard_meta)
{
if (LPADDING > 0)
{
@ -333,15 +336,11 @@ get_wildcard_part(const char *str, int lenstr,
* string boundary. Strip escapes during copy.
*/
endword = beginword;
in_wildcard_meta = false;
in_escape = false;
while (endword - str < lenstr)
{
clen = pg_mblen(endword);
if (in_escape)
{
in_escape = false;
in_wildcard_meta = false;
if (iswordchr(endword))
{
memcpy(s, endword, clen);
@ -349,7 +348,17 @@ get_wildcard_part(const char *str, int lenstr,
s += clen;
}
else
{
/*
* Back up endword to the escape character when stopping at
* an escaped char, so that the subsequent get_wildcard_part call
* will restart from the escape character. We assume here that
* escape chars are single-byte.
*/
endword--;
break;
}
in_escape = false;
}
else
{
@ -357,7 +366,7 @@ get_wildcard_part(const char *str, int lenstr,
in_escape = true;
else if (ISWILDCARDCHAR(endword))
{
in_wildcard_meta = true;
in_trailing_wildcard_meta = true;
break;
}
else if (iswordchr(endword))
@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
s += clen;
}
else
{
in_wildcard_meta = false;
break;
}
}
endword += clen;
}
/*
* Add right padding spaces if last character wasn't wildcard
* Add right padding spaces if the next character isn't a wildcard
* meta-character.
*/
if (!in_wildcard_meta)
if (!in_trailing_wildcard_meta)
{
if (RPADDING > 0)
{