Ensure Soundex difference() function handles empty input sanely.

fuzzystrmatch's difference() function assumes that _soundex()
always initializes its output buffer fully.  This was not so for
the case of a string containing no alphabetic characters, resulting
in unstable output and Valgrind complaints.

Fix by using memset() to fill the whole buffer in the early-exit
case.  Also make some cosmetic improvements (I didn't care for the
random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
This commit is contained in:
Tom Lane 2023-05-16 10:53:42 -04:00
parent f96e531b1c
commit 8084bf9a49
3 changed files with 15 additions and 7 deletions

View File

@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
A500 | M626 | 0
(1 row)
SELECT soundex(''), difference('', '');
 soundex | difference
---------+------------
         |          4
(1 row)
SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein
-------------

View File

@ -729,16 +729,14 @@ _soundex(const char *instr, char *outstr)
AssertArg(instr);
AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */
while (!isalpha((unsigned char) instr[0]) && instr[0])
while (*instr && !isalpha((unsigned char) *instr))
++instr;
/* No string left */
if (!instr[0])
/* If no string left, return all-zeroes buffer */
if (!*instr)
{
outstr[0] = (char) 0;
memset(outstr, '\0', SOUNDEX_LEN + 1);
return;
}
@ -751,7 +749,7 @@ _soundex(const char *instr, char *outstr)
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
*outstr = soundex_code(instr[0]);
*outstr = soundex_code(*instr);
if (*outstr != '0')
{
++outstr;
@ -768,6 +766,9 @@ _soundex(const char *instr, char *outstr)
++outstr;
++count;
}
/* And null-terminate */
*outstr = '\0';
}
PG_FUNCTION_INFO_V1(difference);

View File

@ -6,6 +6,7 @@ SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL');