Jim C. Nasby wrote:
> Second argument to metaphone is supposed to set the limit on the
> number of characters to return, but it breaks on some phrases:
>
> usps=# select metaphone(a,3),metaphone(a,4),metaphone(a,20) from
> (select 'Hello world'::varchar AS a) a;
> HLW | HLWR | HLWRLT
>
> usps=# select metaphone(a,3),metaphone(a,4),metaphone(a,20) from
> (select 'A A COMEAUX MEMORIAL'::varchar AS a) a;
> AKM | AKMKS | AKMKSMMRL
>
> In every case I've found that does this, the 4th and 5th letters are
> always 'KS'.

Nice catch. There was a bug in the original metaphone algorithm from CPAN. Patch attached (while I was at it I updated my email address, changed the copyright to PGDG, and removed an unnecessary palloc).

Here's how it looks now:

regression=# select metaphone(a,4) from (select 'A A COMEAUX MEMORIAL'::varchar AS a) a;
 metaphone
-----------
 AKMK
(1 row)

regression=# select metaphone(a,5) from (select 'A A COMEAUX MEMORIAL'::varchar AS a) a;
 metaphone
-----------
 AKMKS
(1 row)

Joe Conway
This commit is contained in:
parent
4b1fe23153
commit
7b1f6ffaab
|
@ -3,7 +3,10 @@
|
||||||
*
|
*
|
||||||
* Functions for "fuzzy" comparison of strings
|
* Functions for "fuzzy" comparison of strings
|
||||||
*
|
*
|
||||||
* Copyright (c) Joseph Conway <joseph.conway@home.com>, 2001;
|
* Joe Conway <mail@joeconway.com>
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group
|
||||||
|
* ALL RIGHTS RESERVED;
|
||||||
*
|
*
|
||||||
* levenshtein()
|
* levenshtein()
|
||||||
* -------------
|
* -------------
|
||||||
|
|
|
@ -3,7 +3,10 @@
|
||||||
*
|
*
|
||||||
* Functions for "fuzzy" comparison of strings
|
* Functions for "fuzzy" comparison of strings
|
||||||
*
|
*
|
||||||
* Copyright (c) Joseph Conway <joseph.conway@home.com>, 2001;
|
* Joe Conway <mail@joeconway.com>
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group
|
||||||
|
* ALL RIGHTS RESERVED;
|
||||||
*
|
*
|
||||||
* levenshtein()
|
* levenshtein()
|
||||||
* -------------
|
* -------------
|
||||||
|
@ -221,9 +224,6 @@ metaphone(PG_FUNCTION_ARGS)
|
||||||
if (!(reqlen > 0))
|
if (!(reqlen > 0))
|
||||||
elog(ERROR, "metaphone: Requested Metaphone output length must be > 0");
|
elog(ERROR, "metaphone: Requested Metaphone output length must be > 0");
|
||||||
|
|
||||||
metaph = palloc(reqlen);
|
|
||||||
memset(metaph, '\0', reqlen);
|
|
||||||
|
|
||||||
retval = _metaphone(str_i, reqlen, &metaph);
|
retval = _metaphone(str_i, reqlen, &metaph);
|
||||||
if (retval == META_SUCCESS)
|
if (retval == META_SUCCESS)
|
||||||
{
|
{
|
||||||
|
@ -629,6 +629,7 @@ _metaphone(
|
||||||
/* KS */
|
/* KS */
|
||||||
case 'X':
|
case 'X':
|
||||||
Phonize('K');
|
Phonize('K');
|
||||||
|
if (max_phonemes == 0 || Phone_Len < max_phonemes)
|
||||||
Phonize('S');
|
Phonize('S');
|
||||||
break;
|
break;
|
||||||
/* Y if followed by a vowel */
|
/* Y if followed by a vowel */
|
||||||
|
|
|
@ -3,7 +3,10 @@
|
||||||
*
|
*
|
||||||
* Functions for "fuzzy" comparison of strings
|
* Functions for "fuzzy" comparison of strings
|
||||||
*
|
*
|
||||||
* Copyright (c) Joseph Conway <joseph.conway@home.com>, 2001;
|
* Joe Conway <mail@joeconway.com>
|
||||||
|
*
|
||||||
|
* Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group
|
||||||
|
* ALL RIGHTS RESERVED;
|
||||||
*
|
*
|
||||||
* levenshtein()
|
* levenshtein()
|
||||||
* -------------
|
* -------------
|
||||||
|
|
Loading…
Reference in New Issue