From 7b1f6ffaabc8f538c82b94a5fc664941f4ff0b70 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Tue, 24 Jun 2003 22:59:46 +0000 Subject: [PATCH] Jim C. Nasby wrote: > Second argument to metaphone is supposed to set the limit on the > number of characters to return, but it breaks on some phrases: > > usps=# select metaphone(a,3),metaphone(a,4),metaphone(a,20) from > (select 'Hello world'::varchar AS a) a; > HLW | HLWR | HLWRLT > > usps=# select metaphone(a,3),metaphone(a,4),metaphone(a,20) from > (select 'A A COMEAUX MEMORIAL'::varchar AS a) a; > AKM | AKMKS | AKMKSMMRL > > In every case I've found that does this, the 4th and 5th letters are > always 'KS'. Nice catch. There was a bug in the original metaphone algorithm from CPAN. Patch attached (while I was at it I updated my email address, changed the copyright to PGDG, and removed an unnecessary palloc). Here's how it looks now: regression=# select metaphone(a,4) from (select 'A A COMEAUX MEMORIAL'::varchar AS a) a; metaphone ----------- AKMK (1 row) regression=# select metaphone(a,5) from (select 'A A COMEAUX MEMORIAL'::varchar AS a) a; metaphone ----------- AKMKS (1 row) Joe Conway --- contrib/fuzzystrmatch/README.fuzzystrmatch | 5 ++++- contrib/fuzzystrmatch/fuzzystrmatch.c | 11 ++++++----- contrib/fuzzystrmatch/fuzzystrmatch.h | 5 ++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/contrib/fuzzystrmatch/README.fuzzystrmatch b/contrib/fuzzystrmatch/README.fuzzystrmatch index 8d310b4ade..9cd80f81b7 100644 --- a/contrib/fuzzystrmatch/README.fuzzystrmatch +++ b/contrib/fuzzystrmatch/README.fuzzystrmatch @@ -3,7 +3,10 @@ * * Functions for "fuzzy" comparison of strings * - * Copyright (c) Joseph Conway , 2001; + * Joe Conway + * + * Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group + * ALL RIGHTS RESERVED; * * levenshtein() * ------------- diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c index 0358fb2b66..67e70cfc7e 100644 --- 
a/contrib/fuzzystrmatch/fuzzystrmatch.c +++ b/contrib/fuzzystrmatch/fuzzystrmatch.c @@ -3,7 +3,10 @@ * * Functions for "fuzzy" comparison of strings * - * Copyright (c) Joseph Conway , 2001; + * Joe Conway + * + * Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group + * ALL RIGHTS RESERVED; * * levenshtein() * ------------- @@ -221,9 +224,6 @@ metaphone(PG_FUNCTION_ARGS) if (!(reqlen > 0)) elog(ERROR, "metaphone: Requested Metaphone output length must be > 0"); - metaph = palloc(reqlen); - memset(metaph, '\0', reqlen); - retval = _metaphone(str_i, reqlen, &metaph); if (retval == META_SUCCESS) { @@ -629,7 +629,8 @@ _metaphone( /* KS */ case 'X': Phonize('K'); - Phonize('S'); + if (max_phonemes == 0 || Phone_Len < max_phonemes) + Phonize('S'); break; /* Y if followed by a vowel */ case 'Y': diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.h b/contrib/fuzzystrmatch/fuzzystrmatch.h index c8dbddeb07..079e520b8e 100644 --- a/contrib/fuzzystrmatch/fuzzystrmatch.h +++ b/contrib/fuzzystrmatch/fuzzystrmatch.h @@ -3,7 +3,10 @@ * * Functions for "fuzzy" comparison of strings * - * Copyright (c) Joseph Conway , 2001; + * Joe Conway + * + * Copyright (c) 2001, 2002, 2003 by PostgreSQL Global Development Group + * ALL RIGHTS RESERVED; * * levenshtein() * -------------