Sorry - I should have gotten to this sooner. Here's a patch which you should

be able to apply against what you just committed. It rolls soundex into
fuzzystrmatch.

Remove soundex/metaphone and merge into fuzzystrmatch.

Joe Conway
This commit is contained in:
Bruce Momjian 2001-08-07 18:16:01 +00:00
parent fb5b85a8f2
commit cdd02cdf00
14 changed files with 167 additions and 689 deletions

View File

@ -57,7 +57,7 @@ fulltextindex -
fuzzystrmatch -
Levenshtein and Metaphone fuzzy string matching
by Joe Conway <joseph.conway@home.com>
by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
intarray -
Index support for arrays of int4, using GiST
@ -86,7 +86,6 @@ mac -
metaphone -
Improved Soundex function
by Joel Burton <jburton@scw.org>
miscutil -
PostgreSQL assert checking and various utility functions
@ -149,9 +148,6 @@ seg -
Confidence-interval datatype (GiST indexing example)
by Gene Selkov, Jr. <selkovjr@mcs.anl.gov>
soundex -
Soundex function
spi -
Various trigger functions, examples for using SPI.

View File

@ -20,6 +20,11 @@
* Metaphone was originally created by Lawrence Philips and presented in article
* in "Computer Language" December 1990 issue.
*
* soundex()
* -----------
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
* to soundex() for consistency.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
@ -40,12 +45,15 @@
*/
Version 0.1 (3 August, 2001):
Version 0.2 (7 August, 2001):
Functions to calculate the degree to which two strings match in a "fuzzy" way
Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel
Release Notes:
Version 0.2
- folded soundex contrib into this one
Version 0.1
- initial release

View File

@ -0,0 +1,62 @@
NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy
after combining soundex code into the fuzzystrmatch contrib
---------------------------------------------------------------------
The Soundex system is a method of matching similar sounding names
(or any words) to the same code. It was initially used by the
United States Census in 1880, 1900, and 1910, but it has little use
beyond English names (or the English pronunciation of names), and
it is not a linguistic tool.
The following are some usage examples:
SELECT soundex('hello world!');
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
select * from s
where soundex(nm) = soundex('john')\g
select a.nm from s a, s b
where soundex(a.nm) = soundex(b.nm)
and a.oid <> b.oid\g
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
'select soundex($1) = soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
'select soundex($1) < soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
'select soundex($1) > soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
'select soundex($1) <= soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
'select soundex($1) >= soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
'select soundex($1) <> soundex($2)'
LANGUAGE 'sql';
DROP OPERATOR #= (text,text)\g
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
commutator = #=)\g
SELECT *
FROM s
WHERE text_sx_eq(nm,'john')\g
SELECT *
from s
where s.nm #= 'john';

View File

@ -629,3 +629,71 @@ int _metaphone (
return(META_SUCCESS);
} /* END metaphone */
/*
 * soundex
 *
 * SQL-callable entry point: soundex(text) returns text.
 *
 * Converts the text argument to a C string, asks _soundex() for its code,
 * and hands the code back as a text datum.
 */
PG_FUNCTION_INFO_V1(soundex);

Datum
soundex(PG_FUNCTION_ARGS)
{
	char	   *input_cstr = _textout(PG_GETARG_TEXT_P(0));
	char		code[SOUNDEX_LEN + 1];	/* code characters plus the NUL */

	_soundex(input_cstr, code);

	PG_RETURN_TEXT_P(_textin(code));
}
/*
 * soundex_is_ascii_letter
 *
 * Nonzero only for 'A'-'Z' and 'a'-'z'.  soundex_code() indexes a 26-entry
 * table by (uppercased character - 'A'), so it may only be applied to
 * characters that pass this test.
 */
static int
soundex_is_ascii_letter(unsigned char c)
{
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

/*
 * _soundex
 *
 * Compute the Soundex code of instr and store it in outstr.
 *
 * instr  - NUL-terminated input string; leading non-alphabetic characters
 *          are skipped.
 * outstr - output buffer of at least SOUNDEX_LEN + 1 bytes.  It receives
 *          the upper-cased first letter followed by digit codes, padded
 *          with '0' to SOUNDEX_LEN characters, or the empty string when the
 *          input contains no alphabetic character.
 *
 * A letter is coded only when its code differs from the code of the
 * character immediately before it in the input.  Characters that are not
 * ASCII letters are never passed to soundex_code(): they produce no output
 * and count as "no code" when they precede a letter.  Without that guard the
 * table lookup would use an index outside 0..25.
 */
static void
_soundex(const char *instr, char *outstr)
{
	int			count;

	AssertArg(instr);
	AssertArg(outstr);

	outstr[SOUNDEX_LEN] = '\0';

	/* Skip leading non-alphabetic characters */
	while (!isalpha((unsigned char) instr[0]) && instr[0])
		++instr;

	/* No string left */
	if (!instr[0])
	{
		outstr[0] = (char) 0;
		return;
	}

	/* Take the first letter as is */
	*outstr++ = (char) toupper((unsigned char) *instr++);

	count = 1;
	while (*instr && count < SOUNDEX_LEN)
	{
		unsigned char cur = (unsigned char) instr[0];
		unsigned char prev = (unsigned char) instr[-1];

		if (soundex_is_ascii_letter(cur))
		{
			char		code = soundex_code(cur);

			/* a non-letter predecessor has no code, so it never matches */
			if (!soundex_is_ascii_letter(prev) || code != soundex_code(prev))
			{
				/* '0' marks letters that contribute nothing to the code */
				if (code != '0')
				{
					*outstr++ = code;
					++count;
				}
			}
		}
		++instr;
	}

	/* Fill with 0's */
	while (count < SOUNDEX_LEN)
	{
		*outstr++ = '0';
		++count;
	}
}

View File

@ -51,32 +51,43 @@
#include "utils/builtins.h"
/*
 * Length limits (255) for the Levenshtein and Metaphone functions.
 * NOTE(review): the code that enforces them is outside this view;
 * presumably they bound the accepted input length -- confirm.
 */
#define MAX_LEVENSHTEIN_STRLEN 255
#define MAX_METAPHONE_STRLEN 255
/*
 * Cell type holding a single int.
 * NOTE(review): presumably the element of the Levenshtein work matrix;
 * its users are not visible here -- confirm.
 */
typedef struct dynmatrix
{
int value;
} dynmat;
/*
* External declarations
*/
/* SQL-callable functions; each takes the standard PG_FUNCTION_ARGS. */
extern Datum levenshtein(PG_FUNCTION_ARGS);
extern Datum metaphone(PG_FUNCTION_ARGS);
extern Datum soundex(PG_FUNCTION_ARGS);
/*
* Internal declarations
* Soundex
*/
/* Worker behind soundex(): writes the code for instr into outstr. */
static void _soundex(const char *instr, char *outstr);
/* Number of characters in a code; soundex() sizes its buffer as SOUNDEX_LEN + 1. */
#define SOUNDEX_LEN 4
/* Call textin/textout through DirectFunctionCall1 to convert between C strings and text values. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
/* One code digit per letter, in alphabetical order: */
/*                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
/*
 * Look up the code digit for a letter.  The index is
 * toupper(letter) - 'A' with no range check, so the argument must be one
 * of the 26 ASCII letters; anything else reads outside soundex_table.
 */
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
/*
* Levenshtein
*/
/* Thin wrappers: string length, compare the characters two pointers address, advance a pointer. */
#define STRLEN(p) strlen(p)
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p) ((p)++)
#define MAX_LEVENSHTEIN_STRLEN 255
/*
* Metaphone
*/
#define MAX_METAPHONE_STRLEN 255
/*
* Original code by Michael G Schwern starts here.
* Code slightly modified for use as PostgreSQL

View File

@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int
-- metaphone(text, int): bound to the C symbol 'metaphone' in the module.
-- Declared iscachable and isstrict.
CREATE FUNCTION metaphone (text,int) RETURNS text
AS 'MODULE_PATHNAME','metaphone' LANGUAGE 'c' with (iscachable, isstrict);
-- soundex(text): bound to the C symbol 'soundex' in the module.
-- Declared iscachable and isstrict.
CREATE FUNCTION soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
-- text_soundex(text): second SQL name bound to the same C symbol 'soundex'.
-- It must be isstrict: the C function reads its argument without a NULL
-- check, so a NULL input has to be answered with NULL by the executor
-- instead of reaching the C code.  iscachable matches the soundex()
-- declaration, since both names run the same deterministic code.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);

View File

@ -1,40 +0,0 @@
#
# $Header: /cvsroot/pgsql/contrib/metaphone/Attic/Makefile,v 1.2 2001/06/20 00:04:44 momjian Exp $
#
# Builds the metaphone shared library and its SQL install script.
# NOTE(review): recipe lines must begin with a TAB; leading whitespace is
# not visible in this listing -- confirm against the real file.
# Location of this directory in the source tree; Makefile.global supplies
# the shared build variables and install commands used below.
subdir = contrib/metaphone
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# Module name and the shared-library file name derived from it.
NAME := metaphone
SONAME := $(NAME)$(DLSUFFIX)
override CPPFLAGS += -I$(srcdir)
override CFLAGS += $(CFLAGS_SL)
override DLLLIBS := $(BE_DLLLIBS) $(DLLLIBS)
# Default target: the shared library plus the generated SQL script.
all: $(SONAME) $(NAME).sql
# Generate the SQL script by substituting the installed library path for
# the @MODULE_FILENAME@ placeholder in the template.
$(NAME).sql: $(NAME).sql.in
sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
# Install the library, the SQL script and the README into their contrib directories.
install: all installdirs
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
$(INSTALL_DATA) README.$(NAME) $(docdir)/contrib
# Create the three install directories.
installdirs:
$(mkinstalldirs) $(libdir)/contrib $(datadir)/contrib $(docdir)/contrib
# Remove exactly the files that install copies.
uninstall:
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
# Remove build products from this directory.
clean distclean maintainer-clean:
rm -f $(SONAME) $(NAME).o $(NAME).sql
# Regenerate the dependency file from all C sources here.
depend dep:
$(CC) -MM -MG $(CFLAGS) *.c > depend
# Include the dependency file only when it exists.
ifeq (depend,$(wildcard depend))
include depend
endif

View File

@ -1,79 +0,0 @@
This directory contains a module that implements the "Metaphone" code as
a PostgreSQL user-defined function. The Metaphone system is a method of
matching similar sounding names (or any words) to the same code.
Metaphone was invented by Lawrence Philips as an improvement to the popular
name-hashing routine, Soundex.
This metaphone code is from Michael Kuhn, and is detailed at
http://aspell.sourceforge.net/metaphone/metaphone-kuhn.txt
Code for this (including this help file!) was liberally borrowed from
the soundex() module for PostgreSQL.
There are two functions:
metaphone(text) : returns hash of a name
metaphone(text,int) : returns hash (maximum length of int) of name
---
To install it, first configure the main source tree, then run make;
make install in this directory. Finally, load the function definition
with psql:
psql -f PREFIX/share/contrib/metaphone.sql
The following are some usage examples:
SELECT text_metaphone('hello world!');
SELECT text_metaphone('hello world!', 4);
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
select * from s
where text_metaphone(nm) = text_metaphone('john')\g
select a.nm from s a, s b
where text_metaphone(a.nm) = text_metaphone(b.nm)
and a.oid <> b.oid\g
CREATE FUNCTION text_mp_eq(text, text) RETURNS bool AS
'select text_metaphone($1) = text_metaphone($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_mp_lt(text,text) RETURNS bool AS
'select text_metaphone($1) < text_metaphone($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_mp_gt(text,text) RETURNS bool AS
'select text_metaphone($1) > text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_le(text,text) RETURNS bool AS
'select text_metaphone($1) <= text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_ge(text,text) RETURNS bool AS
'select text_metaphone($1) >= text_metaphone($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_mp_ne(text,text) RETURNS bool AS
'select text_metaphone($1) <> text_metaphone($2)'
LANGUAGE 'sql';
DROP OPERATOR #= (text,text)\g
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_mp_eq,
commutator = #=)\g
SELECT *
FROM s
WHERE text_mp_eq(nm,'pillsbury')\g
SELECT *
from s
where s.nm #= 'pillsbury';

View File

@ -1,321 +0,0 @@
#include "postgres.h"
#include "fmgr.h"
#include "utils/builtins.h"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* SQL-callable entry points defined in this file. */
Datum text_metaphone(PG_FUNCTION_ARGS);
Datum text_metaphone_length(PG_FUNCTION_ARGS);
/* Core encoder: appends the code for name to metaph, limited by metalen. */
void phonetic(char *name, char *metaph, int metalen);
/*
 * Code length limit.  The callers in this file spell the limit as the
 * literals 50 and 51 rather than using this macro.
 */
#define METAPHONE_LEN 50
/* The standalone test main() is compiled only when METAPHONE_TEST is defined. */
#undef METAPHONE_TEST
/* Call textin/textout through DirectFunctionCall1 to convert between C strings and text values. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
/* Null char pointer, compared against strchr() results. */
#define NULLCHAR (char *) 0
/* Letter classes consulted by phonetic() through strchr(). */
char *VOWELS="AEIOU",
*FRONTV="EIY", /* special cases for letters in FRONT of these */
*VARSON="CSPTG", /* variable sound--those modified by adding an "h" */
*DOUBLE="."; /* let these double letters through */
/*
 * Parallel strings: when a word starts with excpPAIR[i] followed by
 * nextLTR[i], phonetic() drops the first letter.
 */
char *excpPAIR="AGKPW", /* exceptions "ae-", "gn-", "kn-", "pn-", "wr-" */
*nextLTR ="ENNNR";
/* File-scope scratch pointers written by phonetic(). */
char *chrptr, *chrptr1;
/*
 * phonetic
 *
 * Append the Metaphone code of name to the string already held in metaph.
 *
 * name    - NUL-terminated input word.  Only alphabetic characters are
 *           used, and they are upper-cased before encoding.  At most
 *           sizeof(wname) - 1 letters are considered; any further input is
 *           ignored rather than written past the work buffer.
 * metaph  - output buffer.  It must already contain a NUL-terminated string
 *           (callers pass ""), because its length is measured with strlen()
 *           and every code character is appended with strncat().  It needs
 *           room for metalen characters plus the terminator.
 * metalen - maximum length of the string in metaph.  Encoding stops once
 *           that length is reached.  The value is compared as an unsigned
 *           quantity, so callers must pass a non-negative limit.
 *
 * Returns nothing; an input without letters leaves metaph unchanged.
 * The file-scope scratch pointers chrptr and chrptr1 are overwritten.
 */
void
phonetic(char *name, char *metaph, int metalen)
{
	int			ii, jj, silent, hard, Lng, lastChr;
	char		curLtr, prevLtr, nextLtr, nextLtr2, nextLtr3;
	int			vowelAfter, vowelBefore, frontvAfter;
	char		wname[60];
	char	   *ename = wname;

	/*
	 * Copy the letters, upper-cased, into the work buffer.  The bound on jj
	 * keeps long inputs from overrunning wname; the unsigned char casts
	 * keep the <ctype.h> calls defined for bytes >= 0x80.
	 */
	jj = 0;
	for (ii = 0; name[ii] != '\0' && jj < (int) sizeof(wname) - 1; ii++)
	{
		if (isalpha((unsigned char) name[ii]))
		{
			ename[jj] = toupper((unsigned char) name[ii]);
			jj++;
		}
	}
	ename[jj] = '\0';

	if (strlen(ename) == 0)
		return;

	/* if ae, gn, kn, pn, wr then drop the first letter */
	if ((chrptr = strchr(excpPAIR, ename[0])) != NULLCHAR)
	{
		chrptr1 = nextLTR + (chrptr - excpPAIR);
		/* source and destination overlap, so memmove rather than strcpy */
		if (*chrptr1 == ename[1])
			memmove(ename, &ename[1], strlen(&ename[1]) + 1);
	}

	/* change x to s */
	if (ename[0] == 'X')
		ename[0] = 'S';

	/* get rid of the "h" in "wh" (overlapping move again) */
	if (strncmp(ename, "WH", 2) == 0)
		memmove(&ename[1], &ename[2], strlen(&ename[2]) + 1);

	Lng = strlen(ename);
	lastChr = Lng - 1;			/* index of the last character */

	/* Remove an S from the end of the string */
	if (ename[lastChr] == 'S')
	{
		ename[lastChr] = '\0';
		Lng = strlen(ename);
		lastChr = Lng - 1;
	}

	for (ii = 0; (strlen(metaph) < (size_t) metalen) && (ii < Lng); ii++)
	{
		curLtr = ename[ii];

		vowelBefore = FALSE;
		prevLtr = ' ';
		if (ii > 0)
		{
			prevLtr = ename[ii - 1];
			if (strchr(VOWELS, prevLtr) != NULLCHAR)
				vowelBefore = TRUE;
		}

		/* if first letter is a vowel KEEP it */
		if (ii == 0 && (strchr(VOWELS, curLtr) != NULLCHAR))
		{
			strncat(metaph, &curLtr, 1);
			continue;
		}

		vowelAfter = FALSE;
		frontvAfter = FALSE;
		nextLtr = ' ';
		if (ii < lastChr)
		{
			nextLtr = ename[ii + 1];
			if (strchr(VOWELS, nextLtr) != NULLCHAR)
				vowelAfter = TRUE;
			if (strchr(FRONTV, nextLtr) != NULLCHAR)
				frontvAfter = TRUE;
		}

		/* skip double letters except ones in list */
		if (curLtr == nextLtr && (strchr(DOUBLE, nextLtr) == NULLCHAR))
			continue;

		nextLtr2 = ' ';
		if (ii < (lastChr - 1))
			nextLtr2 = ename[ii + 2];

		nextLtr3 = ' ';
		if (ii < (lastChr - 2))
			nextLtr3 = ename[ii + 3];

		switch (curLtr)
		{
			case 'B':
				silent = FALSE;
				if (ii == lastChr && prevLtr == 'M')
					silent = TRUE;
				if (!silent)
					strncat(metaph, &curLtr, 1);
				break;

				/* silent -sci-,-sce-,-scy-; sci-, etc OK */
			case 'C':
				if (!(ii > 1 && prevLtr == 'S' && frontvAfter))
				{
					if (ii > 0 && nextLtr == 'I' && nextLtr2 == 'A')
						strncat(metaph, "X", 1);
					else if (frontvAfter)
						strncat(metaph, "S", 1);
					else if (ii > 1 && prevLtr == 'S' && nextLtr == 'H')
						strncat(metaph, "K", 1);
					else if (nextLtr == 'H')
					{
						if (ii == 0 && (strchr(VOWELS, nextLtr2) == NULLCHAR))
							strncat(metaph, "K", 1);
						else
							strncat(metaph, "X", 1);
					}
					else if (prevLtr == 'C')
						strncat(metaph, "C", 1);
					else
						strncat(metaph, "K", 1);
				}
				break;

			case 'D':
				if (nextLtr == 'G' && (strchr(FRONTV, nextLtr2) != NULLCHAR))
					strncat(metaph, "J", 1);
				else
					strncat(metaph, "T", 1);
				break;

			case 'G':
				silent = FALSE;
				/* SILENT -gh- except for -gh and no vowel after h */
				if ((ii < (lastChr - 1) && nextLtr == 'H')
					&& (strchr(VOWELS, nextLtr2) == NULLCHAR))
					silent = TRUE;

				if ((ii == (lastChr - 3))
					&& nextLtr == 'N' && nextLtr2 == 'E' && nextLtr3 == 'D')
					silent = TRUE;
				else if ((ii == (lastChr - 1)) && nextLtr == 'N')
					silent = TRUE;

				if (prevLtr == 'D' && frontvAfter)
					silent = TRUE;

				if (prevLtr == 'G')
					hard = TRUE;
				else
					hard = FALSE;

				if (!silent)
				{
					if (frontvAfter && (!hard))
						strncat(metaph, "J", 1);
					else
						strncat(metaph, "K", 1);
				}
				break;

			case 'H':
				silent = FALSE;
				if (strchr(VARSON, prevLtr) != NULLCHAR)
					silent = TRUE;
				if (vowelBefore && !vowelAfter)
					silent = TRUE;
				if (!silent)
					strncat(metaph, &curLtr, 1);
				break;

			case 'F':
			case 'J':
			case 'L':
			case 'M':
			case 'N':
			case 'R':
				strncat(metaph, &curLtr, 1);
				break;

			case 'K':
				if (prevLtr != 'C')
					strncat(metaph, &curLtr, 1);
				break;

			case 'P':
				if (nextLtr == 'H')
					strncat(metaph, "F", 1);
				else
					strncat(metaph, "P", 1);
				break;

			case 'Q':
				strncat(metaph, "K", 1);
				break;

			case 'S':
				if (ii > 1 && nextLtr == 'I'
					&& (nextLtr2 == 'O' || nextLtr2 == 'A'))
					strncat(metaph, "X", 1);
				else if (nextLtr == 'H')
					strncat(metaph, "X", 1);
				else
					strncat(metaph, "S", 1);
				break;

			case 'T':
				if (ii > 1 && nextLtr == 'I'
					&& (nextLtr2 == 'O' || nextLtr2 == 'A'))
					strncat(metaph, "X", 1);
				else if (nextLtr == 'H')	/* The=0, Tho=T, Withrow=0 */
				{
					if (ii > 0 || (strchr(VOWELS, nextLtr2) != NULLCHAR))
						strncat(metaph, "0", 1);
					else
						strncat(metaph, "T", 1);
				}
				else if (!(ii < (lastChr - 2) && nextLtr == 'C' && nextLtr2 == 'H'))
					strncat(metaph, "T", 1);
				break;

			case 'V':
				strncat(metaph, "F", 1);
				break;

			case 'W':
			case 'Y':
				if (ii < lastChr && vowelAfter)
					strncat(metaph, &curLtr, 1);
				break;

			case 'X':
				/*
				 * "KS" is two characters.  When only one more fits within
				 * metalen, emit just the "K" so the result never exceeds
				 * the limit the caller sized its buffer for.
				 */
				if (strlen(metaph) + 2 > (size_t) metalen)
					strncat(metaph, "K", 1);
				else
					strncat(metaph, "KS", 2);
				break;

			case 'Z':
				strncat(metaph, "S", 1);
				break;

			default:
				/* non-initial vowels and any other letter add nothing */
				break;
		}
	}

	/*
	 * A trailing "S" is deliberately not stripped from the finished code:
	 * the trailing S was already removed from the input word above, which
	 * handles plurals while keeping embedded S sounds.
	 */
	return;
}
#ifdef METAPHONE_TEST
/*
 * Standalone test driver: prints the Metaphone code of the first
 * command-line argument, or a usage line on stderr when none is given.
 */
int
main(int argc, char *argv[])
{
	char		code[51] = "";

	/* guard clause: exactly one word is required */
	if (argc < 2)
	{
		fprintf(stderr, "usage: %s string\n", argv[0]);
		return 1;
	}

	phonetic(argv[1], code, 50);
	printf("metaphone(%s) = %s\n", argv[1], code);
	return 0;
}
#endif /* METAPHONE_TEST */
#ifndef METAPHONE_TEST
/*
 * text_metaphone
 *
 * SQL function: text_metaphone(text) returns text.
 *
 * Encodes the argument with phonetic() using a limit of 50 characters and
 * returns the resulting code as text.
 */
PG_FUNCTION_INFO_V1(text_metaphone);

Datum
text_metaphone(PG_FUNCTION_ARGS)
{
	char	   *word = _textout(PG_GETARG_TEXT_P(0));
	char		code[51] = "";	/* starts empty; phonetic() appends to it */

	phonetic(word, code, 50);

	PG_RETURN_TEXT_P(_textin(code));
}
/*
char outstr[51]="";
char *arg;
int32 metalen;
arg = _textout(PG_GETARG_TEXT_P(0));
metalen = PG_GETARG_INT32(1);
phonetic(arg, outstr, metalen);
*/
/*
 * text_metaphone_length
 *
 * SQL-callable variant taking (text, int): returns the Metaphone code of
 * the first argument, limited to the length given by the second.
 *
 * The requested length comes straight from the caller, so it is clamped to
 * the capacity of the local buffer before being passed on.  Without the
 * clamp, a large or negative value lets phonetic() write past the end of
 * the stack buffer.  Any value outside 0..50 is treated as 50.
 */
PG_FUNCTION_INFO_V1(text_metaphone_length);

Datum
text_metaphone_length(PG_FUNCTION_ARGS)
{
	enum
	{
		MAX_CODE_LEN = 50
	};

	/*
	 * One spare byte beyond the NUL: phonetic() can append the
	 * two-character code "KS" when only one character of the limit
	 * remains, so the result may run one past the requested length.
	 */
	char		outstr[MAX_CODE_LEN + 2] = "";
	char	   *arg;
	int32		metalen;

	arg = _textout(PG_GETARG_TEXT_P(0));
	metalen = PG_GETARG_INT32(1);

	if (metalen < 0 || metalen > MAX_CODE_LEN)
		metalen = MAX_CODE_LEN;

	phonetic(arg, outstr, metalen);

	PG_RETURN_TEXT_P(_textin(outstr));
}
#endif /* not METAPHONE_TEST */

View File

@ -1,3 +0,0 @@
-- Bound to the C symbol 'text_metaphone'.  Declared isstrict because the C
-- function reads its argument without a NULL check, so NULL input must be
-- answered with NULL by the executor; iscachable because the result
-- depends only on the argument.
-- NOTE(review): the SQL name is text_soundex while the C symbol and the
-- README examples use text_metaphone -- this looks like a copy/paste slip;
-- confirm before renaming, since existing callers may use this name.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_metaphone' LANGUAGE 'C' WITH (iscachable, isstrict);

View File

@ -1,40 +0,0 @@
#
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.10 2001/06/18 21:38:02 momjian Exp $
#
# Builds the soundex shared library and its SQL install script.
# NOTE(review): recipe lines must begin with a TAB; leading whitespace is
# not visible in this listing -- confirm against the real file.
# Location of this directory in the source tree; Makefile.global supplies
# the shared build variables and install commands used below.
subdir = contrib/soundex
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# Module name and the shared-library file name derived from it.
NAME := soundex
SONAME := $(NAME)$(DLSUFFIX)
override CPPFLAGS += -I$(srcdir)
override CFLAGS += $(CFLAGS_SL)
override DLLLIBS := $(BE_DLLLIBS) $(DLLLIBS)
# Default target: the shared library plus the generated SQL script.
all: $(SONAME) $(NAME).sql
# Generate the SQL script by substituting the installed library path for
# the @MODULE_FILENAME@ placeholder in the template.
$(NAME).sql: $(NAME).sql.in
sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
# Install the library, the SQL script and the README into their contrib directories.
install: all installdirs
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
$(INSTALL_DATA) README.$(NAME) $(docdir)/contrib
# Create the three install directories.
installdirs:
$(mkinstalldirs) $(libdir)/contrib $(datadir)/contrib $(docdir)/contrib
# Remove exactly the files that install copies.
uninstall:
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
# Remove build products from this directory.
clean distclean maintainer-clean:
rm -f $(SONAME) $(NAME).o $(NAME).sql
# Regenerate the dependency file from all C sources here.
depend dep:
$(CC) -MM -MG $(CFLAGS) *.c > depend
# Include the dependency file only when it exists.
ifeq (depend,$(wildcard depend))
include depend
endif

View File

@ -1,66 +0,0 @@
This directory contains a module that implements the "Soundex" code as
a PostgreSQL user-defined function. The Soundex system is a method of
matching similar sounding names (or any words) to the same code. It
was initially used by the United States Census in 1880, 1900, and
1910, but it has little use beyond English names (or the English
pronunciation of names), and it is not a linguistic tool.
To install it, first configure the main source tree, then run make;
make install in this directory. Finally, load the function definition
with psql:
psql -f PREFIX/share/contrib/soundex.sql
The following are some usage examples:
SELECT text_soundex('hello world!');
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
select * from s
where text_soundex(nm) = text_soundex('john')\g
select a.nm from s a, s b
where text_soundex(a.nm) = text_soundex(b.nm)
and a.oid <> b.oid\g
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
'select text_soundex($1) = text_soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
'select text_soundex($1) < text_soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
'select text_soundex($1) > text_soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
'select text_soundex($1) <= text_soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
'select text_soundex($1) >= text_soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
'select text_soundex($1) <> text_soundex($2)'
LANGUAGE 'sql';
DROP OPERATOR #= (text,text)\g
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
commutator = #=)\g
SELECT *
FROM s
WHERE text_sx_eq(nm,'john')\g
SELECT *
from s
where s.nm #= 'john';

View File

@ -1,119 +0,0 @@
/* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.11 2001/03/22 03:59:10 momjian Exp $ */
#include "postgres.h"
#include <ctype.h>
#include "fmgr.h"
#include "utils/builtins.h"
/* SQL-callable entry point. */
Datum text_soundex(PG_FUNCTION_ARGS);
/* Worker: writes the code for instr into outstr. */
static void soundex(const char *instr, char *outstr);
/* Number of characters in a code; output buffers are SOUNDEX_LEN + 1 bytes. */
#define SOUNDEX_LEN 4
/* Call textin/textout through DirectFunctionCall1 to convert between C strings and text values. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#ifndef SOUNDEX_TEST
/*
 * text_soundex
 *
 * SQL function: text_soundex(text) returns text.
 *
 * Converts the argument to a C string, computes its code with soundex(),
 * and returns the code as a text value.
 */
PG_FUNCTION_INFO_V1(text_soundex);

Datum
text_soundex(PG_FUNCTION_ARGS)
{
	char	   *input_cstr = _textout(PG_GETARG_TEXT_P(0));
	char		code[SOUNDEX_LEN + 1];	/* code characters plus the NUL */

	soundex(input_cstr, code);

	PG_RETURN_TEXT_P(_textin(code));
}
#endif /* not SOUNDEX_TEST */
/* One code digit per letter, in alphabetical order: */
/*                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
/*
 * Look up the code digit for a letter.  The index is
 * toupper(letter) - 'A' with no range check, so the argument must be one
 * of the 26 ASCII letters; anything else reads outside soundex_table.
 */
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
/*
 * soundex
 *
 * Compute the Soundex code of instr and store it in outstr.
 *
 * instr  - NUL-terminated input string; leading non-alphabetic characters
 *          are skipped.
 * outstr - output buffer of at least SOUNDEX_LEN + 1 bytes.  It receives
 *          the upper-cased first letter followed by digit codes, padded
 *          with '0' to SOUNDEX_LEN characters, or the empty string when the
 *          input contains no alphabetic character.
 *
 * A letter is coded only when its code differs from the code of the
 * character immediately before it in the input.  soundex_code() indexes a
 * 26-entry table without a range check, so it is applied only to ASCII
 * letters: other characters produce no output and count as "no code" when
 * they precede a letter.
 */
static void
soundex(const char *instr, char *outstr)
{
	int			count;

	AssertArg(instr);
	AssertArg(outstr);

	outstr[SOUNDEX_LEN] = '\0';

	/* Skip leading non-alphabetic characters */
	while (!isalpha((unsigned char) instr[0]) && instr[0])
		++instr;

	/* No string left */
	if (!instr[0])
	{
		outstr[0] = (char) 0;
		return;
	}

	/* Take the first letter as is */
	*outstr++ = (char) toupper((unsigned char) *instr++);

	count = 1;
	while (*instr && count < SOUNDEX_LEN)
	{
		unsigned char cur = (unsigned char) instr[0];
		unsigned char prev = (unsigned char) instr[-1];
		int			cur_is_letter = (cur >= 'A' && cur <= 'Z') ||
									(cur >= 'a' && cur <= 'z');
		int			prev_is_letter = (prev >= 'A' && prev <= 'Z') ||
									 (prev >= 'a' && prev <= 'z');

		if (cur_is_letter)
		{
			char		code = soundex_code(cur);

			/* a non-letter predecessor has no code, so it never matches */
			if (!prev_is_letter || code != soundex_code(prev))
			{
				/* '0' marks letters that contribute nothing to the code */
				if (code != '0')
				{
					*outstr++ = code;
					++count;
				}
			}
		}
		++instr;
	}

	/* Fill with 0's */
	while (count < SOUNDEX_LEN)
	{
		*outstr++ = '0';
		++count;
	}
}
#ifdef SOUNDEX_TEST
/*
 * Standalone test driver: prints the Soundex code of the first
 * command-line argument, or a usage line on stderr when none is given.
 */
int
main(int argc, char *argv[])
{
	char		code[SOUNDEX_LEN + 1];

	/* guard clause: exactly one word is required */
	if (argc < 2)
	{
		fprintf(stderr, "usage: %s string\n", argv[0]);
		return 1;
	}

	soundex(argv[1], code);
	printf("soundex(%s) = %s\n", argv[1], code);
	return 0;
}
#endif /* SOUNDEX_TEST */

View File

@ -1,5 +0,0 @@
-- Both SQL names are bound to the same C symbol, 'text_soundex'.
-- They are declared isstrict because the C function reads its argument
-- without a NULL check, so NULL input must be answered with NULL by the
-- executor; iscachable because the result depends only on the argument.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'C' WITH (iscachable, isstrict);
CREATE FUNCTION soundex(text) RETURNS text
  AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'C' WITH (iscachable, isstrict);