Update soundex to new fmgr interface and fix algorithm

This commit is contained in:
Peter Eisentraut 2000-10-04 19:25:34 +00:00
parent baa3a09b5f
commit 18d99bc264
4 changed files with 100 additions and 59 deletions

View File

@ -1,10 +1,10 @@
#
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.7 2000/07/09 13:13:33 petere Exp $
# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.8 2000/10/04 19:25:34 petere Exp $
#
subdir = contrib/soundex
top_builddir = ../..
include ../../src/Makefile.global
include $(top_builddir)/src/Makefile.global
NAME := soundex
SONAME := $(NAME)$(DLSUFFIX)
@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL)
all: $(SONAME) $(NAME).sql
$(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(datadir)/contrib/$(SONAME):g' < $< > $@
sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@
install: all installdirs
$(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib
@ -28,7 +28,7 @@ uninstall:
rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME)
clean distclean maintainer-clean:
rm -f $(SONAME) $(NAME).sql
rm -f $(SONAME) $(NAME).o $(NAME).sql
depend dep:
$(CC) -MM -MG $(CFLAGS) *.c > depend

View File

@ -1,3 +1,17 @@
This directory contains a module that implements the "Soundex" code as
a PostgreSQL user-defined function. The Soundex system is a method of
mapping similar-sounding names (or any words) to the same code. It
was initially used by the United States Census in 1880, 1900, and
1910, but it has little use beyond English names (or the English
pronunciation of names), and it is not a linguistic tool.
To install it, first configure the main source tree, then run "make"
followed by "make install" in this directory. Finally, load the
function definition with psql:
psql -f PREFIX/share/contrib/soundex.sql
The following are some usage examples:
SELECT text_soundex('hello world!');
@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g
SELECT *
from s
where s.nm #= 'john';

View File

@ -1,79 +1,79 @@
/*****************************************************************************/
/* soundex.c */
/*****************************************************************************/
/* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.7 2000/10/04 19:25:34 petere Exp $ */
#include "postgres.h"
#include "fmgr.h"
#include "utils/builtins.h"
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include "postgres.h" /* for char16, etc. */
#include "utils/palloc.h" /* for palloc */
Datum
text_soundex(PG_FUNCTION_ARGS);
/* prototypes for soundex functions */
text *text_soundex(text *t);
char *soundex(char *instr, char *outstr);
static void
soundex(const char *instr, char *outstr);
text *
text_soundex(text *t)
#define SOUNDEX_LEN 4
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
#ifndef SOUNDEX_TEST
/*
* SQL function: text_soundex(text) returns text
*/
Datum
text_soundex(PG_FUNCTION_ARGS)
{
text *new_t;
char outstr[SOUNDEX_LEN + 1];
char *arg;
char outstr[6 + 1]; /* max length of soundex is 6 */
char *instr;
arg = _textout(PG_GETARG_TEXT_P(0));
/* make a null-terminated string */
instr = palloc(VARSIZE(t) + 1);
memcpy(instr, VARDATA(t), VARSIZE(t) - VARHDRSZ);
instr[VARSIZE(t) - VARHDRSZ] = (char) 0;
soundex(arg, outstr);
/* load soundex into outstr */
soundex(instr, outstr);
/* Now the outstr contains the soundex of instr */
/* copy outstr to new_t */
new_t = (text *) palloc(strlen(outstr) + VARHDRSZ);
memset(new_t, 0, strlen(outstr) + 1);
VARSIZE(new_t) = strlen(outstr) + VARHDRSZ;
memcpy((void *) VARDATA(new_t),
(void *) outstr,
strlen(outstr));
/* free instr */
pfree(instr);
return (new_t);
PG_RETURN_TEXT_P(_textin(outstr));
}
#endif /* not SOUNDEX_TEST */
char *
soundex(char *instr, char *outstr)
/*
 * Soundex digit for each letter, indexed by (uppercase letter - 'A').
 * A '0' entry means the letter contributes no digit; the caller only
 * emits a digit when it is not '0'.
 */
/*                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";

/*
 * soundex_code
 *		Return the Soundex digit for a letter, case-insensitively.
 *
 * letter: any character value.
 *
 * Returns the table digit for 'A'..'Z' / 'a'..'z'.  Every other character
 * (spaces, digits, punctuation, letters outside A-Z that isalpha() may
 * accept in some locales) yields '0' instead of being used as a table
 * index, which would read outside the 26-entry table.
 */
static char
soundex_code(char letter)
{
	/* <ctype.h> functions require a value representable as unsigned char */
	int			upper = toupper((unsigned char) letter);

	if (upper >= 'A' && upper <= 'Z')
		return soundex_table[upper - 'A'];

	return '0';
}
static void
soundex(const char *instr, char *outstr)
{
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
char *table = "01230120022455012623010202";
int count = 0;
int count;
AssertArg(instr);
AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */
while (!isalpha(instr[0]) && instr[0])
++instr;
/* No string left */
if (!instr[0])
{ /* Hey! Where'd the string go? */
outstr[0] = (char) 0;
return outstr;
}
if (toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H')
{
instr[0] = 'F';
instr[1] = 'A';
outstr[0] = (char) 0;
return;
}
/* Take the first letter as is */
*outstr++ = (char) toupper(*instr++);
while (*instr && count < 5)
count = 1;
while (*instr && count < SOUNDEX_LEN)
{
if (isalpha(*instr) && *instr != *(instr - 1))
if (isalpha(*instr) && soundex_code(*instr) != soundex_code(*(instr - 1)))
{
*outstr = table[toupper(instr[0]) - 'A'];
*outstr = soundex_code(instr[0]);
if (*outstr != '0')
{
++outstr;
@ -83,6 +83,33 @@ soundex(char *instr, char *outstr)
++instr;
}
*outstr = '\0';
return (outstr);
/* Fill with 0's */
while (count < SOUNDEX_LEN)
{
*outstr = '0';
++outstr;
++count;
}
}
#ifdef SOUNDEX_TEST
/*
 * Command-line driver: prints the soundex code of the first argument.
 *
 * Exits with status 1 and a usage message on stderr when no argument is
 * given; otherwise writes "soundex(<arg>) = <code>" to stdout and exits
 * with status 0.
 */
int
main(int argc, char *argv[])
{
	char		code[SOUNDEX_LEN + 1];

	/* Guard clause: at least one argument is required */
	if (argc < 2)
	{
		fprintf(stderr, "usage: %s string\n", argv[0]);
		return 1;
	}

	soundex(argv[1], code);
	printf("soundex(%s) = %s\n", argv[1], code);

	return 0;
}
#endif /* SOUNDEX_TEST */

View File

@ -1,4 +1,5 @@
CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE 'c';
AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';
CREATE FUNCTION soundex(text) RETURNS text
AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';