From 18d99bc264fd3b768117cb1efdc540bc8f619295 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Wed, 4 Oct 2000 19:25:34 +0000 Subject: [PATCH] Update soundex to new fmgr interface and fix algorithm --- contrib/soundex/Makefile | 8 +- contrib/soundex/README.soundex | 15 +++- contrib/soundex/soundex.c | 131 ++++++++++++++++++++------------- contrib/soundex/soundex.sql.in | 5 +- 4 files changed, 100 insertions(+), 59 deletions(-) diff --git a/contrib/soundex/Makefile b/contrib/soundex/Makefile index 616b96c47e..9bfc813f97 100644 --- a/contrib/soundex/Makefile +++ b/contrib/soundex/Makefile @@ -1,10 +1,10 @@ # -# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.7 2000/07/09 13:13:33 petere Exp $ +# $Header: /cvsroot/pgsql/contrib/soundex/Attic/Makefile,v 1.8 2000/10/04 19:25:34 petere Exp $ # subdir = contrib/soundex top_builddir = ../.. -include ../../src/Makefile.global +include $(top_builddir)/src/Makefile.global NAME := soundex SONAME := $(NAME)$(DLSUFFIX) @@ -14,7 +14,7 @@ CFLAGS += -I. $(CFLAGS_SL) all: $(SONAME) $(NAME).sql $(NAME).sql: $(NAME).sql.in - sed -e 's:MODULE_PATHNAME:$(datadir)/contrib/$(SONAME):g' < $< > $@ + sed 's,@MODULE_FILENAME@,$(libdir)/contrib/$(SONAME),g' $< >$@ install: all installdirs $(INSTALL_SHLIB) $(SONAME) $(libdir)/contrib @@ -28,7 +28,7 @@ uninstall: rm -f $(libdir)/contrib/$(SONAME) $(datadir)/contrib/$(NAME).sql $(docdir)/contrib/README.$(NAME) clean distclean maintainer-clean: - rm -f $(SONAME) $(NAME).sql + rm -f $(SONAME) $(NAME).o $(NAME).sql depend dep: $(CC) -MM -MG $(CFLAGS) *.c > depend diff --git a/contrib/soundex/README.soundex b/contrib/soundex/README.soundex index e3ba4ee231..a5b1fb3c44 100644 --- a/contrib/soundex/README.soundex +++ b/contrib/soundex/README.soundex @@ -1,3 +1,17 @@ +This directory contains a module that implements the "Soundex" code as +a PostgreSQL user-defined function. The Soundex system is a method of +matching similar sounding names (or any words) to the same code. 
It +was initially used by the United States Census in 1880, 1900, and +1910, but it has little use beyond English names (or the English +pronunciation of names), and it is not a linguistic tool. + +To install it, first configure the main source tree, then run make; +make install in this directory. Finally, load the function definition +with psql: + + psql -f PREFIX/share/contrib/soundex.sql + +The following are some usage examples: SELECT text_soundex('hello world!'); @@ -50,4 +64,3 @@ WHERE text_sx_eq(nm,'john')\g SELECT * from s where s.nm #= 'john'; - diff --git a/contrib/soundex/soundex.c b/contrib/soundex/soundex.c index 8fe73054ca..f2acba4036 100644 --- a/contrib/soundex/soundex.c +++ b/contrib/soundex/soundex.c @@ -1,79 +1,79 @@ -/*****************************************************************************/ -/* soundex.c */ -/*****************************************************************************/ - +/* $Header: /cvsroot/pgsql/contrib/soundex/Attic/soundex.c,v 1.7 2000/10/04 19:25:34 petere Exp $ */ +#include "postgres.h" +#include "fmgr.h" +#include "utils/builtins.h" #include <ctype.h> #include <string.h> #include <stdio.h> -#include "postgres.h" /* for char16, etc. 
*/ -#include "utils/palloc.h" /* for palloc */ +Datum +text_soundex(PG_FUNCTION_ARGS); -/* prototypes for soundex functions */ -text *text_soundex(text *t); -char *soundex(char *instr, char *outstr); +static void +soundex(const char *instr, char *outstr); -text * -text_soundex(text *t) +#define SOUNDEX_LEN 4 + + +#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str)) +#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str))) + + +#ifndef SOUNDEX_TEST +/* + * SQL function: text_soundex(text) returns text + */ +Datum +text_soundex(PG_FUNCTION_ARGS) { - text *new_t; + char outstr[SOUNDEX_LEN + 1]; + char *arg; - char outstr[6 + 1]; /* max length of soundex is 6 */ - char *instr; + arg = _textout(PG_GETARG_TEXT_P(0)); - /* make a null-terminated string */ - instr = palloc(VARSIZE(t) + 1); - memcpy(instr, VARDATA(t), VARSIZE(t) - VARHDRSZ); - instr[VARSIZE(t) - VARHDRSZ] = (char) 0; + soundex(arg, outstr); - /* load soundex into outstr */ - soundex(instr, outstr); - - /* Now the outstr contains the soundex of instr */ - /* copy outstr to new_t */ - new_t = (text *) palloc(strlen(outstr) + VARHDRSZ); - memset(new_t, 0, strlen(outstr) + 1); - VARSIZE(new_t) = strlen(outstr) + VARHDRSZ; - memcpy((void *) VARDATA(new_t), - (void *) outstr, - strlen(outstr)); - - /* free instr */ - pfree(instr); - - return (new_t); + PG_RETURN_TEXT_P(_textin(outstr)); } +#endif /* not SOUNDEX_TEST */ -char * -soundex(char *instr, char *outstr) + +/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ +static const char *soundex_table = "01230120022455012623010202"; +#define soundex_code(letter) soundex_table[toupper(letter) - 'A'] + + +static void +soundex(const char *instr, char *outstr) { - /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ - char *table = "01230120022455012623010202"; - int count = 0; + int count; + AssertArg(instr); + AssertArg(outstr); + + outstr[SOUNDEX_LEN] = '\0'; + + /* Skip leading non-alphabetic characters */ while (!isalpha(instr[0]) && instr[0]) ++instr; 
+ /* No string left */ if (!instr[0]) - { /* Hey! Where'd the string go? */ - outstr[0] = (char) 0; - return outstr; - } - - if (toupper(instr[0]) == 'P' && toupper(instr[1]) == 'H') { - instr[0] = 'F'; - instr[1] = 'A'; + outstr[0] = (char) 0; + return; } + /* Take the first letter as is */ *outstr++ = (char) toupper(*instr++); - while (*instr && count < 5) + count = 1; + while (*instr && count < SOUNDEX_LEN) { - if (isalpha(*instr) && *instr != *(instr - 1)) + if (isalpha(*instr) && soundex_code(*instr) != soundex_code(*(instr - 1))) { - *outstr = table[toupper(instr[0]) - 'A']; + *outstr = soundex_code(instr[0]); if (*outstr != '0') { ++outstr; @@ -83,6 +83,33 @@ soundex(char *instr, char *outstr) ++instr; } - *outstr = '\0'; - return (outstr); + /* Fill with 0's */ + while (count < SOUNDEX_LEN) + { + *outstr = '0'; + ++outstr; + ++count; + } } + + + +#ifdef SOUNDEX_TEST +int +main (int argc, char *argv[]) +{ + if (argc < 2) + { + fprintf(stderr, "usage: %s string\n", argv[0]); + return 1; + } + else + { + char output[SOUNDEX_LEN + 1]; + + soundex(argv[1], output); + printf("soundex(%s) = %s\n", argv[1], output); + return 0; + } +} +#endif /* SOUNDEX_TEST */ diff --git a/contrib/soundex/soundex.sql.in b/contrib/soundex/soundex.sql.in index d8e66cd02a..4cb0ad0e4d 100644 --- a/contrib/soundex/soundex.sql.in +++ b/contrib/soundex/soundex.sql.in @@ -1,4 +1,5 @@ - CREATE FUNCTION text_soundex(text) RETURNS text - AS 'MODULE_PATHNAME' LANGUAGE 'c'; + AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC'; +CREATE FUNCTION soundex(text) RETURNS text + AS '@MODULE_FILENAME@', 'text_soundex' LANGUAGE 'newC';