Add GIN support for pg_trgm. From Guillaume Smet <guillaume.smet@gmail.com>

with minor editorization by me.
This commit is contained in:
Teodor Sigaev 2007-03-14 14:15:40 +00:00
parent 547e41cdf8
commit 15f91f2789
8 changed files with 1256 additions and 5 deletions

View File

@ -1,7 +1,7 @@
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.6 2007/02/09 17:24:33 petere Exp $
# $PostgreSQL: pgsql/contrib/pg_trgm/Makefile,v 1.7 2007/03/14 14:15:40 teodor Exp $
MODULE_big = pg_trgm
OBJS = trgm_op.o trgm_gist.o
OBJS = trgm_op.o trgm_gist.o trgm_gin.o
DATA_built = pg_trgm.sql
DATA = uninstall_pg_trgm.sql

View File

@ -113,6 +113,8 @@ Tsearch2 Integration
Next, create a trigram index on the word column:
CREATE INDEX words_idx ON words USING gist(word gist_trgm_ops);
or
CREATE INDEX words_idx ON words USING gin(word gin_trgm_ops);
Now, a SELECT query similar to the example above can be used to
suggest spellings for misspelled words in user search terms. A

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,7 @@ CREATE OPERATOR % (
JOIN = contjoinsel
);
--gist key
-- gist key
CREATE FUNCTION gtrgm_in(cstring)
RETURNS gtrgm
AS 'MODULE_PATHNAME'
@ -53,7 +53,7 @@ CREATE TYPE gtrgm (
OUTPUT = gtrgm_out
);
-- support functions
-- support functions for gist
CREATE FUNCTION gtrgm_consistent(gtrgm,internal,int4)
RETURNS bool
AS 'MODULE_PATHNAME'
@ -89,7 +89,7 @@ RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C;
-- create the operator class
-- create the operator class for gist
CREATE OPERATOR CLASS gist_trgm_ops
FOR TYPE text USING gist
AS
@ -103,5 +103,31 @@ AS
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
STORAGE gtrgm;
-- GIN support: trigram extraction.
-- Both overloads name no explicit link symbol, so each resolves to the C
-- function gin_extract_trgm in the module.

-- Two-argument form (used as support function 2 of gin_trgm_ops).
CREATE FUNCTION gin_extract_trgm(text, internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C;

-- Three-argument form (used as support function 3 of gin_trgm_ops).
CREATE FUNCTION gin_extract_trgm(text, internal, internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C;
-- GIN consistent function (support function 4 of gin_trgm_ops).
-- The C implementation finishes with PG_RETURN_BOOL, so the declared result
-- type must be bool rather than internal.  The argument list is unchanged,
-- so references by signature (operator class, DROP FUNCTION) still resolve.
CREATE FUNCTION gin_trgm_consistent(internal, internal, text)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE C;
-- GIN operator class for text.
-- Index keys are stored as int4 and ordered with btint4cmp; the % operator
-- is marked RECHECK, so index matches are re-evaluated against the heap row.
CREATE OPERATOR CLASS gin_trgm_ops
    FOR TYPE text USING gin
    AS
        OPERATOR    1   % (text, text) RECHECK,
        FUNCTION    1   btint4cmp (int4, int4),
        FUNCTION    2   gin_extract_trgm (text, internal),
        FUNCTION    3   gin_extract_trgm (text, internal, internal),
        FUNCTION    4   gin_trgm_consistent (internal, internal, text),
        STORAGE     int4;
COMMIT;

View File

@ -28,3 +28,11 @@ select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu098
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
drop index trgm_idx;
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
set enable_seqscan=off;
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;

View File

@ -28,6 +28,7 @@ typedef char trgm[3];
*(((char*)(a))+2) = *(((char*)(b))+2); \
} while(0);
/*
 * TRGMINT(a): pack the three bytes found at 'a' into a single int -- byte 0
 * in the low bits, byte 1 shifted left by 8, byte 2 shifted left by 16.
 * The bytes are read through a plain char pointer, so how bytes with the
 * high bit set contribute depends on whether char is signed on the platform.
 * 'a' is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define TRGMINT(a) ( (*(((char*)(a))+2)<<16)+(*(((char*)(a))+1)<<8)+*(((char*)(a))+0) )
typedef struct
{

View File

@ -0,0 +1,77 @@
#include "trgm.h"
#include "access/gin.h"
#include "access/itup.h"
#include "access/tuptoaster.h"
#include "storage/bufpage.h"
#include "utils/array.h"
#include "utils/builtins.h"
PG_FUNCTION_INFO_V1(gin_extract_trgm);
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
Datum
gin_extract_trgm(PG_FUNCTION_ARGS)
{
text *val = (text *) PG_GETARG_TEXT_P(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
TRGM *trg;
int4 trglen;
*nentries = 0;
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
trglen = ARRNELEM(trg);
if (trglen > 0)
{
trgm *ptr;
int4 i = 0,
item;
*nentries = (int32) trglen;
entries = (Datum *) palloc(sizeof(Datum) * trglen);
ptr = GETARR(trg);
while (ptr - GETARR(trg) < ARRNELEM(trg))
{
item = TRGMINT(ptr);
entries[i++] = Int32GetDatum(item);
ptr++;
}
}
PG_RETURN_POINTER(entries);
}
Datum
gin_trgm_consistent(PG_FUNCTION_ARGS)
{
bool *check = (bool *) PG_GETARG_POINTER(0);
text *query = (text *) PG_GETARG_TEXT_P(2);
bool res = FALSE;
TRGM *trg;
int4 i,
trglen,
ntrue = 0;
trg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
trglen = ARRNELEM(trg);
for (i = 0; i < trglen; i++)
if (check[i])
ntrue ++;
#ifdef DIVUNION
res = (trglen == ntrue) ? true : ((((((float4) ntrue) / ((float4) (trglen - ntrue)))) >= trgm_limit) ? true : false);
#else
res = (trglen == 0) ? false : ((((((float4) ntrue) / ((float4) trglen))) >= trgm_limit) ? true : false);
#endif
PG_RETURN_BOOL(res);
}

View File

@ -20,6 +20,14 @@ DROP FUNCTION gtrgm_consistent(gtrgm,internal,int4);
DROP TYPE gtrgm CASCADE;
-- GIN support objects.  The operator class goes first because it references
-- both gin_extract_trgm overloads and gin_trgm_consistent as its support
-- functions; the functions are dropped by argument signature afterwards.
DROP OPERATOR CLASS gin_trgm_ops USING gin;
DROP FUNCTION gin_extract_trgm(text, internal);
DROP FUNCTION gin_extract_trgm(text, internal, internal);
DROP FUNCTION gin_trgm_consistent(internal, internal, text);
DROP OPERATOR % (text, text);
DROP FUNCTION similarity_op(text,text);