diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile index 8203b6669c..4901b611ee 100644 --- a/contrib/tsearch2/Makefile +++ b/contrib/tsearch2/Makefile @@ -1,11 +1,12 @@ -# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.10 2005/09/27 17:13:11 tgl Exp $ +# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $ MODULE_big = tsearch2 OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ dict_snowball.o dict_ispell.o dict_syn.o \ wparser.o wparser_def.o \ - ts_cfg.o tsvector.o rewrite.o crc32.o query.o gistidx.o \ - tsvector_op.o rank.o ts_stat.o + ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \ + tsvector_op.o rank.o ts_stat.o \ + query_util.o query_support.o query_rewrite.o query_gist.o SUBDIRS := snowball ispell wordparser SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o) diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out index a7ac240ef9..81c456d05c 100644 --- a/contrib/tsearch2/expected/tsearch2.out +++ b/contrib/tsearch2/expected/tsearch2.out @@ -16,6 +16,9 @@ psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell psql:tsearch2.sql:543: NOTICE: type "gtsvector" is not yet defined DETAIL: Creating a shell type definition. psql:tsearch2.sql:548: NOTICE: argument type gtsvector is only a shell +psql:tsearch2.sql:997: NOTICE: type "gtsq" is not yet defined +DETAIL: Creating a shell type definition. +psql:tsearch2.sql:1002: NOTICE: argument type gtsq is only a shell --tsvector SELECT '1'::tsvector; tsvector @@ -342,6 +345,286 @@ SELECT '''the wether'':dc & '' sKies '':BC & a:d b:a'; 'the wether':dc & ' sKies ':BC & a:d b:a (1 row) +select 'a' < 'b & c'::tsquery; + ?column? +---------- + t +(1 row) + +select 'a' > 'b & c'::tsquery; + ?column? +---------- + f +(1 row) + +select 'a | f' < 'b & c'::tsquery; + ?column? +---------- + t +(1 row) + +select 'a | ff' < 'b & c'::tsquery; + ?column? 
+---------- + f +(1 row) + +select 'a | f | g' < 'b & c'::tsquery; + ?column? +---------- + f +(1 row) + +select numnode( 'new'::tsquery ); + numnode +--------- + 1 +(1 row) + +select numnode( 'new & york'::tsquery ); + numnode +--------- + 3 +(1 row) + +select numnode( 'new & york | qwery'::tsquery ); + numnode +--------- + 5 +(1 row) + +create table test_tsquery (txtkeyword text, txtsample text); +\set ECHO none +alter table test_tsquery add column keyword tsquery; +update test_tsquery set keyword = to_tsquery('default', txtkeyword); +alter table test_tsquery add column sample tsquery; +update test_tsquery set sample = to_tsquery('default', txtsample::text); +create unique index bt_tsq on test_tsquery (keyword); +select count(*) from test_tsquery where keyword < 'new & york'; + count +------- + 1 +(1 row) + +select count(*) from test_tsquery where keyword <= 'new & york'; + count +------- + 2 +(1 row) + +select count(*) from test_tsquery where keyword = 'new & york'; + count +------- + 1 +(1 row) + +select count(*) from test_tsquery where keyword >= 'new & york'; + count +------- + 3 +(1 row) + +select count(*) from test_tsquery where keyword > 'new & york'; + count +------- + 2 +(1 row) + +set enable_seqscan=off; +select count(*) from test_tsquery where keyword < 'new & york'; + count +------- + 1 +(1 row) + +select count(*) from test_tsquery where keyword <= 'new & york'; + count +------- + 2 +(1 row) + +select count(*) from test_tsquery where keyword = 'new & york'; + count +------- + 1 +(1 row) + +select count(*) from test_tsquery where keyword >= 'new & york'; + count +------- + 3 +(1 row) + +select count(*) from test_tsquery where keyword > 'new & york'; + count +------- + 2 +(1 row) + +set enable_seqscan=on; +select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); + rewrite +---------------------------------------------------------------------------------- + 'qq' & 'foo' & 'bar' & ( 'city' & 'york' & 
'new' | ( 'nyc' | 'apple' & 'big' ) ) +(1 row) + +select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text ); + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +select keyword from test_tsquery where keyword @ 'new'; + keyword +---------------- + 'new' & 'york' +(1 row) + +select keyword from test_tsquery where keyword @ 'moscow'; + keyword +---------- + 'moscow' +(1 row) + +select keyword from test_tsquery where keyword ~ 'new'; + keyword +--------- +(0 rows) + +select keyword from test_tsquery where keyword ~ 'moscow'; + keyword +---------- + 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query; + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where 
keyword ~ query; + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query; + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword; + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword; + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword; + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); +set enable_seqscan='off'; +select keyword from test_tsquery where keyword @ 'new'; + keyword +---------------- + 'new' & 'york' +(1 row) + +select keyword from test_tsquery where keyword @ 'moscow'; + keyword +---------- + 'moscow' +(1 row) + +select keyword from test_tsquery where keyword ~ 'new'; + keyword +--------- +(0 rows) + +select keyword from test_tsquery where keyword ~ 'moscow'; + keyword +---------- + 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query; + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) 
from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query; + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query; + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword; + rewrite +--------------------- + 'moskva' | 'moscow' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword; + rewrite +----------------------------------- + ( 'moskva' | 'moscow' ) & 'hotel' +(1 row) + +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword; + rewrite +------------------------------------------------------------------------------------- + 'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) ) +(1 row) + +set enable_seqscan='on'; select lexize('simple', 'ASD56 hsdkf'); lexize ----------------- diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c index a260d45935..e6f1ae3a89 100644 --- a/contrib/tsearch2/query.c +++ b/contrib/tsearch2/query.c @@ -23,7 +23,7 @@ #include "tsvector.h" #include "crc32.h" #include "query.h" -#include "rewrite.h" +#include "query_cleanup.h" #include "common.h" diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h index 3ac757a162..3a84afc9c0 100644 --- a/contrib/tsearch2/query.h +++ b/contrib/tsearch2/query.h @@ -17,7 +17,7 @@ typedef struct ITEM int4 val; /* user-friendly value, must correlate with WordEntry */ uint32 - unused:1, 
+ istrue:1, /* use for ranking in Cover */ length:11, distance:20; } ITEM; diff --git a/contrib/tsearch2/rewrite.c b/contrib/tsearch2/query_cleanup.c similarity index 97% rename from contrib/tsearch2/rewrite.c rename to contrib/tsearch2/query_cleanup.c index 2e9b39f18d..f56f7fbea3 100644 --- a/contrib/tsearch2/rewrite.c +++ b/contrib/tsearch2/query_cleanup.c @@ -7,14 +7,10 @@ #include -#include "access/gist.h" -#include "access/itup.h" -#include "storage/bufpage.h" -#include "utils/array.h" #include "utils/builtins.h" #include "query.h" -#include "rewrite.h" +#include "query_cleanup.h" typedef struct NODE { diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/query_cleanup.h similarity index 100% rename from contrib/tsearch2/rewrite.h rename to contrib/tsearch2/query_cleanup.h diff --git a/contrib/tsearch2/query_gist.c b/contrib/tsearch2/query_gist.c new file mode 100644 index 0000000000..630d3e4e00 --- /dev/null +++ b/contrib/tsearch2/query_gist.c @@ -0,0 +1,324 @@ +#include "postgres.h" + +#include "storage/bufpage.h" +#include "access/skey.h" +#include "access/gist.h" + +#include "query.h" + +typedef uint64 TPQTGist; + +#define GETENTRY(vec,pos) ((TPQTGist *) DatumGetPointer((vec)->vector[(pos)].key)) + +PG_FUNCTION_INFO_V1(tsq_mcontains); +Datum tsq_mcontains(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsq_mcontained); +Datum tsq_mcontained(PG_FUNCTION_ARGS); + +static TPQTGist +makesign(QUERYTYPE* a) { + int i; + ITEM *ptr = GETQUERY(a); + TPQTGist sign = 0; + + for (i = 0; i < a->size; i++) { + if ( ptr->type == VAL ) + sign |= 1 << (ptr->val % 64); + ptr++; + } + + return sign; +} + +Datum +tsq_mcontains(PG_FUNCTION_ARGS) { + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + TPQTGist sq, se; + int i,j; + ITEM *iq, *ie; + + if ( query->size < ex->size ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( 
false ); + } + + sq = makesign(query); + se = makesign(ex); + + if ( (sq&se)!=se ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( false ); + } + + ie = GETQUERY(ex); + + for(i=0;isize;i++) { + iq = GETQUERY(query); + if ( ie[i].type != VAL ) + continue; + for(j=0;jsize;j++) + if ( iq[j].type == VAL && ie[i].val == iq[j].val ) { + j = query->size+1; + break; + } + if ( j == query->size ) { + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( false ); + } + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + + PG_RETURN_BOOL( true ); +} + +Datum +tsq_mcontained(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM( + DirectFunctionCall2( + tsq_mcontains, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) + ) + ); +} + +PG_FUNCTION_INFO_V1(gtsq_in); +Datum gtsq_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_out); +Datum gtsq_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_compress); +Datum gtsq_compress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_decompress); +Datum gtsq_decompress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_consistent); +Datum gtsq_consistent(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_union); +Datum gtsq_union(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_same); +Datum gtsq_same(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_penalty); +Datum gtsq_penalty(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsq_picksplit); +Datum gtsq_picksplit(PG_FUNCTION_ARGS); + + +Datum +gtsq_in(PG_FUNCTION_ARGS) { + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +Datum +gtsq_out(PG_FUNCTION_ARGS) { + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +Datum +gtsq_compress(PG_FUNCTION_ARGS) { + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) { + TPQTGist *sign = (TPQTGist*)palloc( sizeof(TPQTGist) ); + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + *sign = makesign( (QUERYTYPE*)DatumGetPointer(PG_DETOAST_DATUM(entry->key)) ); + + 
gistentryinit(*retval, PointerGetDatum(sign), + entry->rel, entry->page, + entry->offset, sizeof(TPQTGist), FALSE); + } + + PG_RETURN_POINTER(retval); +} + +Datum +gtsq_decompress(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM(PG_GETARG_DATUM(0)); +} + +Datum +gtsq_consistent(PG_FUNCTION_ARGS) { + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + TPQTGist *key = (TPQTGist*) DatumGetPointer(entry->key); + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + TPQTGist sq = makesign(query); + + if ( GIST_LEAF(entry) ) + PG_RETURN_BOOL( ( (*key) & sq ) == ((strategy==1) ? sq : *key) ); + else + PG_RETURN_BOOL( (*key) & sq ); +} + +Datum +gtsq_union(PG_FUNCTION_ARGS) { + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + TPQTGist *sign = (TPQTGist*)palloc( sizeof(TPQTGist) ); + int i; + int *size = (int *) PG_GETARG_POINTER(1); + + memset( sign, 0, sizeof(TPQTGist) ); + + for (i = 0; i < entryvec->n;i++) + *sign |= *GETENTRY(entryvec, i); + + *size = sizeof(TPQTGist); + + PG_RETURN_POINTER(sign); +} + +Datum +gtsq_same(PG_FUNCTION_ARGS) { + TPQTGist *a = (TPQTGist *) PG_GETARG_POINTER(0); + TPQTGist *b = (TPQTGist *) PG_GETARG_POINTER(1); + + PG_RETURN_POINTER( *a == *b ); +} + +static int +sizebitvec(TPQTGist sign) { + int size=0,i; + + for(i=0;i<64;i++) + size += 0x01 & (sign>>i); +} + +static int +hemdist(TPQTGist a, TPQTGist b) { + TPQTGist res = a ^ b; + + return sizebitvec(res); +} + +Datum +gtsq_penalty(PG_FUNCTION_ARGS) { + TPQTGist *origval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key); + TPQTGist *newval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key); + float *penalty = (float *) PG_GETARG_POINTER(2); + + *penalty = hemdist(*origval, *newval); + + PG_RETURN_POINTER(penalty); +} + + +typedef struct { + OffsetNumber pos; + int4 cost; +} SPLITCOST; + +static int +comparecost(const void *a, 
const void *b) { /* qsort comparator: orders SPLITCOST items by ascending cost */
+    if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
+        return 0;
+    else
+        return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
+}
+/*
+ * WISH_F(a,b,c) evaluates to -((a-b) cubed) * c as a double.  gtsq_picksplit
+ * adds it to size_beta with a = spl_nleft, b = spl_nright and c = 0.05.
+ */
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+/*
+ * gtsq_picksplit
+ *    Splits the entries of entryvec into v->spl_left / v->spl_right.
+ *    The two entries with the largest Hamming distance become the seeds;
+ *    the remaining entries are then visited in order of increasing
+ *    |distance to seed 1 - distance to seed 2| and added to one side,
+ *    whose signature is extended with a bitwise OR.
+ */
+Datum
+gtsq_picksplit(PG_FUNCTION_ARGS) {
+    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+    OffsetNumber maxoff = entryvec->n - 2;
+    OffsetNumber k,j;
+
+    TPQTGist *datum_l, *datum_r;
+    int4 size_alpha, size_beta;
+    int4 size_waste, waste = -1;
+    int4 nbytes;
+    OffsetNumber seed_1 = 0, seed_2 = 0;
+    OffsetNumber *left, *right;
+
+    SPLITCOST *costvector;
+
+    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+    left = v->spl_left = (OffsetNumber *) palloc(nbytes);
+    right = v->spl_right = (OffsetNumber *) palloc(nbytes);
+    v->spl_nleft = v->spl_nright = 0;
+    /* seed selection: keep the pair (k, j) with the largest hemdist seen */
+    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
+            size_waste = hemdist( *GETENTRY(entryvec,j), *GETENTRY(entryvec,k) );
+            if (size_waste > waste) {
+                waste = size_waste;
+                seed_1 = k;
+                seed_2 = j;
+            }
+        }
+
+    /* no pair was selected above: fall back to entries 1 and 2 */
+    if (seed_1 == 0 || seed_2 == 0) {
+        seed_1 = 1;
+        seed_2 = 2;
+    }
+    /* each side's signature starts as a copy of its seed */
+    datum_l = (TPQTGist*)palloc( sizeof(TPQTGist) );
+    *datum_l=*GETENTRY(entryvec,seed_1);
+    datum_r = (TPQTGist*)palloc( sizeof(TPQTGist) );
+    *datum_r=*GETENTRY(entryvec,seed_2);
+
+    /* cost of an entry = |hemdist to seed_1 - hemdist to seed_2| */
+    maxoff = OffsetNumberNext(maxoff);
+    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
+        costvector[j - 1].pos = j;
+        size_alpha = hemdist( *GETENTRY(entryvec,seed_1), *GETENTRY(entryvec,j) );
+        size_beta = hemdist( *GETENTRY(entryvec,seed_2), *GETENTRY(entryvec,j) );
+        costvector[j - 1].cost = abs(size_alpha - size_beta);
+    }
+    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+    /* distribute the entries in sorted order */
+    for (k = 0; k < maxoff; k++) {
+        j = costvector[k].pos;
+        if
( j == seed_1 ) { /* each seed is placed on its own side */
+            *left++ = j;
+            v->spl_nleft++;
+            continue;
+        } else if ( j == seed_2 ) {
+            *right++ = j;
+            v->spl_nright++;
+            continue;
+        }
+        size_alpha = hemdist( *datum_l, *GETENTRY(entryvec,j) );
+        size_beta = hemdist( *datum_r, *GETENTRY(entryvec,j) );
+        /* WISH_F adds a term that depends on (spl_nleft - spl_nright) */
+        if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) {
+            *datum_l |= *GETENTRY(entryvec,j);
+            *left++ = j;
+            v->spl_nleft++;
+        } else {
+            *datum_r |= *GETENTRY(entryvec,j);
+            *right++ = j;
+            v->spl_nright++;
+        }
+    }
+    /* store FirstOffsetNumber just past the last offset written on each side */
+    *right = *left = FirstOffsetNumber;
+    v->spl_ldatum = PointerGetDatum(datum_l);
+    v->spl_rdatum = PointerGetDatum(datum_r);
+
+    PG_RETURN_POINTER(v);
+}
+
+
diff --git a/contrib/tsearch2/query_rewrite.c b/contrib/tsearch2/query_rewrite.c
new file mode 100644
index 0000000000..c462097bce
--- /dev/null
+++ b/contrib/tsearch2/query_rewrite.c
@@ -0,0 +1,466 @@
+#include "postgres.h"
+#include "executor/spi.h"
+
+#include "query_util.h"
+    /* assigned by rewrite_accum() from the AggState's aggcontext */
+MemoryContext AggregateContext = NULL;
+/*
+ * addone
+ *    Advances counters[0..last] to the next combination: counters[last] is
+ *    incremented; when it reaches `total` the carry is propagated to
+ *    position last-1 (with limit total-1) and counters[last] restarts at
+ *    counters[last-1]+1.  Returns 0 when no further combination exists,
+ *    1 otherwise.  findeq() uses it to enumerate subsets of child indexes.
+ */
+static int
+addone(int * counters, int last, int total) {
+    counters[last]++;
+    if ( counters[last]>=total ) {
+        if (last==0)
+            return 0;
+        if ( addone( counters, last-1, total-1 ) == 0 )
+            return 0;
+        counters[last] = counters[last-1]+1;
+    }
+    return 1;
+}
+/*
+ * findeq
+ *    Tries to match `ex` at `node` and to substitute `subs` for it.
+ *    node is returned untouched when its sign does not cover ex->sign, when
+ *    the valnode type or val differ, or when it carries QTN_NOCHANGE.
+ *    On a full match (QTNEq) node is freed and replaced by a copy of subs
+ *    flagged QTN_NOCHANGE, or by NULL when subs is NULL; *isfind is set.
+ *    When node is an OPR with more children than ex, subsets of
+ *    ex->nchild children are compared against ex instead.
+ */
+static QTNode *
+findeq(QTNode *node, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind) {
+
+    if ( (node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val )
+        return node;
+
+    if ( node->flags & QTN_NOCHANGE )
+        return node;
+
+    if ( node->valnode->type==OPR ) {
+        if ( node->nchild == ex->nchild ) {
+            if ( QTNEq( node, ex ) ) {
+                QTNFree( node );
+                if ( subs ) {
+                    node = QTNCopy( subs, memtype );
+                    node->flags |= QTN_NOCHANGE;
+                } else
+                    node = NULL;
+                *isfind = true;
+            }
+        } else if ( node->nchild > ex->nchild ) {
+            int *counters = (int*)palloc( sizeof(int) * node->nchild );
+            int i;
+            QTNode *tnode = (QTNode*)MEMALLOC( memtype, sizeof(QTNode) );
+
+            memset(tnode, 0,
sizeof(QTNode)); + tnode->child = (QTNode**)MEMALLOC( memtype, sizeof(QTNode*) * ex->nchild ); + tnode->nchild = ex->nchild; + tnode->valnode = (ITEM*)MEMALLOC( memtype, sizeof(ITEM) ); + *(tnode->valnode) = *(ex->valnode); + + for(i=0;inchild;i++) + counters[i]=i; + + do { + tnode->sign=0; + for(i=0;inchild;i++) { + tnode->child[i] = node->child[ counters[i] ]; + tnode->sign |= tnode->child[i]->sign; + } + + if ( QTNEq( tnode, ex ) ) { + int j=0; + + MEMFREE( memtype, tnode->valnode ); + MEMFREE( memtype, tnode->child ); + MEMFREE( memtype, tnode ); + if ( subs ) { + tnode = QTNCopy( subs, memtype ); + tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE; + } else + tnode = NULL; + + node->child[ counters[0] ] = tnode; + + for(i=1;inchild;i++) + node->child[ counters[i] ] = NULL; + for(i=0;inchild;i++) { + if ( node->child[i] ) { + node->child[j] = node->child[i]; + j++; + } + } + + node->nchild = j; + + *isfind = true; + + break; + } + } while (addone(counters,ex->nchild-1,node->nchild)); + if ( tnode && (tnode->flags & QTN_NOCHANGE) == 0 ) { + MEMFREE( memtype, tnode->valnode ); + MEMFREE( memtype, tnode->child ); + MEMFREE( memtype, tnode ); + } else + QTNSort( node ); + pfree( counters ); + } + } else if ( QTNEq( node, ex ) ) { + QTNFree( node ); + if ( subs ) { + node = QTNCopy( subs, memtype ); + node->flags |= QTN_NOCHANGE; + } else { + node = NULL; + } + *isfind = true; + } + + return node; +} + +static QTNode * +dofindsubquery( QTNode *root, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind ) { + root = findeq( root, ex, memtype, subs, isfind ); + + if ( root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type==OPR) { + int i; + for(i=0;inchild;i++) + root->child[i] = dofindsubquery( root->child[i], ex, memtype, subs, isfind ); + } + + return root; +} + +static QTNode * +dropvoidsubtree( QTNode *root ) { + + if ( !root ) + return NULL; + + if ( root->valnode->type==OPR ) { + int i,j=0; + + for(i=0;inchild;i++) { + if ( root->child[i] ) { + 
root->child[j] = root->child[i]; + j++; + } + } + + root->nchild = j; + + if ( root->valnode->val == (int4)'!' && root->nchild==0 ) { + QTNFree(root); + root=NULL; + } else if ( root->nchild==1 ) { + QTNode *nroot = root->child[0]; + pfree(root); + root = nroot; + } + } + + return root; +} + +static QTNode * +findsubquery( QTNode *root, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind ) { + bool DidFind = false; + root = dofindsubquery( root, ex, memtype, subs, &DidFind ); + + if ( !subs && DidFind ) + root = dropvoidsubtree( root ); + + if ( isfind ) + *isfind = DidFind; + + return root; +} + +static Oid tsqOid = InvalidOid; +static void +get_tsq_Oid(void) +{ + int ret; + bool isnull; + + if ((ret = SPI_exec("select oid from pg_type where typname='tsquery'", 1)) < 0) + /* internal error */ + elog(ERROR, "SPI_exec to get tsquery oid returns %d", ret); + + if (SPI_processed < 0) + /* internal error */ + elog(ERROR, "There is no tsvector type"); + tsqOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull)); + if (tsqOid == InvalidOid) + /* internal error */ + elog(ERROR, "tsquery type has InvalidOid"); +} + + +PG_FUNCTION_INFO_V1(tsquery_rewrite); +PG_FUNCTION_INFO_V1(rewrite_accum); +Datum rewrite_accum(PG_FUNCTION_ARGS); + +Datum +rewrite_accum(PG_FUNCTION_ARGS) { + QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0); + ArrayType *qa = (ArrayType *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); + QUERYTYPE *q; + QTNode *qex, *subs = NULL, *acctree; + bool isfind = false; + + AggregateContext = ((AggState *) fcinfo->context)->aggcontext; + + if (acc == NULL || PG_ARGISNULL(0)) { + acc = (QUERYTYPE*)MEMALLOC( AggMemory, sizeof(QUERYTYPE) ); + acc->len = HDRSIZEQT; + acc->size = 0; + } + + if ( qa == NULL || PG_ARGISNULL(1) ) { + PG_FREE_IF_COPY( qa, 1 ); + PG_RETURN_POINTER( acc ); + } + + if ( ARR_NDIM(qa) != 1 ) + elog(ERROR, "array must be one-dimensional, not %d dimension", ARR_NDIM(qa)); + + if ( 
ArrayGetNItems( ARR_NDIM(qa), ARR_DIMS(qa)) != 3 )
+        elog(ERROR, "array should have only three elements");
+    /* look up the tsquery type OID once; get_tsq_Oid() caches it in tsqOid */
+    if (tsqOid == InvalidOid) {
+        SPI_connect();
+        get_tsq_Oid();
+        SPI_finish();
+    }
+
+    if (ARR_ELEMTYPE(qa) != tsqOid)
+        elog(ERROR, "array should contain tsquery type");
+    /* first array element: used as the starting query while acc is still empty */
+    q = (QUERYTYPE*)ARR_DATA_PTR(qa);
+    if ( q->size == 0 )
+        PG_RETURN_POINTER( acc );
+    /* size 0 with len > HDRSIZEQT is the marker stored below when the tree became empty */
+    if ( !acc->size ) {
+        if ( acc->len > HDRSIZEQT )
+            PG_RETURN_POINTER( acc );
+        else
+            acctree = QT2QTN( GETQUERY(q), GETOPERAND(q) );
+    } else
+        acctree = QT2QTN( GETQUERY(acc), GETOPERAND(acc) );
+
+    QTNTernary( acctree );
+    QTNSort( acctree );
+    /* second element, located MAXALIGN(len) bytes after the first: the subquery to find */
+    q = (QUERYTYPE*)( ((char*)ARR_DATA_PTR(qa)) + MAXALIGN( q->len ) );
+    if ( q->size == 0 )
+        PG_RETURN_POINTER( acc );
+    qex = QT2QTN( GETQUERY(q), GETOPERAND(q) );
+    QTNTernary( qex );
+    QTNSort( qex );
+    /* third element: the substitution; subs stays NULL when it is empty */
+    q = (QUERYTYPE*)( ((char*)q) + MAXALIGN( q->len ) );
+    if ( q->size )
+        subs = QT2QTN( GETQUERY(q), GETOPERAND(q) );
+
+    acctree = findsubquery( acctree, qex, PlainMemory, subs, &isfind );
+
+    if ( isfind || !acc->size ) {
+        /* pfree( acc ); do not pfree(p), because nodeAgg.c will */
+        if ( acctree ) {
+            QTNBinary( acctree );
+            acc = QTN2QT( acctree, AggMemory );
+        } else {
+            acc = (QUERYTYPE*)MEMALLOC( AggMemory, HDRSIZEQT*2 );
+            acc->len = HDRSIZEQT * 2;
+            acc->size = 0;
+        }
+    }
+
+    QTNFree( qex );
+    QTNFree( subs );
+    QTNFree( acctree );
+
+    PG_RETURN_POINTER( acc );
+}
+
+PG_FUNCTION_INFO_V1(rewrite_finish);
+Datum rewrite_finish(PG_FUNCTION_ARGS);
+/*
+ * rewrite_finish(acc)
+ *    Aggregate final function: returns a palloc'd copy of acc (acc->len
+ *    bytes) and pfrees acc.  When acc is NULL or has size 0, a freshly
+ *    allocated empty query (len = HDRSIZEQT, size = 0) is copied instead.
+ */
+Datum
+rewrite_finish(PG_FUNCTION_ARGS) {
+    QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0);
+    QUERYTYPE *rewrited;
+
+    if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0 ) {
+        acc = (QUERYTYPE*)palloc(sizeof(QUERYTYPE));
+        acc->len = HDRSIZEQT;
+        acc->size = 0;
+    }
+
+    rewrited = (QUERYTYPE*) palloc( acc->len );
+    memcpy( rewrited, acc, acc->len );
+    pfree( acc );
+
+    PG_RETURN_POINTER(rewrited);
+}
+
+Datum tsquery_rewrite(PG_FUNCTION_ARGS);
+/*
+ * tsquery_rewrite(query, text)
+ *    Runs the SQL statement given as text through SPI; the statement must
+ *    return two tsquery columns.  For each row, occurrences of column 1 in
+ *    `query` are replaced by column 2.
+ */
+Datum
+tsquery_rewrite(PG_FUNCTION_ARGS) {
+
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+    text *in = PG_GETARG_TEXT_P(1);
+    QUERYTYPE *rewrited = query;
+    QTNode *tree;
+    char *buf;
+    void *plan;
+    Portal portal;
+    bool isnull;
+    int i;
+    /* an empty query has nothing to rewrite: return the copy as is */
+    if ( query->size == 0 ) {
+        PG_FREE_IF_COPY(in, 1);
+        PG_RETURN_POINTER( rewrited );
+    }
+
+    tree = QT2QTN( GETQUERY(query), GETOPERAND(query) );
+    QTNTernary( tree );
+    QTNSort( tree );
+    /* buf: NUL-terminated copy of the text argument, used as the SQL statement */
+    buf = (char*)palloc( VARSIZE(in) );
+    memcpy(buf, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+    buf[ VARSIZE(in) - VARHDRSZ ] = '\0';
+
+    SPI_connect();
+
+    if (tsqOid == InvalidOid)
+        get_tsq_Oid();
+
+    if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
+        elog(ERROR, "SPI_prepare('%s') returns NULL", buf);
+
+    if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
+        elog(ERROR, "SPI_cursor_open('%s') returns NULL", buf);
+    /* rows are fetched from the cursor 100 at a time */
+    SPI_cursor_fetch(portal, true, 100);
+    /* the result must consist of exactly two columns, both of type tsquery */
+    if (SPI_tuptable->tupdesc->natts != 2)
+        elog(ERROR, "number of fields doesn't equal to 2");
+
+    if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tsqOid )
+        elog(ERROR, "column #1 isn't of tsquery type");
+
+    if (SPI_gettypeid(SPI_tuptable->tupdesc, 2) != tsqOid )
+        elog(ERROR, "column #2 isn't of tsquery type");
+    /* stop when a fetch returns no rows or the tree has been rewritten to nothing */
+    while (SPI_processed > 0 && tree ) {
+        for (i = 0; i < SPI_processed && tree; i++) {
+            Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+            Datum sdata;
+
+            if ( isnull ) continue;
+
+            sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
+            /* rows with a NULL in either column are skipped */
+            if (!isnull) {
+                QUERYTYPE *qtex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(qdata));
+                QUERYTYPE *qtsubs = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(sdata));
+                QTNode *qex, *qsubs = NULL;
+                /* empty search query: free any detoasted copies and move on */
+                if (qtex->size == 0) {
+                    if ( qtex != (QUERYTYPE *) DatumGetPointer(qdata) )
+                        pfree( qtex );
+                    if ( qtsubs != (QUERYTYPE *) DatumGetPointer(sdata) )
+                        pfree( qtsubs );
+                    continue;
+                }
+
+                qex = QT2QTN( GETQUERY(qtex), GETOPERAND(qtex) );
+
+                QTNTernary( qex
);
+                QTNSort( qex );
+                /* an empty substitution leaves qsubs NULL */
+                if ( qtsubs->size )
+                    qsubs = QT2QTN( GETQUERY(qtsubs), GETOPERAND(qtsubs) );
+
+                tree = findsubquery( tree, qex, SPIMemory, qsubs, NULL );
+                /* release the per-row trees and any detoasted copies */
+                QTNFree( qex );
+                if ( qtex != (QUERYTYPE *) DatumGetPointer(qdata) )
+                    pfree( qtex );
+                QTNFree( qsubs );
+                if ( qtsubs != (QUERYTYPE *) DatumGetPointer(sdata) )
+                    pfree( qtsubs );
+            }
+        }
+        /* fetch the next batch of up to 100 rows */
+        SPI_freetuptable(SPI_tuptable);
+        SPI_cursor_fetch(portal, true, 100);
+    }
+
+    SPI_freetuptable(SPI_tuptable);
+    SPI_cursor_close(portal);
+    SPI_freeplan(plan);
+    SPI_finish();
+
+    /* tree == NULL: nothing is left, so the input copy is turned into an empty query */
+    if ( tree ) {
+        QTNBinary( tree );
+        rewrited = QTN2QT( tree, PlainMemory );
+        QTNFree( tree );
+        PG_FREE_IF_COPY(query, 0);
+    } else {
+        rewrited->len = HDRSIZEQT;
+        rewrited->size = 0;
+    }
+
+    pfree(buf);
+    PG_FREE_IF_COPY(in, 1);
+    PG_RETURN_POINTER( rewrited );
+}
+
+
+PG_FUNCTION_INFO_V1(tsquery_rewrite_query);
+Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);
+/*
+ * tsquery_rewrite_query(query, ex, subst)
+ *    Replaces occurrences of `ex` inside `query` by `subst` (an empty subst
+ *    removes them) and returns the resulting query.  If query or ex is
+ *    empty, the detoasted copy of query is returned unchanged.
+ */
+Datum
+tsquery_rewrite_query(PG_FUNCTION_ARGS) {
+    QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
+    QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
+    QUERYTYPE *subst = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
+    QUERYTYPE *rewrited = query;
+    QTNode *tree, *qex, *subs = NULL;
+
+    if ( query->size == 0 || ex->size == 0 ) {
+        PG_FREE_IF_COPY(ex, 1);
+        PG_FREE_IF_COPY(subst, 2);
+        PG_RETURN_POINTER( rewrited );
+    }
+
+    tree = QT2QTN( GETQUERY(query), GETOPERAND(query) );
+    QTNTernary( tree );
+    QTNSort( tree );
+
+    qex = QT2QTN( GETQUERY(ex), GETOPERAND(ex) );
+    QTNTernary( qex );
+    QTNSort( qex );
+
+    if ( subst->size )
+        subs = QT2QTN( GETQUERY(subst), GETOPERAND(subst) );
+
+    tree = findsubquery( tree, qex, PlainMemory, subs, NULL );
+    QTNFree( qex );
+    QTNFree( subs );
+    /* nothing left after the rewrite: return the input copy as an empty query */
+    if ( !tree ) {
+        rewrited->len = HDRSIZEQT;
+        rewrited->size = 0;
+        PG_FREE_IF_COPY(ex, 1);
+        PG_FREE_IF_COPY(subst, 2);
+        PG_RETURN_POINTER( rewrited );
+    } else {
+        QTNBinary( tree );
+
rewrited = QTN2QT( tree, PlainMemory ); + QTNFree( tree ); + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER( rewrited ); +} + diff --git a/contrib/tsearch2/query_support.c b/contrib/tsearch2/query_support.c new file mode 100644 index 0000000000..c973def7d4 --- /dev/null +++ b/contrib/tsearch2/query_support.c @@ -0,0 +1,76 @@ +#include "postgres.h" +#include "fmgr.h" + +#include "query_util.h" + +PG_FUNCTION_INFO_V1(tsquery_numnode); +Datum tsquery_numnode(PG_FUNCTION_ARGS); + +Datum +tsquery_numnode(PG_FUNCTION_ARGS) { + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + int nnode = query->size; + PG_FREE_IF_COPY(query,0); + PG_RETURN_INT32(nnode); +} + +static int +CompareTSQ( QUERYTYPE *a, QUERYTYPE *b ) { + if ( a->size != b->size ) { + return ( a->size < b->size ) ? -1 : 1; + } else if ( a->len != b->len ) { + return ( a->len < b->len ) ? -1 : 1; + } else { + QTNode *an = QT2QTN( GETQUERY(a), GETOPERAND(a) ); + QTNode *bn = QT2QTN( GETQUERY(b), GETOPERAND(b) ); + int res = QTNodeCompare(an, bn); + + QTNFree(an); + QTNFree(bn); + + return res; + } + + return 0; +} + +PG_FUNCTION_INFO_V1(tsquery_cmp); \ +Datum tsquery_cmp(PG_FUNCTION_ARGS); + +Datum +tsquery_cmp(PG_FUNCTION_ARGS) { + QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); + QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); + int res = CompareTSQ(a,b); + + PG_FREE_IF_COPY(a,0); + PG_FREE_IF_COPY(b,1); + + PG_RETURN_INT32(res); +} + +#define CMPFUNC( NAME, ACTION ) \ +PG_FUNCTION_INFO_V1(NAME); \ +Datum NAME(PG_FUNCTION_ARGS); \ + \ +Datum \ +NAME(PG_FUNCTION_ARGS) { \ + QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); \ + QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); \ + int res = CompareTSQ(a,b); \ + \ + PG_FREE_IF_COPY(a,0); \ + 
PG_FREE_IF_COPY(b,1); \ + \ + PG_RETURN_BOOL( ACTION ); \ +} + +CMPFUNC( tsquery_lt, res <0 ); +CMPFUNC( tsquery_le, res<=0 ); +CMPFUNC( tsquery_eq, res==0 ); +CMPFUNC( tsquery_ge, res>=0 ); +CMPFUNC( tsquery_gt, res >0 ); +CMPFUNC( tsquery_ne, res!=0 ); + + + diff --git a/contrib/tsearch2/query_util.c b/contrib/tsearch2/query_util.c new file mode 100644 index 0000000000..f509d9b642 --- /dev/null +++ b/contrib/tsearch2/query_util.c @@ -0,0 +1,257 @@ +#include "postgres.h" +#include "executor/spi.h" +#include "query_util.h" + +QTNode* +QT2QTN( ITEM *in, char *operand ) { + QTNode *node = (QTNode*)palloc0( sizeof(QTNode) ); + + node->valnode = in; + + if (in->type == OPR) { + node->child = (QTNode**)palloc0( sizeof(QTNode*) * 2 ); + node->child[0] = QT2QTN( in + 1, operand ); + node->sign = node->child[0]->sign; + if (in->val == (int4) '!') + node->nchild = 1; + else { + node->nchild = 2; + node->child[1] = QT2QTN( in + in->left, operand ); + node->sign |= node->child[1]->sign; + } + } else if ( operand ) { + node->word = operand + in->distance; + node->sign = 1 << ( in->val % 32 ); + } + + return node; +} + +void +QTNFree( QTNode* in ) { + if ( !in ) + return; + + if ( in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) !=0 ) + pfree( in->word ); + + if ( in->child ) { + if ( in->valnode ) { + if ( in->valnode->type == OPR && in->nchild > 0 ) { + int i; + for (i=0;inchild;i++) + QTNFree( in->child[i] ); + } + if ( in->flags & QTN_NEEDFREE ) + pfree( in->valnode ); + } + pfree( in->child ); + } + + pfree( in ); +} + +int +QTNodeCompare( QTNode *an, QTNode *bn ) { + if ( an->valnode->type != bn->valnode->type ) + return ( an->valnode->type > bn->valnode->type ) ? -1 : 1; + else if ( an->valnode->val != bn->valnode->val ) + return ( an->valnode->val > bn->valnode->val ) ? 
-1 : 1; + else if ( an->valnode->type == VAL ) { + if ( an->valnode->length == bn->valnode->length ) + return strncmp( an->word, bn->word, an->valnode->length ); + else + return ( an->valnode->length > bn->valnode->length ) ? -1 : 1; + } else if ( an->nchild != bn->nchild ) { + return ( an->nchild > bn->nchild ) ? -1 : 1; + } else { + int i,res; + + for( i=0; inchild; i++ ) + if ( (res=QTNodeCompare(an->child[i], bn->child[i]))!=0 ) + return res; + } + + return 0; +} + +static int +cmpQTN( const void *a, const void *b ) { + return QTNodeCompare( *(QTNode**)a, *(QTNode**)b ); +} + +void +QTNSort( QTNode* in ) { + int i; + + if ( in->valnode->type != OPR ) + return; + + for (i=0;inchild;i++) + QTNSort( in->child[i] ); + if ( in->nchild > 1 ) + qsort((void *) in->child, in->nchild, sizeof(QTNode*), cmpQTN); +} + +bool +QTNEq( QTNode* a, QTNode* b ) { + uint32 sign = a->sign & b->sign; + if ( !(sign == a->sign && sign == b->sign) ) + return 0; + + return ( QTNodeCompare(a,b) == 0 ) ? true : false; +} + +void +QTNTernary( QTNode* in ) { + int i; + + if ( in->valnode->type != OPR ) + return; + + for (i=0;inchild;i++) + QTNTernary( in->child[i] ); + + for (i=0;inchild;i++) { + if ( in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val ) { + QTNode* cc = in->child[i]; + int oldnchild = in->nchild; + + in->nchild += cc->nchild-1; + in->child = (QTNode**)repalloc( in->child, in->nchild * sizeof(QTNode*) ); + + if ( i+1 != oldnchild ) + memmove( in->child + i + cc->nchild, in->child + i + 1, + (oldnchild-i-1)*sizeof(QTNode*) ); + + memcpy( in->child + i, cc->child, cc->nchild * sizeof(QTNode*) ); + i += cc->nchild-1; + + pfree(cc); + } + } +} + +void +QTNBinary( QTNode* in ) { + int i; + + if ( in->valnode->type != OPR ) + return; + + for (i=0;inchild;i++) + QTNBinary( in->child[i] ); + + if ( in->nchild <= 2 ) + return; + + while( in->nchild > 2 ) { + QTNode *nn = (QTNode*)palloc0( sizeof(QTNode) ); + nn->valnode = (ITEM*)palloc0( 
sizeof(ITEM) ); + nn->child = (QTNode**)palloc0( sizeof(QTNode*) * 2 ); + + nn->nchild = 2; + nn->flags = QTN_NEEDFREE; + + nn->child[0] = in->child[0]; + nn->child[1] = in->child[1]; + nn->sign = nn->child[0]->sign | nn->child[1]->sign; + + nn->valnode->type = in->valnode->type; + nn->valnode->val = in->valnode->val; + + in->child[0] = nn; + in->child[1] = in->child[ in->nchild-1 ]; + in->nchild--; + } +} + +static void +cntsize(QTNode *in, int4 *sumlen, int4 *nnode) { + *nnode += 1; + if ( in->valnode->type == OPR ) { + int i; + for (i=0;inchild;i++) + cntsize(in->child[i], sumlen, nnode); + } else { + *sumlen += in->valnode->length+1; + } +} + +typedef struct { + ITEM *curitem; + char *operand; + char *curoperand; +} QTN2QTState; + +static void +fillQT( QTN2QTState *state, QTNode *in ) { + *(state->curitem) = *(in->valnode); + + if ( in->valnode->type == VAL ) { + memcpy( state->curoperand, in->word, in->valnode->length ); + state->curitem->distance = state->curoperand - state->operand; + state->curoperand[ in->valnode->length ] = '\0'; + state->curoperand += in->valnode->length + 1; + state->curitem++; + } else { + ITEM *curitem = state->curitem; + + Assert( in->nchild<=2 ); + state->curitem++; + + fillQT( state, in->child[0] ); + + if ( in->nchild==2 ) { + curitem->left = state->curitem - curitem; + fillQT( state, in->child[1] ); + } + } +} + +QUERYTYPE* +QTN2QT( QTNode* in, MemoryType memtype ) { + QUERYTYPE *out; + int len; + int sumlen=0, nnode=0; + QTN2QTState state; + + cntsize(in, &sumlen, &nnode); + len = COMPUTESIZE( nnode, sumlen ); + + out = (QUERYTYPE*)MEMALLOC(memtype, len); + out->len = len; + out->size = nnode; + + state.curitem = GETQUERY( out ); + state.operand = state.curoperand = GETOPERAND( out ); + + fillQT( &state, in ); + return out; +} + +QTNode * +QTNCopy( QTNode* in, MemoryType memtype ) { + QTNode *out = (QTNode*)MEMALLOC( memtype, sizeof(QTNode) ); + + *out = *in; + out->valnode = (ITEM*)MEMALLOC( memtype, sizeof(ITEM) ); + 
*(out->valnode) = *(in->valnode); + out->flags |= QTN_NEEDFREE; + + if ( in->valnode->type == VAL ) { + out->word = MEMALLOC( memtype, in->valnode->length + 1 ); + memcpy( out->word, in->word, in->valnode->length ); + out->word[ in->valnode->length ] = '\0'; + out->flags |= QTN_WORDFREE; + } else { + int i; + + out->child = (QTNode**)MEMALLOC( memtype, sizeof(QTNode*) * in->nchild ); + + for(i=0;inchild;i++) + out->child[i] = QTNCopy( in->child[i], memtype ); + } + + return out; +} diff --git a/contrib/tsearch2/query_util.h b/contrib/tsearch2/query_util.h new file mode 100644 index 0000000000..cf3900925a --- /dev/null +++ b/contrib/tsearch2/query_util.h @@ -0,0 +1,44 @@ +#ifndef __QUERY_UTIL_H__ +#define __QUERY_UTIL_H__ + +#include "postgres.h" +#include "utils/memutils.h" + +#include "query.h" + +typedef struct QTNode { + ITEM *valnode; + uint32 flags; + int4 nchild; + char *word; + uint32 sign; + struct QTNode **child; +} QTNode; + +#define QTN_NEEDFREE 0x01 +#define QTN_NOCHANGE 0x02 +#define QTN_WORDFREE 0x04 + +typedef enum { + PlainMemory, + SPIMemory, + AggMemory +} MemoryType; + +QTNode* QT2QTN( ITEM *in, char *operand ); +QUERYTYPE* QTN2QT( QTNode* in, MemoryType memtype ); +void QTNFree( QTNode* in ); +void QTNSort( QTNode* in ); +void QTNTernary( QTNode* in ); +void QTNBinary( QTNode* in ); +int QTNodeCompare( QTNode *an, QTNode *bn ); +QTNode* QTNCopy( QTNode* in, MemoryType memtype); +bool QTNEq( QTNode* a, QTNode* b ); + + +extern MemoryContext AggregateContext; + +#define MEMALLOC(us, s) ( ((us)==SPIMemory) ? SPI_palloc(s) : ( ( (us)==PlainMemory ) ? palloc(s) : MemoryContextAlloc(AggregateContext, (s)) ) ) +#define MEMFREE(us, p) ( ((us)==SPIMemory) ? 
SPI_pfree(p) : pfree(p) ) + +#endif diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c index 445898eff6..e678b205d6 100644 --- a/contrib/tsearch2/rank.c +++ b/contrib/tsearch2/rank.c @@ -407,7 +407,9 @@ rank_def(PG_FUNCTION_ARGS) typedef struct { - ITEM *item; + ITEM **item; + int16 nitem; + bool needfree; int32 pos; } DocRepresentation; @@ -419,123 +421,80 @@ compareDocR(const void *a, const void *b) return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1; } - -typedef struct -{ - DocRepresentation *doc; - int len; -} ChkDocR; - static bool -checkcondition_DR(void *checkval, ITEM * val) -{ - DocRepresentation *ptr = ((ChkDocR *) checkval)->doc; - - while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len) - { - if (val == ptr->item || compareITEM(&val, &(ptr->item)) == 0) - return true; - ptr++; - } - - return false; +checkcondition_ITEM(void *checkval, ITEM * val) { + return (bool)(val->istrue); } +static void +reset_istrue_flag(QUERYTYPE *query) { + ITEM *item = GETQUERY(query); + int i; + /* reset istrue flag */ + for(i = 0; i < query->size; i++) { + if ( item->type == VAL ) + item->istrue = 0; + item++; + } +} + static bool Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q) { - int i; - DocRepresentation *ptr, - *f = (DocRepresentation *) 0xffffffff; - ITEM *item = GETQUERY(query); + DocRepresentation *ptr; int lastpos = *pos; - int oldq = *q; + int i; + bool found=false; + reset_istrue_flag(query); + *p = 0x7fffffff; *q = 0; + ptr = doc + *pos; - for (i = 0; i < query->size; i++) - { - if (item->type != VAL) - { - item++; - continue; + /* find upper bound of cover from current position, move up */ + while (ptr - doc < len) { + for(i=0;initem;i++) + ptr->item[i]->istrue = 1; + if ( TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM) ) { + if (ptr->pos > *q) { + *q = ptr->pos; + lastpos = ptr - doc; + found = true; + } + break; } - ptr = doc + *pos; - - while (ptr - 
doc < len) - { - if (ptr->item == item) - { - if (ptr->pos > *q) - { - *q = ptr->pos; - lastpos = ptr - doc; - } - break; - } - ptr++; - } - - item++; + ptr++; } - if (*q == 0) + if (!found) return false; - if (*q == oldq) - { /* already check this pos */ - (*pos)++; - return Cover(doc, len, query, pos, p, q); + reset_istrue_flag(query); + + ptr = doc + lastpos; + + /* find lower bound of cover from founded upper bound, move down */ + while (ptr >= doc ) { + for(i=0;initem;i++) + ptr->item[i]->istrue = 1; + if ( TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM) ) { + if (ptr->pos < *p) + *p = ptr->pos; + break; + } + ptr--; } - item = GETQUERY(query); - for (i = 0; i < query->size; i++) - { - if (item->type != VAL) - { - item++; - continue; - } - ptr = doc + lastpos; - - while (ptr >= doc + *pos) - { - if (ptr->item == item) - { - if (ptr->pos < *p) - { - *p = ptr->pos; - f = ptr; - } - break; - } - ptr--; - } - item++; + if ( *p <= *q ) { + /* set position for next try to next lexeme after begining of founded cover */ + *pos= (ptr-doc) + 1; + return true; } - if (*p <= *q) - { - ChkDocR ch; - - ch.doc = f; - ch.len = (doc + lastpos) - f + 1; - *pos = f - doc + 1; - SortAndUniqOperand = GETOPERAND(query); - if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR)) - { - /* - * elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q); - */ - return true; - } - else - return Cover(doc, len, query, pos, p, q); - } - - return false; + (*pos)++; + return Cover( doc, len, query, pos, p, q ); } static DocRepresentation * @@ -553,9 +512,12 @@ get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen) *(uint16 *) POSNULL = lengthof(POSNULL) - 1; doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len); + SortAndUniqOperand = GETOPERAND(query); + reset_istrue_flag(query); + for (i = 0; i < query->size; i++) { - if (item[i].type != VAL) + if (item[i].type != VAL || item[i].istrue) continue; entry = find_wordentry(txt, query, &(item[i])); @@ 
-581,7 +543,27 @@ get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen) for (j = 0; j < dimt; j++) { - doc[cur].item = &(item[i]); + if ( j == 0 ) { + ITEM *kptr, *iptr = item+i; + int k; + + doc[cur].needfree = false; + doc[cur].nitem = 0; + doc[cur].item = (ITEM**)palloc( sizeof(ITEM*) * query->size ); + + for(k=0; k < query->size; k++) { + kptr = item+k; + if ( k==i || ( item[k].type == VAL && compareITEM( &kptr, &iptr ) == 0 ) ) { + doc[cur].item[ doc[cur].nitem ] = item+k; + doc[cur].nitem++; + kptr->istrue = 1; + } + } + } else { + doc[cur].needfree = false; + doc[cur].nitem = doc[cur-1].nitem; + doc[cur].item = doc[cur-1].item; + } doc[cur].pos = WEP_GETPOS(post[j]); cur++; } @@ -606,16 +588,18 @@ rank_cd(PG_FUNCTION_ARGS) { int K = PG_GETARG_INT32(0); tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); - QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); + QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2)); int method = DEF_NORM_METHOD; DocRepresentation *doc; float res = 0.0; int p = 0, q = 0, len, - cur; + cur, + i, + doclen=0; - doc = get_docrep(txt, query, &len); + doc = get_docrep(txt, query, &doclen); if (!doc) { PG_FREE_IF_COPY(txt, 1); @@ -626,7 +610,7 @@ rank_cd(PG_FUNCTION_ARGS) cur = 0; if (K <= 0) K = 4; - while (Cover(doc, len, query, &cur, &p, &q)) + while (Cover(doc, doclen, query, &cur, &p, &q)) res += (q - p + 1 > K) ? 
((float) K) / ((float) (q - p + 1)) : 1.0; if (PG_NARGS() == 4) @@ -649,6 +633,9 @@ rank_cd(PG_FUNCTION_ARGS) elog(ERROR, "unrecognized normalization method: %d", method); } + for(i=0;i 'b & c'::tsquery; +select 'a | f' < 'b & c'::tsquery; +select 'a | ff' < 'b & c'::tsquery; +select 'a | f | g' < 'b & c'::tsquery; + +select numnode( 'new'::tsquery ); +select numnode( 'new & york'::tsquery ); +select numnode( 'new & york | qwery'::tsquery ); + +create table test_tsquery (txtkeyword text, txtsample text); +\set ECHO none +\copy test_tsquery from stdin +'New York' new & york | big & apple | nyc +Moscow moskva | moscow +'Sanct Peter' Peterburg | peter | 'Sanct Peterburg' +'foo bar qq' foo & (bar | qq) & city +\. +\set ECHO all + +alter table test_tsquery add column keyword tsquery; +update test_tsquery set keyword = to_tsquery('default', txtkeyword); +alter table test_tsquery add column sample tsquery; +update test_tsquery set sample = to_tsquery('default', txtsample::text); + +create unique index bt_tsq on test_tsquery (keyword); + +select count(*) from test_tsquery where keyword < 'new & york'; +select count(*) from test_tsquery where keyword <= 'new & york'; +select count(*) from test_tsquery where keyword = 'new & york'; +select count(*) from test_tsquery where keyword >= 'new & york'; +select count(*) from test_tsquery where keyword > 'new & york'; + +set enable_seqscan=off; + +select count(*) from test_tsquery where keyword < 'new & york'; +select count(*) from test_tsquery where keyword <= 'new & york'; +select count(*) from test_tsquery where keyword = 'new & york'; +select count(*) from test_tsquery where keyword >= 'new & york'; +select count(*) from test_tsquery where keyword > 'new & york'; + +set enable_seqscan=on; + +select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city'); + +select rewrite('moscow', 'select keyword, sample from test_tsquery'::text ); +select rewrite('moscow & hotel', 'select keyword, 
sample from test_tsquery'::text ); +select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text ); + +select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery; +select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery; +select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery; + + +select keyword from test_tsquery where keyword @ 'new'; +select keyword from test_tsquery where keyword @ 'moscow'; +select keyword from test_tsquery where keyword ~ 'new'; +select keyword from test_tsquery where keyword ~ 'moscow'; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword; + +create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops); +set enable_seqscan='off'; + +select keyword from test_tsquery where keyword @ 'new'; +select keyword from test_tsquery where keyword @ 'moscow'; +select keyword from test_tsquery where keyword ~ 'new'; +select keyword from test_tsquery where keyword ~ 'moscow'; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from 
test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword; +select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword; +set enable_seqscan='on'; + + + select lexize('simple', 'ASD56 hsdkf'); select lexize('en_stem', 'SKIES Problems identity'); diff --git a/contrib/tsearch2/tsearch.sql.in b/contrib/tsearch2/tsearch.sql.in index b51b049b9a..dd51128e5d 100644 --- a/contrib/tsearch2/tsearch.sql.in +++ b/contrib/tsearch2/tsearch.sql.in @@ -813,6 +813,249 @@ CREATE OPERATOR CLASS tsvector_ops OPERATOR 5 > , FUNCTION 1 tsvector_cmp(tsvector, tsvector); +----------------Compare functions and operators for tsquery +CREATE OR REPLACE FUNCTION tsquery_cmp(tsquery,tsquery) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsquery_lt(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsquery_le(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsquery_eq(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsquery_ge(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsquery_gt(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + +CREATE OR REPLACE 
FUNCTION tsquery_ne(tsquery,tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict,iscachable); + + +CREATE OPERATOR < ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_lt, + COMMUTATOR = '>', + NEGATOR = '>=', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_le, + COMMUTATOR = '>=', + NEGATOR = '>', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_ge, + COMMUTATOR = '<=', + NEGATOR = '<', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OPERATOR > ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_gt, + COMMUTATOR = '<', + NEGATOR = '<=', + RESTRICT = contsel, + JOIN = contjoinsel +); + + +CREATE OPERATOR = ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_eq, + COMMUTATOR = '=', + NEGATOR = '<>', + RESTRICT = eqsel, + JOIN = eqjoinsel, + SORT1 = '<', + SORT2 = '<' +); + +CREATE OPERATOR <> ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsquery_ne, + COMMUTATOR = '<>', + NEGATOR = '=', + RESTRICT = neqsel, + JOIN = neqjoinsel +); + +CREATE OPERATOR CLASS tsquery_ops + DEFAULT FOR TYPE tsquery USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 tsquery_cmp(tsquery, tsquery); + +CREATE OR REPLACE FUNCTION numnode(tsquery) + returns int4 + as 'MODULE_PATHNAME', 'tsquery_numnode' + language 'C' + with (isstrict,iscachable); + +--------------rewrite subsystem + +CREATE OR REPLACE FUNCTION rewrite(tsquery, text) + returns tsquery + as 'MODULE_PATHNAME', 'tsquery_rewrite' + language 'C' + with (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION rewrite(tsquery, tsquery, tsquery) + returns tsquery + as 'MODULE_PATHNAME', 'tsquery_rewrite_query' + language 'C' + with (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION rewrite_accum(tsquery,tsquery[]) 
+ RETURNS tsquery + AS 'MODULE_PATHNAME' + LANGUAGE 'C'; + +CREATE OR REPLACE FUNCTION rewrite_finish(tsquery) + returns tsquery + as 'MODULE_PATHNAME' + language 'C'; + +CREATE AGGREGATE rewrite ( + BASETYPE=tsquery[], + SFUNC=rewrite_accum, + STYPE=tsquery, + FINALFUNC = rewrite_finish +); + +CREATE OR REPLACE FUNCTION tsq_mcontains(tsquery, tsquery) + returns bool + as 'MODULE_PATHNAME' + language 'C' + with (isstrict,iscachable); + +CREATE OR REPLACE FUNCTION tsq_mcontained(tsquery, tsquery) + returns bool + as 'MODULE_PATHNAME' + language 'C' + with (isstrict,iscachable); + +CREATE OPERATOR @ ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsq_mcontains, + COMMUTATOR = '~', + RESTRICT = contsel, + JOIN = contjoinsel +); + +CREATE OPERATOR ~ ( + LEFTARG = tsquery, + RIGHTARG = tsquery, + PROCEDURE = tsq_mcontained, + COMMUTATOR = '@', + RESTRICT = contsel, + JOIN = contjoinsel +); + +-----------gist support of rewrite------------------ + +CREATE FUNCTION gtsq_in(cstring) +RETURNS gtsq +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsq_out(gtsq) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE gtsq ( + INTERNALLENGTH = 8, + INPUT = gtsq_in, + OUTPUT = gtsq_out +); + +CREATE FUNCTION gtsq_consistent(gtsq,internal,int4) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsq_compress(internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsq_decompress(internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsq_penalty(internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsq_picksplit(internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsq_union(bytea, internal) +RETURNS _int4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsq_same(gtsq, gtsq, internal) +RETURNS internal +AS 'MODULE_PATHNAME' 
+LANGUAGE 'C'; + +CREATE OPERATOR CLASS gist_tp_tsquery_ops +DEFAULT FOR TYPE tsquery USING gist +AS + OPERATOR 1 @ (tsquery, tsquery) RECHECK, + OPERATOR 2 ~ (tsquery, tsquery) RECHECK, + FUNCTION 1 gtsq_consistent (gtsq, internal, int4), + FUNCTION 2 gtsq_union (bytea, internal), + FUNCTION 3 gtsq_compress (internal), + FUNCTION 4 gtsq_decompress (internal), + FUNCTION 5 gtsq_penalty (internal, internal, internal), + FUNCTION 6 gtsq_picksplit (internal, internal), + FUNCTION 7 gtsq_same (gtsq, gtsq, internal), + STORAGE gtsq; + --example of ISpell dictionary --update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template'; --example of synonym dict