New features for tsearch2:

1 Comparison operation for tsquery
2 Btree index on tsquery
3 numnode(tsquery) - returns 'length' of tsquery
4 tsquery @ tsquery, tsquery ~ tsquery - contains, contained for tsquery.
  Note: They don't guarantee an exact result, only MAY BE, so they are
  useful only for speeding up rewrite functions
5 GiST index support for @,~
6 rewrite():
        select rewrite(orig, what, to);
        select rewrite(ARRAY[orig, what, to]) from tsquery_table;
        select rewrite(orig, 'select what, to from tsquery_table;');
7 significantly improve cover algorithm
This commit is contained in:
Teodor Sigaev 2005-11-08 17:08:46 +00:00
parent 6521ea008e
commit 0645663e6c
14 changed files with 1876 additions and 114 deletions

View File

@ -1,11 +1,12 @@
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.10 2005/09/27 17:13:11 tgl Exp $
# $PostgreSQL: pgsql/contrib/tsearch2/Makefile,v 1.11 2005/11/08 17:08:46 teodor Exp $
MODULE_big = tsearch2
OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \
dict_snowball.o dict_ispell.o dict_syn.o \
wparser.o wparser_def.o \
ts_cfg.o tsvector.o rewrite.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o
ts_cfg.o tsvector.o query_cleanup.o crc32.o query.o gistidx.o \
tsvector_op.o rank.o ts_stat.o \
query_util.o query_support.o query_rewrite.o query_gist.o
SUBDIRS := snowball ispell wordparser
SUBDIROBJS := $(SUBDIRS:%=%/SUBSYS.o)

View File

@ -16,6 +16,9 @@ psql:tsearch2.sql:401: NOTICE: argument type tsquery is only a shell
psql:tsearch2.sql:543: NOTICE: type "gtsvector" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:548: NOTICE: argument type gtsvector is only a shell
psql:tsearch2.sql:997: NOTICE: type "gtsq" is not yet defined
DETAIL: Creating a shell type definition.
psql:tsearch2.sql:1002: NOTICE: argument type gtsq is only a shell
--tsvector
SELECT '1'::tsvector;
tsvector
@ -342,6 +345,286 @@ SELECT '''the wether'':dc & '' sKies '':BC & a:d b:a';
'the wether':dc & ' sKies ':BC & a:d b:a
(1 row)
select 'a' < 'b & c'::tsquery;
?column?
----------
t
(1 row)
select 'a' > 'b & c'::tsquery;
?column?
----------
f
(1 row)
select 'a | f' < 'b & c'::tsquery;
?column?
----------
t
(1 row)
select 'a | ff' < 'b & c'::tsquery;
?column?
----------
f
(1 row)
select 'a | f | g' < 'b & c'::tsquery;
?column?
----------
f
(1 row)
select numnode( 'new'::tsquery );
numnode
---------
1
(1 row)
select numnode( 'new & york'::tsquery );
numnode
---------
3
(1 row)
select numnode( 'new & york | qwery'::tsquery );
numnode
---------
5
(1 row)
create table test_tsquery (txtkeyword text, txtsample text);
\set ECHO none
alter table test_tsquery add column keyword tsquery;
update test_tsquery set keyword = to_tsquery('default', txtkeyword);
alter table test_tsquery add column sample tsquery;
update test_tsquery set sample = to_tsquery('default', txtsample::text);
create unique index bt_tsq on test_tsquery (keyword);
select count(*) from test_tsquery where keyword < 'new & york';
count
-------
1
(1 row)
select count(*) from test_tsquery where keyword <= 'new & york';
count
-------
2
(1 row)
select count(*) from test_tsquery where keyword = 'new & york';
count
-------
1
(1 row)
select count(*) from test_tsquery where keyword >= 'new & york';
count
-------
3
(1 row)
select count(*) from test_tsquery where keyword > 'new & york';
count
-------
2
(1 row)
set enable_seqscan=off;
select count(*) from test_tsquery where keyword < 'new & york';
count
-------
1
(1 row)
select count(*) from test_tsquery where keyword <= 'new & york';
count
-------
2
(1 row)
select count(*) from test_tsquery where keyword = 'new & york';
count
-------
1
(1 row)
select count(*) from test_tsquery where keyword >= 'new & york';
count
-------
3
(1 row)
select count(*) from test_tsquery where keyword > 'new & york';
count
-------
2
(1 row)
set enable_seqscan=on;
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
rewrite
----------------------------------------------------------------------------------
'qq' & 'foo' & 'bar' & ( 'city' & 'york' & 'new' | ( 'nyc' | 'apple' & 'big' ) )
(1 row)
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text );
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
select keyword from test_tsquery where keyword @ 'new';
keyword
----------------
'new' & 'york'
(1 row)
select keyword from test_tsquery where keyword @ 'moscow';
keyword
----------
'moscow'
(1 row)
select keyword from test_tsquery where keyword ~ 'new';
keyword
---------
(0 rows)
select keyword from test_tsquery where keyword ~ 'moscow';
keyword
----------
'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query;
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query;
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query;
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword;
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword;
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword;
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
set enable_seqscan='off';
select keyword from test_tsquery where keyword @ 'new';
keyword
----------------
'new' & 'york'
(1 row)
select keyword from test_tsquery where keyword @ 'moscow';
keyword
----------
'moscow'
(1 row)
select keyword from test_tsquery where keyword ~ 'new';
keyword
---------
(0 rows)
select keyword from test_tsquery where keyword ~ 'moscow';
keyword
----------
'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query;
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query;
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query;
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword;
rewrite
---------------------
'moskva' | 'moscow'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword;
rewrite
-----------------------------------
( 'moskva' | 'moscow' ) & 'hotel'
(1 row)
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword;
rewrite
-------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'qq' | 'bar' ) & ( 'nyc' | ( 'appl' & 'big' | 'york' & 'new' ) )
(1 row)
set enable_seqscan='on';
select lexize('simple', 'ASD56 hsdkf');
lexize
-----------------

View File

@ -23,7 +23,7 @@
#include "tsvector.h"
#include "crc32.h"
#include "query.h"
#include "rewrite.h"
#include "query_cleanup.h"
#include "common.h"

View File

@ -17,7 +17,7 @@ typedef struct ITEM
int4 val;
/* user-friendly value, must correlate with WordEntry */
uint32
unused:1,
istrue:1, /* use for ranking in Cover */
length:11,
distance:20;
} ITEM;

View File

@ -7,14 +7,10 @@
#include <float.h>
#include "access/gist.h"
#include "access/itup.h"
#include "storage/bufpage.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "query.h"
#include "rewrite.h"
#include "query_cleanup.h"
typedef struct NODE
{

View File

@ -0,0 +1,324 @@
#include "postgres.h"
#include "storage/bufpage.h"
#include "access/skey.h"
#include "access/gist.h"
#include "query.h"
/* GiST key for a tsquery: a 64-bit signature bitmap (built by makesign()). */
typedef uint64 TPQTGist;
/* Return a pointer to the signature stored as the key of entry 'pos' of entry vector 'vec'. */
#define GETENTRY(vec,pos) ((TPQTGist *) DatumGetPointer((vec)->vector[(pos)].key))
/* fmgr V1 registration and prototypes for the tsquery containment functions. */
PG_FUNCTION_INFO_V1(tsq_mcontains);
Datum tsq_mcontains(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(tsq_mcontained);
Datum tsq_mcontained(PG_FUNCTION_ARGS);
/*
 * makesign - build the 64-bit signature of a tsquery.
 *
 * Every VAL item of the query sets one bit, selected by the item's 'val'
 * modulo 64; operator items are ignored.
 *
 * The shifted constant is widened to TPQTGist and 'val' is reduced as an
 * unsigned number.  A plain "1 << n" is an int shift, which is undefined for
 * n >= 32 and for the negative n that a negative 'val' would produce, so the
 * upper half of the signature could never be set reliably.
 *
 * NOTE(review): signatures computed by the old expression may differ from
 * these, so GiST indexes built with it presumably need a REINDEX - confirm.
 */
static TPQTGist
makesign(QUERYTYPE* a) {
	int			i;
	ITEM	   *ptr = GETQUERY(a);
	TPQTGist	sign = 0;

	for (i = 0; i < a->size; i++) {
		if ( ptr->type == VAL )
			sign |= ((TPQTGist) 1) << (((unsigned int) ptr->val) % 64);
		ptr++;
	}

	return sign;
}
Datum
tsq_mcontains(PG_FUNCTION_ARGS) {
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
QUERYTYPE *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
TPQTGist sq, se;
int i,j;
ITEM *iq, *ie;
if ( query->size < ex->size ) {
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL( false );
}
sq = makesign(query);
se = makesign(ex);
if ( (sq&se)!=se ) {
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL( false );
}
ie = GETQUERY(ex);
for(i=0;i<ex->size;i++) {
iq = GETQUERY(query);
if ( ie[i].type != VAL )
continue;
for(j=0;j<query->size;j++)
if ( iq[j].type == VAL && ie[i].val == iq[j].val ) {
j = query->size+1;
break;
}
if ( j == query->size ) {
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL( false );
}
}
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL( true );
}
/*
 * tsq_mcontained - commutator of tsq_mcontains: the same test with the two
 * arguments swapped.
 */
Datum
tsq_mcontained(PG_FUNCTION_ARGS) {
	Datum	inner = PG_GETARG_DATUM(0);
	Datum	outer = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM( DirectFunctionCall2( tsq_mcontains, outer, inner ) );
}
/*
 * fmgr V1 registration and prototypes for the gtsq type I/O stubs and the
 * GiST support functions defined below.
 */
PG_FUNCTION_INFO_V1(gtsq_in);
Datum gtsq_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_out);
Datum gtsq_out(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_compress);
Datum gtsq_compress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_decompress);
Datum gtsq_decompress(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_consistent);
Datum gtsq_consistent(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_union);
Datum gtsq_union(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_same);
Datum gtsq_same(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_penalty);
Datum gtsq_penalty(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtsq_picksplit);
Datum gtsq_picksplit(PG_FUNCTION_ARGS);
/*
 * gtsq_in - input function of the gtsq type; always raises an error.
 */
Datum
gtsq_in(PG_FUNCTION_ARGS)
{
	elog(ERROR, "Not implemented");

	/* not reached when elog(ERROR) does not return; present for the compiler */
	PG_RETURN_DATUM(0);
}

/*
 * gtsq_out - output function of the gtsq type; always raises an error.
 */
Datum
gtsq_out(PG_FUNCTION_ARGS)
{
	elog(ERROR, "Not implemented");

	/* not reached when elog(ERROR) does not return; present for the compiler */
	PG_RETURN_DATUM(0);
}
Datum
gtsq_compress(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey) {
TPQTGist *sign = (TPQTGist*)palloc( sizeof(TPQTGist) );
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
*sign = makesign( (QUERYTYPE*)DatumGetPointer(PG_DETOAST_DATUM(entry->key)) );
gistentryinit(*retval, PointerGetDatum(sign),
entry->rel, entry->page,
entry->offset, sizeof(TPQTGist), FALSE);
}
PG_RETURN_POINTER(retval);
}
/*
 * gtsq_decompress - GiST decompress method; hands the entry back untouched.
 */
Datum
gtsq_decompress(PG_FUNCTION_ARGS) {
	Datum	entry = PG_GETARG_DATUM(0);

	PG_RETURN_DATUM(entry);
}
Datum
gtsq_consistent(PG_FUNCTION_ARGS) {
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TPQTGist *key = (TPQTGist*) DatumGetPointer(entry->key);
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
TPQTGist sq = makesign(query);
if ( GIST_LEAF(entry) )
PG_RETURN_BOOL( ( (*key) & sq ) == ((strategy==1) ? sq : *key) );
else
PG_RETURN_BOOL( (*key) & sq );
}
/*
 * gtsq_union - GiST union method.
 *
 * Returns a newly allocated signature that is the bitwise OR of all entry
 * signatures in the vector, and reports its size through argument 1.
 */
Datum
gtsq_union(PG_FUNCTION_ARGS) {
	GistEntryVector	   *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
	TPQTGist		   *merged = (TPQTGist*)palloc( sizeof(TPQTGist) );
	int				   *size = (int *) PG_GETARG_POINTER(1);
	int					pos = 0;

	*merged = 0;
	while ( pos < entryvec->n ) {
		*merged |= *GETENTRY(entryvec, pos);
		pos++;
	}

	*size = sizeof(TPQTGist);

	PG_RETURN_POINTER(merged);
}
Datum
gtsq_same(PG_FUNCTION_ARGS) {
TPQTGist *a = (TPQTGist *) PG_GETARG_POINTER(0);
TPQTGist *b = (TPQTGist *) PG_GETARG_POINTER(1);
PG_RETURN_POINTER( *a == *b );
}
/*
 * sizebitvec - count the bits set in a 64-bit signature.
 *
 * The original body computed the count but had no return statement, so
 * callers (hemdist, and through it penalty and picksplit) received an
 * indeterminate value.
 */
static int
sizebitvec(TPQTGist sign) {
	int		size = 0, i;

	for (i = 0; i < 64; i++)
		size += 0x01 & (sign >> i);

	return size;
}
/*
 * hemdist - Hamming distance between two signatures: the number of bit
 * positions in which they differ.
 */
static int
hemdist(TPQTGist a, TPQTGist b) {
	return sizebitvec( a ^ b );
}
Datum
gtsq_penalty(PG_FUNCTION_ARGS) {
TPQTGist *origval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
TPQTGist *newval = (TPQTGist*) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(*origval, *newval);
PG_RETURN_POINTER(penalty);
}
typedef struct {
OffsetNumber pos;
int4 cost;
} SPLITCOST;
static int
comparecost(const void *a, const void *b) {
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * WISH_F(a,b,c) evaluates to -((a-b)^3) * c.  In gtsq_picksplit it is
 * called with the current left/right entry counts, so the term is negative
 * when the left side is larger and positive when the right side is larger.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * gtsq_picksplit - GiST picksplit method for tsquery signatures.
 *
 * Argument 0 is the vector of entries to split, argument 1 the
 * GIST_SPLITVEC to fill in (also the return value).  Steps:
 *   1. pick as seeds the two entries with the largest Hamming distance;
 *   2. order all entries by |dist(seed_1) - dist(seed_2)|, ascending;
 *   3. assign each entry to the side whose accumulated signature is closer,
 *      biased by WISH_F, OR-ing its bits into that side's union signature.
 */
Datum
gtsq_picksplit(PG_FUNCTION_ARGS) {
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
OffsetNumber maxoff = entryvec->n - 2;
OffsetNumber k,j;
TPQTGist *datum_l, *datum_r;
int4 size_alpha, size_beta;
int4 size_waste, waste = -1;
int4 nbytes;
OffsetNumber seed_1 = 0, seed_2 = 0;
OffsetNumber *left, *right;
SPLITCOST *costvector;
/* both output arrays are sized for maxoff + 2 offsets */
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
v->spl_nleft = v->spl_nright = 0;
/* step 1: examine every pair (k,j) with k < j <= maxoff, keep the most distant one */
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) {
size_waste = hemdist( *GETENTRY(entryvec,j), *GETENTRY(entryvec,k) );
if (size_waste > waste) {
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
/* no pair was selected above: fall back to the first two entries */
if (seed_1 == 0 || seed_2 == 0) {
seed_1 = 1;
seed_2 = 2;
}
/* each side's union signature starts out as a copy of its seed */
datum_l = (TPQTGist*)palloc( sizeof(TPQTGist) );
*datum_l=*GETENTRY(entryvec,seed_1);
datum_r = (TPQTGist*)palloc( sizeof(TPQTGist) );
*datum_r=*GETENTRY(entryvec,seed_2);
/* from here on maxoff also includes the entry skipped by the seed search */
maxoff = OffsetNumberNext(maxoff);
/* step 2: cost of an entry = how differently it relates to the two seeds */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) {
costvector[j - 1].pos = j;
size_alpha = hemdist( *GETENTRY(entryvec,seed_1), *GETENTRY(entryvec,j) );
size_beta = hemdist( *GETENTRY(entryvec,seed_2), *GETENTRY(entryvec,j) );
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
/* step 3: distribute the entries in sorted order */
for (k = 0; k < maxoff; k++) {
j = costvector[k].pos;
/* the seeds always go to their own side */
if ( j == seed_1 ) {
*left++ = j;
v->spl_nleft++;
continue;
} else if ( j == seed_2 ) {
*right++ = j;
v->spl_nright++;
continue;
}
/* distances are measured against the unions accumulated so far, not the bare seeds */
size_alpha = hemdist( *datum_l, *GETENTRY(entryvec,j) );
size_beta = hemdist( *datum_r, *GETENTRY(entryvec,j) );
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) {
*datum_l |= *GETENTRY(entryvec,j);
*left++ = j;
v->spl_nleft++;
} else {
*datum_r |= *GETENTRY(entryvec,j);
*right++ = j;
v->spl_nright++;
}
}
/* write FirstOffsetNumber into the slot just past the end of each list */
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}

View File

@ -0,0 +1,466 @@
#include "postgres.h"
#include "executor/spi.h"
#include "query_util.h"
/*
 * Memory context of the running aggregate; rewrite_accum() stores the
 * aggregate state's aggcontext here on every call.
 * NOTE(review): presumably consumed by the AggMemory allocation mode of
 * MEMALLOC (defined outside this file) - confirm.
 */
MemoryContext AggregateContext = NULL;
/*
 * addone - advance counters[0..last] to the next strictly increasing index
 * combination drawn from 0..total-1.
 *
 * Returns 1 when a new combination was produced and 0 when the sequence is
 * exhausted (the counters are left in an unspecified state in that case).
 */
static int
addone(int * counters, int last, int total) {
	int	   *slot = &counters[last];

	*slot += 1;
	if ( *slot < total )
		return 1;

	/* this position overflowed: carry into the position to its left */
	if ( last == 0 || addone( counters, last-1, total-1 ) == 0 )
		return 0;

	*slot = counters[last-1] + 1;
	return 1;
}
/*
 * findeq - try to match the pattern tree 'ex' at 'node' and substitute it.
 *
 * node    - subtree being examined (may be freed or modified in place)
 * ex      - pattern to look for
 * memtype - allocation mode passed to MEMALLOC/MEMFREE/QTNCopy
 * subs    - replacement tree, or NULL to delete the match
 * isfind  - set to true when a substitution happened (never reset here)
 *
 * Returns the possibly replaced node; NULL when the whole node matched and
 * 'subs' is NULL.
 */
static QTNode *
findeq(QTNode *node, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind) {
/* quick reject: the signature must cover ex's, and the root items must agree in type and val */
if ( (node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val )
return node;
/* nodes marked QTN_NOCHANGE (set below on substituted copies) are left alone */
if ( node->flags & QTN_NOCHANGE )
return node;
if ( node->valnode->type==OPR ) {
if ( node->nchild == ex->nchild ) {
/* same operator, same arity: the whole node either matches or not */
if ( QTNEq( node, ex ) ) {
QTNFree( node );
if ( subs ) {
node = QTNCopy( subs, memtype );
node->flags |= QTN_NOCHANGE;
} else
node = NULL;
*isfind = true;
}
} else if ( node->nchild > ex->nchild ) {
/*
 * node has more children than ex: look for a subset of ex->nchild
 * children that equals ex.  counters[] holds the indexes of the current
 * subset and addone() steps to the next one.
 */
int *counters = (int*)palloc( sizeof(int) * node->nchild );
int i;
/* scratch node that borrows node's children for the comparison */
QTNode *tnode = (QTNode*)MEMALLOC( memtype, sizeof(QTNode) );
memset(tnode, 0, sizeof(QTNode));
tnode->child = (QTNode**)MEMALLOC( memtype, sizeof(QTNode*) * ex->nchild );
tnode->nchild = ex->nchild;
tnode->valnode = (ITEM*)MEMALLOC( memtype, sizeof(ITEM) );
*(tnode->valnode) = *(ex->valnode);
for(i=0;i<ex->nchild;i++)
counters[i]=i;
do {
tnode->sign=0;
for(i=0;i<ex->nchild;i++) {
tnode->child[i] = node->child[ counters[i] ];
tnode->sign |= tnode->child[i]->sign;
}
if ( QTNEq( tnode, ex ) ) {
int j=0;
/* the scratch node is released; the children it pointed at are not freed here */
MEMFREE( memtype, tnode->valnode );
MEMFREE( memtype, tnode->child );
MEMFREE( memtype, tnode );
if ( subs ) {
tnode = QTNCopy( subs, memtype );
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
} else
tnode = NULL;
/* the replacement takes the first matched slot; the other matched slots are emptied */
node->child[ counters[0] ] = tnode;
for(i=1;i<ex->nchild;i++)
node->child[ counters[i] ] = NULL;
/* squeeze the NULL slots out of the child array */
for(i=0;i<node->nchild;i++) {
if ( node->child[i] ) {
node->child[j] = node->child[i];
j++;
}
}
node->nchild = j;
*isfind = true;
break;
}
} while (addone(counters,ex->nchild-1,node->nchild));
/*
 * tnode without QTN_NOCHANGE is still the scratch node (no match was
 * found), so free it.  Otherwise a substitution took place and the
 * children are sorted again.
 */
if ( tnode && (tnode->flags & QTN_NOCHANGE) == 0 ) {
MEMFREE( memtype, tnode->valnode );
MEMFREE( memtype, tnode->child );
MEMFREE( memtype, tnode );
} else
QTNSort( node );
pfree( counters );
}
} else if ( QTNEq( node, ex ) ) {
/* not an operator node: plain equality */
QTNFree( node );
if ( subs ) {
node = QTNCopy( subs, memtype );
node->flags |= QTN_NOCHANGE;
} else {
node = NULL;
}
*isfind = true;
}
return node;
}
/*
 * dofindsubquery - apply findeq() to 'root' and then, recursively, to every
 * child of the result.
 *
 * The recursion stops at nodes that were removed (NULL), at nodes flagged
 * QTN_NOCHANGE and at non-operator nodes.  Returns the possibly replaced root.
 */
static QTNode *
dofindsubquery( QTNode *root, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind ) {
	QTNode *cur = findeq( root, ex, memtype, subs, isfind );
	int		idx;

	if ( cur == NULL )
		return cur;
	if ( (cur->flags & QTN_NOCHANGE) != 0 )
		return cur;
	if ( cur->valnode->type != OPR )
		return cur;

	for (idx = 0; idx < cur->nchild; idx++)
		cur->child[idx] = dofindsubquery( cur->child[idx], ex, memtype, subs, isfind );

	return cur;
}
/*
 * dropvoidsubtree - tidy up an operator node after children were removed.
 *
 * NULL children are squeezed out of the child array.  A '!' node left
 * without children is freed and NULL is returned; a node left with exactly
 * one child is replaced by that child.  Only the node passed in is
 * examined; its children are not visited.
 */
static QTNode *
dropvoidsubtree( QTNode *root ) {
	int		src, dst = 0;

	if ( !root || root->valnode->type != OPR )
		return root;

	for (src = 0; src < root->nchild; src++) {
		if ( root->child[src] )
			root->child[dst++] = root->child[src];
	}
	root->nchild = dst;

	if ( root->nchild == 0 && root->valnode->val == (int4)'!' ) {
		QTNFree(root);
		return NULL;
	}

	if ( root->nchild == 1 ) {
		QTNode *only = root->child[0];

		pfree(root);
		return only;
	}

	return root;
}
/*
 * findsubquery - substitute 'ex' by 'subs' throughout the tree 'root'.
 *
 * When 'subs' is NULL (deletion) and something was found, the root is
 * cleaned up with dropvoidsubtree().  'isfind' may be NULL; otherwise it
 * receives whether any substitution happened.  Returns the new root.
 */
static QTNode *
findsubquery( QTNode *root, QTNode *ex, MemoryType memtype, QTNode *subs, bool *isfind ) {
	bool	found = false;
	QTNode *result = dofindsubquery( root, ex, memtype, subs, &found );

	if ( found && subs == NULL )
		result = dropvoidsubtree( result );

	if ( isfind != NULL )
		*isfind = found;

	return result;
}
/* Cached OID of the tsquery type; InvalidOid until get_tsq_Oid() has run. */
static Oid tsqOid = InvalidOid;

/*
 * get_tsq_Oid - look up the OID of the tsquery type and cache it in tsqOid.
 *
 * Must be called inside an open SPI connection.  Raises an error when the
 * lookup fails, returns no row, or yields InvalidOid.
 *
 * The row-count check used to be "SPI_processed < 0", which can never be
 * true for an unsigned counter; an empty result therefore fell through to
 * reading SPI_tuptable->vals[0].  The message also named the wrong type.
 */
static void
get_tsq_Oid(void)
{
	int			ret;
	bool		isnull;

	if ((ret = SPI_exec("select oid from pg_type where typname='tsquery'", 1)) < 0)
		/* internal error */
		elog(ERROR, "SPI_exec to get tsquery oid returns %d", ret);

	if (SPI_processed < 1)
		/* internal error */
		elog(ERROR, "There is no tsquery type");

	tsqOid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));

	if (tsqOid == InvalidOid)
		/* internal error */
		elog(ERROR, "tsquery type has InvalidOid");
}
/* fmgr V1 registration; tsquery_rewrite itself is defined further down in this file. */
PG_FUNCTION_INFO_V1(tsquery_rewrite);
PG_FUNCTION_INFO_V1(rewrite_accum);
Datum rewrite_accum(PG_FUNCTION_ARGS);
/*
 * rewrite_accum - accumulate one rewrite step.
 *
 * Argument 0 is the accumulated tsquery (or NULL), argument 1 a
 * one-dimensional array of exactly three tsqueries: the query to rewrite,
 * the pattern to look for and its substitute.  Returns the (possibly new)
 * accumulated tsquery.
 *
 * The accumulator is seeded from the array's first element while it is
 * still empty; later calls rewrite the accumulated query instead.  An
 * accumulator with size 0 but len > HDRSIZEQT marks a query that was
 * rewritten to nothing, and is returned unchanged.
 */
Datum
rewrite_accum(PG_FUNCTION_ARGS) {
QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0);
/* NOTE(review): argument 1 is detoasted here, before the PG_ARGISNULL(1) test below - confirm this is safe for a NULL input */
ArrayType *qa = (ArrayType *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
QUERYTYPE *q;
QTNode *qex, *subs = NULL, *acctree;
bool isfind = false;
/* NOTE(review): treats fcinfo->context as an AggState without checking its node type - presumably only ever called as an aggregate transition function */
AggregateContext = ((AggState *) fcinfo->context)->aggcontext;
/* no state yet: start with an empty accumulator (len == HDRSIZEQT, size == 0) */
if (acc == NULL || PG_ARGISNULL(0)) {
acc = (QUERYTYPE*)MEMALLOC( AggMemory, sizeof(QUERYTYPE) );
acc->len = HDRSIZEQT;
acc->size = 0;
}
if ( qa == NULL || PG_ARGISNULL(1) ) {
PG_FREE_IF_COPY( qa, 1 );
PG_RETURN_POINTER( acc );
}
if ( ARR_NDIM(qa) != 1 )
elog(ERROR, "array must be one-dimensional, not %d dimension", ARR_NDIM(qa));
if ( ArrayGetNItems( ARR_NDIM(qa), ARR_DIMS(qa)) != 3 )
elog(ERROR, "array should have only three elements");
/* look the tsquery type OID up once and keep it in tsqOid */
if (tsqOid == InvalidOid) {
SPI_connect();
get_tsq_Oid();
SPI_finish();
}
if (ARR_ELEMTYPE(qa) != tsqOid)
elog(ERROR, "array should contain tsquery type");
/* element 1: the query to rewrite */
q = (QUERYTYPE*)ARR_DATA_PTR(qa);
if ( q->size == 0 )
PG_RETURN_POINTER( acc );
if ( !acc->size ) {
/* len > HDRSIZEQT with size 0 is the "rewritten to nothing" marker set near the end of this function */
if ( acc->len > HDRSIZEQT )
PG_RETURN_POINTER( acc );
else
acctree = QT2QTN( GETQUERY(q), GETOPERAND(q) );
} else
acctree = QT2QTN( GETQUERY(acc), GETOPERAND(acc) );
QTNTernary( acctree );
QTNSort( acctree );
/* element 2: the pattern, located MAXALIGN(len) bytes after element 1 */
q = (QUERYTYPE*)( ((char*)ARR_DATA_PTR(qa)) + MAXALIGN( q->len ) );
if ( q->size == 0 )
PG_RETURN_POINTER( acc );
qex = QT2QTN( GETQUERY(q), GETOPERAND(q) );
QTNTernary( qex );
QTNSort( qex );
/* element 3: the substitute; an empty one leaves subs NULL, i.e. the match is deleted */
q = (QUERYTYPE*)( ((char*)q) + MAXALIGN( q->len ) );
if ( q->size )
subs = QT2QTN( GETQUERY(q), GETOPERAND(q) );
acctree = findsubquery( acctree, qex, PlainMemory, subs, &isfind );
/* rebuild the accumulator when something was substituted or when it was still empty */
if ( isfind || !acc->size ) {
/* pfree( acc ); do not pfree(p), because nodeAgg.c will */
if ( acctree ) {
QTNBinary( acctree );
acc = QTN2QT( acctree, AggMemory );
} else {
/* the query was rewritten to nothing: size 0 with the doubled header length as marker */
acc = (QUERYTYPE*)MEMALLOC( AggMemory, HDRSIZEQT*2 );
acc->len = HDRSIZEQT * 2;
acc->size = 0;
}
}
QTNFree( qex );
QTNFree( subs );
QTNFree( acctree );
PG_RETURN_POINTER( acc );
}
PG_FUNCTION_INFO_V1(rewrite_finish);
Datum rewrite_finish(PG_FUNCTION_ARGS);
Datum
rewrite_finish(PG_FUNCTION_ARGS) {
QUERYTYPE *acc = (QUERYTYPE *) PG_GETARG_POINTER(0);
QUERYTYPE *rewrited;
if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0 ) {
acc = (QUERYTYPE*)palloc(sizeof(QUERYTYPE));
acc->len = HDRSIZEQT;
acc->size = 0;
}
rewrited = (QUERYTYPE*) palloc( acc->len );
memcpy( rewrited, acc, acc->len );
pfree( acc );
PG_RETURN_POINTER(rewrited);
}
Datum tsquery_rewrite(PG_FUNCTION_ARGS);
/*
 * tsquery_rewrite - rewrite a tsquery using rules produced by an SQL query.
 *
 * Argument 0 is the tsquery to rewrite; argument 1 is the text of an SQL
 * statement that must return two tsquery columns: the pattern to look for
 * and its substitute.  Every row is applied in turn to the query tree; an
 * empty substitute deletes the match.  Returns the rewritten tsquery, or an
 * empty one when nothing is left.
 */
Datum
tsquery_rewrite(PG_FUNCTION_ARGS) {
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
text *in = PG_GETARG_TEXT_P(1);
QUERYTYPE *rewrited = query;
QTNode *tree;
char *buf;
void *plan;
Portal portal;
bool isnull;
int i;
/* an empty query is returned as is */
if ( query->size == 0 ) {
PG_FREE_IF_COPY(in, 1);
PG_RETURN_POINTER( rewrited );
}
tree = QT2QTN( GETQUERY(query), GETOPERAND(query) );
QTNTernary( tree );
QTNSort( tree );
/* NUL-terminated copy of the SQL text; VARSIZE includes the header, so there is room for the terminator */
buf = (char*)palloc( VARSIZE(in) );
memcpy(buf, VARDATA(in), VARSIZE(in) - VARHDRSZ);
buf[ VARSIZE(in) - VARHDRSZ ] = '\0';
SPI_connect();
if (tsqOid == InvalidOid)
get_tsq_Oid();
if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
elog(ERROR, "SPI_prepare('%s') returns NULL", buf);
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
elog(ERROR, "SPI_cursor_open('%s') returns NULL", buf);
/* rows are fetched in batches of 100 */
SPI_cursor_fetch(portal, true, 100);
/* the shape of the result is checked once, on the first batch */
if (SPI_tuptable->tupdesc->natts != 2)
elog(ERROR, "number of fields doesn't equal to 2");
if (SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tsqOid )
elog(ERROR, "column #1 isn't of tsquery type");
if (SPI_gettypeid(SPI_tuptable->tupdesc, 2) != tsqOid )
elog(ERROR, "column #2 isn't of tsquery type");
/* stop when a batch comes back empty or when the tree has been rewritten to nothing */
while (SPI_processed > 0 && tree ) {
for (i = 0; i < SPI_processed && tree; i++) {
Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
Datum sdata;
/* rows with a NULL in either column are skipped */
if ( isnull ) continue;
sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
if (!isnull) {
QUERYTYPE *qtex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(qdata));
QUERYTYPE *qtsubs = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(sdata));
QTNode *qex, *qsubs = NULL;
/* an empty pattern cannot match anything; free any detoasted copies and move on */
if (qtex->size == 0) {
if ( qtex != (QUERYTYPE *) DatumGetPointer(qdata) )
pfree( qtex );
if ( qtsubs != (QUERYTYPE *) DatumGetPointer(sdata) )
pfree( qtsubs );
continue;
}
qex = QT2QTN( GETQUERY(qtex), GETOPERAND(qtex) );
QTNTernary( qex );
QTNSort( qex );
/* an empty substitute leaves qsubs NULL, which makes findsubquery delete the match */
if ( qtsubs->size )
qsubs = QT2QTN( GETQUERY(qtsubs), GETOPERAND(qtsubs) );
tree = findsubquery( tree, qex, SPIMemory, qsubs, NULL );
QTNFree( qex );
/* the pointer comparison detects whether detoasting made a copy that has to be freed */
if ( qtex != (QUERYTYPE *) DatumGetPointer(qdata) )
pfree( qtex );
QTNFree( qsubs );
if ( qtsubs != (QUERYTYPE *) DatumGetPointer(sdata) )
pfree( qtsubs );
}
}
SPI_freetuptable(SPI_tuptable);
SPI_cursor_fetch(portal, true, 100);
}
SPI_freetuptable(SPI_tuptable);
SPI_cursor_close(portal);
SPI_freeplan(plan);
SPI_finish();
if ( tree ) {
/* back to the binary form and then to the flat tsquery representation */
QTNBinary( tree );
rewrited = QTN2QT( tree, PlainMemory );
QTNFree( tree );
PG_FREE_IF_COPY(query, 0);
} else {
/* nothing left: reuse the copied input as an empty query */
rewrited->len = HDRSIZEQT;
rewrited->size = 0;
}
pfree(buf);
PG_FREE_IF_COPY(in, 1);
PG_RETURN_POINTER( rewrited );
}
PG_FUNCTION_INFO_V1(tsquery_rewrite_query);
Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);

/*
 * tsquery_rewrite_query - replace every occurrence of one tsquery inside
 * another.
 *
 * Arguments: 0 - query to rewrite, 1 - pattern to look for, 2 - substitute
 * (an empty substitute deletes the match).  When the query or the pattern
 * is empty the copied input is returned untouched; when nothing is left
 * after the rewrite an empty query is returned.
 */
Datum
tsquery_rewrite_query(PG_FUNCTION_ARGS) {
	QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
	QUERYTYPE  *ex = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
	QUERYTYPE  *subst = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
	QUERYTYPE  *rewrited = query;

	if ( query->size != 0 && ex->size != 0 ) {
		QTNode *tree, *qex, *subs = NULL;

		tree = QT2QTN( GETQUERY(query), GETOPERAND(query) );
		QTNTernary( tree );
		QTNSort( tree );

		qex = QT2QTN( GETQUERY(ex), GETOPERAND(ex) );
		QTNTernary( qex );
		QTNSort( qex );

		if ( subst->size )
			subs = QT2QTN( GETQUERY(subst), GETOPERAND(subst) );

		tree = findsubquery( tree, qex, PlainMemory, subs, NULL );
		QTNFree( qex );
		QTNFree( subs );

		if ( tree ) {
			/* convert back to the flat form; the copied input is no longer needed */
			QTNBinary( tree );
			rewrited = QTN2QT( tree, PlainMemory );
			QTNFree( tree );
			PG_FREE_IF_COPY(query, 0);
		} else {
			/* everything was removed: hand back the copied input as an empty query */
			rewrited->len = HDRSIZEQT;
			rewrited->size = 0;
		}
	}

	PG_FREE_IF_COPY(ex, 1);
	PG_FREE_IF_COPY(subst, 2);
	PG_RETURN_POINTER( rewrited );
}

View File

@ -0,0 +1,76 @@
#include "postgres.h"
#include "fmgr.h"
#include "query_util.h"
PG_FUNCTION_INFO_V1(tsquery_numnode);
Datum tsquery_numnode(PG_FUNCTION_ARGS);

/*
 * tsquery_numnode - return the number of items (the 'size' field) of a
 * tsquery.
 */
Datum
tsquery_numnode(PG_FUNCTION_ARGS) {
	QUERYTYPE  *tsq = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
	int			items = tsq->size;

	PG_FREE_IF_COPY(tsq,0);
	PG_RETURN_INT32(items);
}
/*
 * CompareTSQ - total order on tsqueries.
 *
 * Queries are ordered first by number of items, then by total length, and
 * only when both agree by a node-by-node comparison of their trees.
 * Returns a negative, zero or positive value.
 */
static int
CompareTSQ( QUERYTYPE *a, QUERYTYPE *b ) {
	QTNode *atree, *btree;
	int		cmp;

	if ( a->size != b->size )
		return ( a->size < b->size ) ? -1 : 1;

	if ( a->len != b->len )
		return ( a->len < b->len ) ? -1 : 1;

	atree = QT2QTN( GETQUERY(a), GETOPERAND(a) );
	btree = QT2QTN( GETQUERY(b), GETOPERAND(b) );
	cmp = QTNodeCompare(atree, btree);

	QTNFree(atree);
	QTNFree(btree);

	return cmp;
}
PG_FUNCTION_INFO_V1(tsquery_cmp); \
Datum tsquery_cmp(PG_FUNCTION_ARGS);
Datum
tsquery_cmp(PG_FUNCTION_ARGS) {
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0)));
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1)));
int res = CompareTSQ(a,b);
PG_FREE_IF_COPY(a,0);
PG_FREE_IF_COPY(b,1);
PG_RETURN_INT32(res);
}
/*
 * CMPFUNC(NAME, ACTION) defines the fmgr-callable boolean comparison
 * function NAME.  The function compares its two tsquery arguments with
 * CompareTSQ(), stores the outcome in the local 'res', and returns the
 * value of ACTION, an expression written in terms of 'res'.
 */
#define CMPFUNC( NAME, ACTION ) \
PG_FUNCTION_INFO_V1(NAME); \
Datum NAME(PG_FUNCTION_ARGS); \
\
Datum \
NAME(PG_FUNCTION_ARGS) { \
QUERYTYPE *a = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(0))); \
QUERYTYPE *b = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1))); \
int res = CompareTSQ(a,b); \
\
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1); \
\
PG_RETURN_BOOL( ACTION ); \
}
/* The six comparison functions: <, <=, =, >=, > and <>. */
CMPFUNC( tsquery_lt, res <0 );
CMPFUNC( tsquery_le, res<=0 );
CMPFUNC( tsquery_eq, res==0 );
CMPFUNC( tsquery_ge, res>=0 );
CMPFUNC( tsquery_gt, res >0 );
CMPFUNC( tsquery_ne, res!=0 );

View File

@ -0,0 +1,257 @@
#include "postgres.h"
#include "executor/spi.h"
#include "query_util.h"
/*
 * QT2QTN - convert the flat ITEM array of a tsquery into a QTNode tree.
 *
 * in      - first item of the (sub)query to convert
 * operand - base of the operand string area, or NULL
 *
 * Operator nodes get one child ('!') or two children (anything else); the
 * first child starts at in + 1 and the second at in + in->left.  A node's
 * signature is the OR of its children's signatures.  A value node gets a
 * pointer to its word and a one-bit signature, but only when 'operand' is
 * given.  The tree references the items in place (valnode points into 'in').
 *
 * The signature bit is chosen from 'val' reduced as an unsigned number:
 * 'val' is a signed int4, and a negative value used to yield a negative
 * shift count, which is undefined behaviour.
 */
QTNode*
QT2QTN( ITEM *in, char *operand ) {
	QTNode *node = (QTNode*)palloc0( sizeof(QTNode) );

	node->valnode = in;

	if (in->type == OPR) {
		node->child = (QTNode**)palloc0( sizeof(QTNode*) * 2 );
		node->child[0] = QT2QTN( in + 1, operand );
		node->sign = node->child[0]->sign;
		if (in->val == (int4) '!')
			node->nchild = 1;
		else {
			node->nchild = 2;
			node->child[1] = QT2QTN( in + in->left, operand );
			node->sign |= node->child[1]->sign;
		}
	} else if ( operand ) {
		node->word = operand + in->distance;
		node->sign = ((uint32) 1) << ( ((unsigned int) in->val) % 32 );
	}

	return node;
}
/*
 * QTNFree - release a QTNode tree; a NULL argument is accepted.
 *
 * The word is freed only for value nodes flagged QTN_WORDFREE.  Children
 * are freed recursively for operator nodes.  The value item is freed for
 * nodes flagged QTN_NEEDFREE, but only for nodes that have a child array.
 *
 * NOTE(review): valnode is dereferenced by the first test before the NULL
 * check further down, and a childless node flagged QTN_NEEDFREE keeps its
 * valnode - confirm both are intended.
 */
void
QTNFree( QTNode* in ) {
	int		idx;

	if ( in == NULL )
		return;

	if ( in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) !=0 )
		pfree( in->word );

	if ( in->child != NULL ) {
		if ( in->valnode != NULL ) {
			if ( in->valnode->type == OPR ) {
				for (idx = 0; idx < in->nchild; idx++)
					QTNFree( in->child[idx] );
			}
			if ( (in->flags & QTN_NEEDFREE) != 0 )
				pfree( in->valnode );
		}
		pfree( in->child );
	}

	pfree( in );
}
/*
 * QTNodeCompare - order two QTNode trees.
 *
 * Nodes are compared by item type, then by 'val'; value nodes then by word
 * length and word bytes, operator nodes by number of children and then
 * child by child.  For type, val, length and child count the node with the
 * LARGER value sorts first (-1).  Returns 0 for equal trees.
 */
int
QTNodeCompare( QTNode *an, QTNode *bn ) {
	ITEM   *aitem = an->valnode;
	ITEM   *bitem = bn->valnode;
	int		idx;

	if ( aitem->type != bitem->type )
		return ( aitem->type > bitem->type ) ? -1 : 1;

	if ( aitem->val != bitem->val )
		return ( aitem->val > bitem->val ) ? -1 : 1;

	if ( aitem->type == VAL ) {
		if ( aitem->length != bitem->length )
			return ( aitem->length > bitem->length ) ? -1 : 1;
		return strncmp( an->word, bn->word, aitem->length );
	}

	if ( an->nchild != bn->nchild )
		return ( an->nchild > bn->nchild ) ? -1 : 1;

	for ( idx = 0; idx < an->nchild; idx++ ) {
		int	cmp = QTNodeCompare( an->child[idx], bn->child[idx] );

		if ( cmp != 0 )
			return cmp;
	}

	return 0;
}
static int
cmpQTN( const void *a, const void *b ) {
return QTNodeCompare( *(QTNode**)a, *(QTNode**)b );
}
/*
 * QTNSort - bring a tree into canonical order: children of every operator
 * node are sorted with QTNodeCompare(), deepest levels first.
 */
void
QTNSort( QTNode* in ) {
	int		idx;

	if ( in->valnode->type == OPR ) {
		for (idx = 0; idx < in->nchild; idx++)
			QTNSort( in->child[idx] );

		if ( in->nchild > 1 )
			qsort((void *) in->child, in->nchild, sizeof(QTNode*), cmpQTN);
	}
}
/*
 * QTNEq - are two trees equal?
 *
 * Trees with different signatures cannot be equal, which avoids the full
 * comparison in most cases.
 */
bool
QTNEq( QTNode* a, QTNode* b ) {
	/* (a&b)==a && (a&b)==b holds exactly when the two signatures are identical */
	if ( a->sign != b->sign )
		return false;

	return QTNodeCompare(a,b) == 0;
}
/*
 * QTNTernary - flatten nested applications of the same operator.
 *
 * Wherever a child carries the same operator (same type and val) as its
 * parent, the child's children are spliced into the parent's child array
 * in place of that child, so the parent can end up with more than two
 * children.  Works bottom-up.
 */
void
QTNTernary( QTNode* in ) {
int i;
if ( in->valnode->type != OPR )
return;
/* flatten the subtrees first */
for (i=0;i<in->nchild;i++)
QTNTernary( in->child[i] );
for (i=0;i<in->nchild;i++) {
if ( in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val ) {
QTNode* cc = in->child[i];
int oldnchild = in->nchild;
/* cc's single slot is replaced by cc->nchild slots */
in->nchild += cc->nchild-1;
in->child = (QTNode**)repalloc( in->child, in->nchild * sizeof(QTNode*) );
/* shift the children that follow cc to make room, unless cc was the last one */
if ( i+1 != oldnchild )
memmove( in->child + i + cc->nchild, in->child + i + 1,
(oldnchild-i-1)*sizeof(QTNode*) );
memcpy( in->child + i, cc->child, cc->nchild * sizeof(QTNode*) );
/* skip the children just spliced in */
i += cc->nchild-1;
/* only the node struct is released here; cc->child is not freed */
pfree(cc);
}
}
}
/*
 * QTNBinary - inverse of QTNTernary: rewrite every operator node that has
 * more than two children into a chain of two-child nodes.
 *
 * Each round wraps the first two children in a new node carrying the same
 * operator (type and val), stores it in slot 0, moves the last child into
 * slot 1 and shrinks the child count by one.  The new node owns its ITEM
 * (QTN_NEEDFREE) and its signature is the OR of its two children's.
 */
void
QTNBinary( QTNode* in ) {
	int k;

	if ( in->valnode->type != OPR )
		return;

	for ( k = 0; k < in->nchild; k++ )
		QTNBinary( in->child[k] );

	while ( in->nchild > 2 ) {
		QTNode *pair = (QTNode*)palloc0( sizeof(QTNode) );

		pair->valnode = (ITEM*)palloc0( sizeof(ITEM) );
		pair->child = (QTNode**)palloc0( sizeof(QTNode*) * 2 );

		pair->valnode->type = in->valnode->type;
		pair->valnode->val = in->valnode->val;
		pair->flags = QTN_NEEDFREE;

		pair->nchild = 2;
		pair->child[0] = in->child[0];
		pair->child[1] = in->child[1];
		pair->sign = pair->child[0]->sign | pair->child[1]->sign;

		in->child[0] = pair;
		in->child[1] = in->child[ in->nchild-1 ];
		in->nchild--;
	}
}
/*
 * cntsize - walk a tree and accumulate what QTN2QT needs for sizing:
 * *nnode is incremented once per node, *sumlen grows by operand length
 * plus one (for the terminating zero byte) per non-operator node.
 * Both counters must be initialised by the caller.
 */
static void
cntsize(QTNode *in, int4 *sumlen, int4 *nnode) {
	int k;

	(*nnode)++;

	if ( in->valnode->type != OPR ) {
		*sumlen += in->valnode->length+1;
		return;
	}

	for ( k = 0; k < in->nchild; k++ )
		cntsize(in->child[k], sumlen, nnode);
}
/*
 * Write cursors used by fillQT() while serialising a QTNode tree into a
 * QUERYTYPE.
 */
typedef struct {
/* next ITEM slot to be written */
ITEM *curitem;
/* start of the operand area; operand offsets (distance) are relative to it */
char *operand;
/* next free byte in the operand area */
char *curoperand;
} QTN2QTState;
/*
 * fillQT - serialise one subtree into the output arrays tracked by state.
 *
 * The node's ITEM is copied into the current slot, which is then consumed.
 * For a VAL node the operand bytes are appended (zero-terminated) to the
 * operand area and the slot's distance is set to their offset from the
 * start of that area.  For an operator node the first child is written
 * directly after the operator; when there is a second child, the
 * operator's left field is set to the slot distance between the operator
 * and that second child.  Operator nodes must have at most two children.
 */
static void
fillQT( QTN2QTState *state, QTNode *in ) {
	ITEM *slot = state->curitem++;

	*slot = *(in->valnode);

	if ( in->valnode->type != VAL ) {
		Assert( in->nchild<=2 );

		fillQT( state, in->child[0] );
		if ( in->nchild==2 ) {
			slot->left = state->curitem - slot;
			fillQT( state, in->child[1] );
		}
		return;
	}

	memcpy( state->curoperand, in->word, in->valnode->length );
	state->curoperand[ in->valnode->length ] = '\0';
	slot->distance = state->curoperand - state->operand;
	state->curoperand += in->valnode->length + 1;
}
/*
 * QTN2QT - convert a QTNode tree into a freshly allocated QUERYTYPE.
 *
 * The tree is measured first (node count and total operand bytes), the
 * result is allocated with the allocator selected by memtype, its len and
 * size fields are filled in, and the tree is then serialised by fillQT().
 * The tree must already be in binary form (see the assertion in fillQT).
 */
QUERYTYPE*
QTN2QT( QTNode* in, MemoryType memtype ) {
	QTN2QTState	st;
	QUERYTYPE  *res;
	int			nitems = 0,
				opbytes = 0;
	int			total;

	cntsize(in, &opbytes, &nitems);
	total = COMPUTESIZE( nitems, opbytes );

	res = (QUERYTYPE*)MEMALLOC(memtype, total);
	res->len = total;
	res->size = nitems;

	st.curitem = GETQUERY( res );
	st.operand = GETOPERAND( res );
	st.curoperand = st.operand;

	fillQT( &st, in );

	return res;
}
/*
 * QTNCopy - deep copy of a QTNode tree using the allocator selected by
 * memtype.
 *
 * Every copied node gets its own ITEM and is flagged QTN_NEEDFREE.  VAL
 * nodes additionally get a private, zero-terminated copy of the operand
 * and are flagged QTN_WORDFREE; other nodes get a new child array whose
 * entries are copied recursively.  All remaining fields are taken over
 * from the source node as they are.
 */
QTNode *
QTNCopy( QTNode* in, MemoryType memtype ) {
	QTNode *dup = (QTNode*)MEMALLOC( memtype, sizeof(QTNode) );

	*dup = *in;

	dup->valnode = (ITEM*)MEMALLOC( memtype, sizeof(ITEM) );
	*(dup->valnode) = *(in->valnode);
	dup->flags |= QTN_NEEDFREE;

	if ( in->valnode->type != VAL ) {
		int k;

		dup->child = (QTNode**)MEMALLOC( memtype, sizeof(QTNode*) * in->nchild );
		for ( k = 0; k < in->nchild; k++ )
			dup->child[k] = QTNCopy( in->child[k], memtype );

		return dup;
	}

	dup->word = MEMALLOC( memtype, in->valnode->length + 1 );
	memcpy( dup->word, in->word, in->valnode->length );
	dup->word[ in->valnode->length ] = '\0';
	dup->flags |= QTN_WORDFREE;

	return dup;
}

View File

@ -0,0 +1,44 @@
#ifndef __QUERY_UTIL_H__
#define __QUERY_UTIL_H__
#include "postgres.h"
#include "utils/memutils.h"
#include "query.h"
/*
 * Tree representation of a tsquery, used by the comparison, sorting and
 * rewrite code.  Each node wraps one ITEM of the query.
 */
typedef struct QTNode {
/* the query item this node stands for (operator or operand) */
ITEM *valnode;
/* bitmask of the QTN_* flags below */
uint32 flags;
/* number of entries in child[] */
int4 nchild;
/* operand text; valnode->length gives the number of bytes compared/copied */
char *word;
/* 32-bit signature: one bit (val % 32) for an operand, OR of the children for an operator */
uint32 sign;
/* array of nchild child pointers */
struct QTNode **child;
} QTNode;
/* valnode was allocated separately for this node and is released by QTNFree() */
#define QTN_NEEDFREE 0x01
/* not referenced in this part of the code; presumably marks nodes the rewrite step must leave alone - TODO confirm */
#define QTN_NOCHANGE 0x02
/* word was allocated separately for this node and is released by QTNFree() */
#define QTN_WORDFREE 0x04
/*
 * Selects the allocator used by MEMALLOC()/MEMFREE() below.
 */
typedef enum {
/* palloc() / pfree() */
PlainMemory,
/* SPI_palloc() / SPI_pfree() */
SPIMemory,
/* MemoryContextAlloc() in AggregateContext / pfree() */
AggMemory
} MemoryType;
/* build a QTNode tree from a query item array and its operand area */
QTNode* QT2QTN( ITEM *in, char *operand );
/* serialise a (binary) QTNode tree into a newly allocated QUERYTYPE */
QUERYTYPE* QTN2QT( QTNode* in, MemoryType memtype );
/* recursively release a tree; NULL is accepted */
void QTNFree( QTNode* in );
/* sort the children of every operator node with QTNodeCompare() */
void QTNSort( QTNode* in );
/* merge nested nodes of the same operator into one n-ary node */
void QTNTernary( QTNode* in );
/* split operator nodes with more than two children into two-child nodes */
void QTNBinary( QTNode* in );
/* three-way comparison of two trees; 0 means identical */
int QTNodeCompare( QTNode *an, QTNode *bn );
/* deep copy of a tree using the given allocator */
QTNode* QTNCopy( QTNode* in, MemoryType memtype);
/* equality test: signature pre-check followed by QTNodeCompare() */
bool QTNEq( QTNode* a, QTNode* b );
/* memory context used for AggMemory allocations; defined elsewhere */
extern MemoryContext AggregateContext;
/*
 * Allocate s bytes with the allocator selected by us (a MemoryType).
 * NOTE(review): expands to SPI_palloc(), which this header does not declare
 * itself - the including file presumably pulls in the SPI header; confirm.
 */
#define MEMALLOC(us, s) ( ((us)==SPIMemory) ? SPI_palloc(s) : ( ( (us)==PlainMemory ) ? palloc(s) : MemoryContextAlloc(AggregateContext, (s)) ) )
/* release p: SPI_pfree() for SPIMemory, pfree() for every other MemoryType */
#define MEMFREE(us, p) ( ((us)==SPIMemory) ? SPI_pfree(p) : pfree(p) )
#endif

View File

@ -407,7 +407,9 @@ rank_def(PG_FUNCTION_ARGS)
typedef struct
{
ITEM *item;
ITEM **item;
int16 nitem;
bool needfree;
int32 pos;
} DocRepresentation;
@ -419,123 +421,80 @@ compareDocR(const void *a, const void *b)
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
}
typedef struct
{
DocRepresentation *doc;
int len;
} ChkDocR;
static bool
checkcondition_DR(void *checkval, ITEM * val)
{
DocRepresentation *ptr = ((ChkDocR *) checkval)->doc;
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
{
if (val == ptr->item || compareITEM(&val, &(ptr->item)) == 0)
return true;
ptr++;
}
return false;
checkcondition_ITEM(void *checkval, ITEM * val) {
return (bool)(val->istrue);
}
static void
reset_istrue_flag(QUERYTYPE *query) {
ITEM *item = GETQUERY(query);
int i;
/* reset istrue flag */
for(i = 0; i < query->size; i++) {
if ( item->type == VAL )
item->istrue = 0;
item++;
}
}
static bool
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
{
int i;
DocRepresentation *ptr,
*f = (DocRepresentation *) 0xffffffff;
ITEM *item = GETQUERY(query);
DocRepresentation *ptr;
int lastpos = *pos;
int oldq = *q;
int i;
bool found=false;
reset_istrue_flag(query);
*p = 0x7fffffff;
*q = 0;
ptr = doc + *pos;
for (i = 0; i < query->size; i++)
{
if (item->type != VAL)
{
item++;
continue;
/* find upper bound of cover from current position, move up */
while (ptr - doc < len) {
for(i=0;i<ptr->nitem;i++)
ptr->item[i]->istrue = 1;
if ( TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM) ) {
if (ptr->pos > *q) {
*q = ptr->pos;
lastpos = ptr - doc;
found = true;
}
break;
}
ptr = doc + *pos;
while (ptr - doc < len)
{
if (ptr->item == item)
{
if (ptr->pos > *q)
{
*q = ptr->pos;
lastpos = ptr - doc;
}
break;
}
ptr++;
}
item++;
ptr++;
}
if (*q == 0)
if (!found)
return false;
if (*q == oldq)
{ /* already check this pos */
(*pos)++;
return Cover(doc, len, query, pos, p, q);
reset_istrue_flag(query);
ptr = doc + lastpos;
/* find lower bound of cover from the found upper bound, move down */
while (ptr >= doc ) {
for(i=0;i<ptr->nitem;i++)
ptr->item[i]->istrue = 1;
if ( TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM) ) {
if (ptr->pos < *p)
*p = ptr->pos;
break;
}
ptr--;
}
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
if (item->type != VAL)
{
item++;
continue;
}
ptr = doc + lastpos;
while (ptr >= doc + *pos)
{
if (ptr->item == item)
{
if (ptr->pos < *p)
{
*p = ptr->pos;
f = ptr;
}
break;
}
ptr--;
}
item++;
if ( *p <= *q ) {
/* set position for next try to the next lexeme after the beginning of the found cover */
*pos= (ptr-doc) + 1;
return true;
}
if (*p <= *q)
{
ChkDocR ch;
ch.doc = f;
ch.len = (doc + lastpos) - f + 1;
*pos = f - doc + 1;
SortAndUniqOperand = GETOPERAND(query);
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
{
/*
* elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);
*/
return true;
}
else
return Cover(doc, len, query, pos, p, q);
}
return false;
(*pos)++;
return Cover( doc, len, query, pos, p, q );
}
static DocRepresentation *
@ -553,9 +512,12 @@ get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
SortAndUniqOperand = GETOPERAND(query);
reset_istrue_flag(query);
for (i = 0; i < query->size; i++)
{
if (item[i].type != VAL)
if (item[i].type != VAL || item[i].istrue)
continue;
entry = find_wordentry(txt, query, &(item[i]));
@ -581,7 +543,27 @@ get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
for (j = 0; j < dimt; j++)
{
doc[cur].item = &(item[i]);
if ( j == 0 ) {
ITEM *kptr, *iptr = item+i;
int k;
doc[cur].needfree = false;
doc[cur].nitem = 0;
doc[cur].item = (ITEM**)palloc( sizeof(ITEM*) * query->size );
for(k=0; k < query->size; k++) {
kptr = item+k;
if ( k==i || ( item[k].type == VAL && compareITEM( &kptr, &iptr ) == 0 ) ) {
doc[cur].item[ doc[cur].nitem ] = item+k;
doc[cur].nitem++;
kptr->istrue = 1;
}
}
} else {
doc[cur].needfree = false;
doc[cur].nitem = doc[cur-1].nitem;
doc[cur].item = doc[cur-1].item;
}
doc[cur].pos = WEP_GETPOS(post[j]);
cur++;
}
@ -606,16 +588,18 @@ rank_cd(PG_FUNCTION_ARGS)
{
int K = PG_GETARG_INT32(0);
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
int method = DEF_NORM_METHOD;
DocRepresentation *doc;
float res = 0.0;
int p = 0,
q = 0,
len,
cur;
cur,
i,
doclen=0;
doc = get_docrep(txt, query, &len);
doc = get_docrep(txt, query, &doclen);
if (!doc)
{
PG_FREE_IF_COPY(txt, 1);
@ -626,7 +610,7 @@ rank_cd(PG_FUNCTION_ARGS)
cur = 0;
if (K <= 0)
K = 4;
while (Cover(doc, len, query, &cur, &p, &q))
while (Cover(doc, doclen, query, &cur, &p, &q))
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
if (PG_NARGS() == 4)
@ -649,6 +633,9 @@ rank_cd(PG_FUNCTION_ARGS)
elog(ERROR, "unrecognized normalization method: %d", method);
}
for(i=0;i<doclen;i++)
if ( doc[i].needfree )
pfree( doc[i].item );
pfree(doc);
PG_FREE_IF_COPY(txt, 1);
PG_FREE_IF_COPY(query, 2);
@ -693,7 +680,7 @@ Datum
get_covers(PG_FUNCTION_ARGS)
{
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
WordEntry *pptr = ARRPTR(txt);
int i,
dlen = 0,
@ -790,6 +777,9 @@ get_covers(PG_FUNCTION_ARGS)
VARATT_SIZEP(out) = cptr - ((char *) out);
pfree(dw);
for(i=0;i<rlen;i++)
if ( doc[i].needfree )
pfree( doc[i].item );
pfree(doc);
PG_FREE_IF_COPY(txt, 0);

View File

@ -65,6 +65,88 @@ SELECT '1&(2&(4&(5|!6)))'::tsquery;
SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
SELECT '''the wether'':dc & '' sKies '':BC & a:d b:a';
select 'a' < 'b & c'::tsquery;
select 'a' > 'b & c'::tsquery;
select 'a | f' < 'b & c'::tsquery;
select 'a | ff' < 'b & c'::tsquery;
select 'a | f | g' < 'b & c'::tsquery;
select numnode( 'new'::tsquery );
select numnode( 'new & york'::tsquery );
select numnode( 'new & york | qwery'::tsquery );
create table test_tsquery (txtkeyword text, txtsample text);
\set ECHO none
\copy test_tsquery from stdin
'New York' new & york | big & apple | nyc
Moscow moskva | moscow
'Sanct Peter' Peterburg | peter | 'Sanct Peterburg'
'foo bar qq' foo & (bar | qq) & city
\.
\set ECHO all
alter table test_tsquery add column keyword tsquery;
update test_tsquery set keyword = to_tsquery('default', txtkeyword);
alter table test_tsquery add column sample tsquery;
update test_tsquery set sample = to_tsquery('default', txtsample::text);
create unique index bt_tsq on test_tsquery (keyword);
select count(*) from test_tsquery where keyword < 'new & york';
select count(*) from test_tsquery where keyword <= 'new & york';
select count(*) from test_tsquery where keyword = 'new & york';
select count(*) from test_tsquery where keyword >= 'new & york';
select count(*) from test_tsquery where keyword > 'new & york';
set enable_seqscan=off;
select count(*) from test_tsquery where keyword < 'new & york';
select count(*) from test_tsquery where keyword <= 'new & york';
select count(*) from test_tsquery where keyword = 'new & york';
select count(*) from test_tsquery where keyword >= 'new & york';
select count(*) from test_tsquery where keyword > 'new & york';
set enable_seqscan=on;
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::text );
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
select keyword from test_tsquery where keyword @ 'new';
select keyword from test_tsquery where keyword @ 'moscow';
select keyword from test_tsquery where keyword ~ 'new';
select keyword from test_tsquery where keyword ~ 'moscow';
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword;
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
set enable_seqscan='off';
select keyword from test_tsquery where keyword @ 'new';
select keyword from test_tsquery where keyword @ 'moscow';
select keyword from test_tsquery where keyword ~ 'new';
select keyword from test_tsquery where keyword ~ 'moscow';
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where keyword ~ query;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow') as query where query @ keyword;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'moscow & hotel') as query where query @ keyword;
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('default', 'bar & new & qq & foo & york') as query where query @ keyword;
set enable_seqscan='on';
select lexize('simple', 'ASD56 hsdkf');
select lexize('en_stem', 'SKIES Problems identity');

View File

@ -813,6 +813,249 @@ CREATE OPERATOR CLASS tsvector_ops
OPERATOR 5 > ,
FUNCTION 1 tsvector_cmp(tsvector, tsvector);
---------------- Comparison functions and operators for tsquery

-- Three-way comparison of two tsquery values; registered below as the
-- btree support function of operator class tsquery_ops.
CREATE OR REPLACE FUNCTION tsquery_cmp(tsquery, tsquery)
	RETURNS int4
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

-- Boolean comparison procedures backing the operators
-- <, <=, =, >=, > and <> defined below.
CREATE OR REPLACE FUNCTION tsquery_lt(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsquery_le(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsquery_eq(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsquery_ge(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsquery_gt(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsquery_ne(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;
-- Ordering and equality operators for tsquery.  Each operator names its
-- commutator (same test with the arguments swapped) and its negator
-- (the operator that returns the opposite result).  The four ordering
-- operators use the contsel / contjoinsel selectivity estimators; = and <>
-- use eqsel / eqjoinsel and neqsel / neqjoinsel.
CREATE OPERATOR < (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_lt,
COMMUTATOR = '>',
NEGATOR = '>=',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR <= (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_le,
COMMUTATOR = '>=',
NEGATOR = '>',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR >= (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_ge,
COMMUTATOR = '<=',
NEGATOR = '<',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR > (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_gt,
COMMUTATOR = '<',
NEGATOR = '<=',
RESTRICT = contsel,
JOIN = contjoinsel
);
-- SORT1 / SORT2 name the operator used to sort the left and right input
-- when this equality operator is used in a merge join.
CREATE OPERATOR = (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_eq,
COMMUTATOR = '=',
NEGATOR = '<>',
RESTRICT = eqsel,
JOIN = eqjoinsel,
SORT1 = '<',
SORT2 = '<'
);
CREATE OPERATOR <> (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsquery_ne,
COMMUTATOR = '<>',
NEGATOR = '=',
RESTRICT = neqsel,
JOIN = neqjoinsel
);
-- Default btree operator class for tsquery: the five ordering operators in
-- their btree strategy slots 1-5, with tsquery_cmp as support function 1.
-- This is what allows "create index ... on tbl (tsquery_column)".
CREATE OPERATOR CLASS tsquery_ops
DEFAULT FOR TYPE tsquery USING btree AS
OPERATOR 1 < ,
OPERATOR 2 <= ,
OPERATOR 3 = ,
OPERATOR 4 >= ,
OPERATOR 5 > ,
FUNCTION 1 tsquery_cmp(tsquery, tsquery);
-- numnode(query): the 'length' of a tsquery, i.e. its number of nodes
-- (operands and operators), e.g. numnode('new & york') counts three.
CREATE OR REPLACE FUNCTION numnode(tsquery)
	RETURNS int4
	AS 'MODULE_PATHNAME', 'tsquery_numnode'
	LANGUAGE 'C' STRICT IMMUTABLE;
--------------rewrite subsystem

-- rewrite(query, sql_text): rewrite a tsquery using substitution pairs
-- produced by a SELECT whose text is passed as the second argument, e.g.
--     select rewrite(orig, 'select what, to from tsquery_table;');
--
-- The supplied SELECT is executed when the function is called, so the
-- result depends on the current contents of the queried tables.  The
-- function therefore must not be declared cachable/immutable: that would
-- allow a call with constant arguments to be evaluated once at plan time
-- and the result reused after the table has changed.  It is left at the
-- default volatility; only the strictness attribute is kept.
CREATE OR REPLACE FUNCTION rewrite(tsquery, text)
returns tsquery
as 'MODULE_PATHNAME', 'tsquery_rewrite'
language 'C'
with (isstrict);
-- rewrite(orig, what, to): rewrite a tsquery with one explicit
-- substitution pair, both given directly as tsquery values.
CREATE OR REPLACE FUNCTION rewrite(tsquery, tsquery, tsquery)
	RETURNS tsquery
	AS 'MODULE_PATHNAME', 'tsquery_rewrite_query'
	LANGUAGE 'C' STRICT IMMUTABLE;
-- Aggregate form of rewrite: each input row supplies a tsquery array,
-- used as
--     select rewrite(ARRAY[orig, what, to]) from tsquery_table;
--
-- rewrite_accum is the state transition function (running tsquery state
-- plus the row's array), rewrite_finish turns the final state into the
-- result.  Neither function is declared strict and the aggregate has no
-- INITCOND, so the transition function is also called while the state is
-- still NULL.
CREATE OR REPLACE FUNCTION rewrite_accum(tsquery,tsquery[])
RETURNS tsquery
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE OR REPLACE FUNCTION rewrite_finish(tsquery)
returns tsquery
as 'MODULE_PATHNAME'
language 'C';
CREATE AGGREGATE rewrite (
BASETYPE=tsquery[],
SFUNC=rewrite_accum,
STYPE=tsquery,
FINALFUNC = rewrite_finish
);
-- "May contain" / "may be contained" tests between two tsquery values,
-- backing the @ and ~ operators below.  The answer is approximate (a
-- "maybe"), intended only to narrow down candidates for rewrite().
CREATE OR REPLACE FUNCTION tsq_mcontains(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;

CREATE OR REPLACE FUNCTION tsq_mcontained(tsquery, tsquery)
	RETURNS bool
	AS 'MODULE_PATHNAME'
	LANGUAGE 'C' STRICT IMMUTABLE;
-- tsquery @ tsquery (contains) and tsquery ~ tsquery (contained).
-- The two operators are declared as each other's commutator, and both use
-- the contsel / contjoinsel selectivity estimators.  Results are
-- approximate; see the underlying tsq_mcontains / tsq_mcontained functions.
CREATE OPERATOR @ (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsq_mcontains,
COMMUTATOR = '~',
RESTRICT = contsel,
JOIN = contjoinsel
);
CREATE OPERATOR ~ (
LEFTARG = tsquery,
RIGHTARG = tsquery,
PROCEDURE = tsq_mcontained,
COMMUTATOR = '@',
RESTRICT = contsel,
JOIN = contjoinsel
);
-----------gist support of rewrite------------------
-- gtsq is the fixed-size (8 byte) type stored in GiST indexes on tsquery
-- (see STORAGE in gist_tp_tsquery_ops).  Its input/output functions are
-- created first, which makes PostgreSQL create gtsq as a shell type and
-- emit the corresponding NOTICEs; CREATE TYPE then completes it.
CREATE FUNCTION gtsq_in(cstring)
RETURNS gtsq
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION gtsq_out(gtsq)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE TYPE gtsq (
INTERNALLENGTH = 8,
INPUT = gtsq_in,
OUTPUT = gtsq_out
);
-- The seven GiST support functions for tsquery indexes, referenced by
-- number in operator class gist_tp_tsquery_ops: consistent (1), union (2),
-- compress (3), decompress (4), penalty (5), picksplit (6) and same (7).
-- They are called by the index access method, not from SQL.
CREATE FUNCTION gtsq_consistent(gtsq,internal,int4)
RETURNS bool
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsq_compress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsq_decompress(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsq_penalty(internal,internal,internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C' with (isstrict);
CREATE FUNCTION gtsq_picksplit(internal, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
-- NOTE(review): the declared result type _int4 (int4[]) looks like a
-- leftover from another module's GiST definitions; an index over gtsq keys
-- would be expected to produce a gtsq/internal value here.  Confirm against
-- the C implementation before changing it.
CREATE FUNCTION gtsq_union(bytea, internal)
RETURNS _int4
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
CREATE FUNCTION gtsq_same(gtsq, gtsq, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE 'C';
-- Default GiST operator class for tsquery, supporting the @ and ~
-- operators.  Both are marked RECHECK: the index only pre-selects
-- candidate rows and the operator is evaluated again on each fetched row.
-- Index entries are stored as gtsq (STORAGE), not as the tsquery itself.
CREATE OPERATOR CLASS gist_tp_tsquery_ops
DEFAULT FOR TYPE tsquery USING gist
AS
OPERATOR 1 @ (tsquery, tsquery) RECHECK,
OPERATOR 2 ~ (tsquery, tsquery) RECHECK,
FUNCTION 1 gtsq_consistent (gtsq, internal, int4),
FUNCTION 2 gtsq_union (bytea, internal),
FUNCTION 3 gtsq_compress (internal),
FUNCTION 4 gtsq_decompress (internal),
FUNCTION 5 gtsq_penalty (internal, internal, internal),
FUNCTION 6 gtsq_picksplit (internal, internal),
FUNCTION 7 gtsq_same (gtsq, gtsq, internal),
STORAGE gtsq;
--example of ISpell dictionary
--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_name='ispell_template';
--example of synonym dict