diff --git a/contrib/intarray/Makefile b/contrib/intarray/Makefile index b4d125b0f9..3d45077825 100644 --- a/contrib/intarray/Makefile +++ b/contrib/intarray/Makefile @@ -1,5 +1,3 @@ -# $Header: /cvsroot/pgsql/contrib/intarray/Makefile,v 1.3 2001/02/20 19:20:27 petere Exp $ - subdir = contrib/intarray top_builddir = ../.. include $(top_builddir)/src/Makefile.global @@ -12,7 +10,7 @@ NAME= _int SO_MAJOR_VERSION= 1 SO_MINOR_VERSION= 0 -override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) -DPGSQL71 +override CPPFLAGS += -I$(srcdir) -DPGSQL71 OBJS= _int.o @@ -23,31 +21,46 @@ include $(top_srcdir)/src/Makefile.shlib $(NAME).sql: $(NAME).sql.in - sed 's,MODULE_PATHNAME,$(libdir)/$(shlib),g' $< >$@ + sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@ .PHONY: submake submake: $(MAKE) -C $(top_builddir)/src/test/regress pg_regress +# against installed postmaster installcheck: submake $(top_builddir)/src/test/regress/pg_regress _int +# in-tree test doesn't work yet (no way to install my shared library) +#check: all submake +# $(top_builddir)/src/test/regress/pg_regress --temp-install \ +# --top-builddir=$(top_builddir) _int check: - @echo "'$(MAKE) check' is not supported." - @echo "Do '$(MAKE) install', then '$(MAKE) installcheck' instead." + @echo "'make check' is not supported." + @echo "Do 'make install', then 'make installcheck' instead." install: all installdirs install-lib - $(INSTALL_DATA) $(srcdir)/README.intarray $(docdir)/contrib + #$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib $(INSTALL_DATA) $(NAME).sql $(datadir)/contrib installdirs: $(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir) uninstall: uninstall-lib - rm -f $(docdir)/contrib/README.intarray $(datadir)/contrib/$(NAME).sql + rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql clean distclean maintainer-clean: clean-lib - rm -f $(OBJS) $(NAME).sql + rm -f *.so y.tab.c y.tab.h $(OBJS) $(NAME).sql # things created by various check targets - rm -rf results - rm -f regression.diffs regression.out + rm -rf results tmp_check log + rm -f regression.diffs regression.out regress.out run_check.out +ifeq ($(PORTNAME), win) + rm -f regress.def +endif + +depend dep: + $(CC) -MM $(CFLAGS) *.c >depend + +ifeq (depend,$(wildcard depend)) +include depend +endif diff --git a/contrib/intarray/README.intarray b/contrib/intarray/README.intarray index 2829a74855..dc7cc9a88d 100644 --- a/contrib/intarray/README.intarray +++ b/contrib/intarray/README.intarray @@ -1,7 +1,11 @@ This is an implementation of RD-tree data structure using GiST interface of PostgreSQL. It has built-in lossy compression - must be declared -in index creation - with (islossy). Current implementation has index support -for one-dimensional array of int4's. +in index creation - with (islossy). Current implementation provides index +support for one-dimensional array of int4's - gist__int_ops, suitable for +small and medium size of arrays (used on default), and gist__intbig_ops for +indexing large arrays (we use superimposed signature with length of 4096 +bits to represent sets). + All work was done by Teodor Sigaev (teodor@stack.net) and Oleg Bartunov (oleg@sai.msu.su). See http://www.sai.msu.su/~megera/postgres/gist for additional information. @@ -25,7 +29,7 @@ EXAMPLE USAGE: -- create indices CREATE unique index message_key on message ( mid ); CREATE unique index message_section_map_key2 on message_section_map (sid, mid ); -CREATE INDEX message_rdtree_idx on message using gist ( sections ) with ( islossy ); +CREATE INDEX message_rdtree_idx on message using gist ( sections gist__int_ops) with ( islossy ); -- select some messages with section in 1 OR 2 - OVERLAP operator select message.mid from message where message.sections && '{1,2}'; diff --git a/contrib/intarray/_int.c b/contrib/intarray/_int.c index a34b36942b..1e28cec6e2 100644 --- a/contrib/intarray/_int.c +++ b/contrib/intarray/_int.c @@ -4,11 +4,11 @@ format for these routines is dictated by Postgres architecture. ******************************************************************************/ -#include "postgres.h" - +#include #include #include +#include "postgres.h" #include "access/gist.h" #include "access/itup.h" #include "access/rtree.h" @@ -32,6 +32,7 @@ #endif #define NDIM 1 + #define ARRISNULL(x) ( (x) ? ( ( ARR_NDIM(x) == NDIM ) ? ( ( ARRSIZE( x ) ) ? 0 : 1 ) : 1 ) : 1 ) #define SORT(x) if ( ARRSIZE( x ) > 1 ) isort( (void*)ARRPTR( x ), ARRSIZE( x ) ); #define PREPAREARR(x) \ @@ -39,6 +40,40 @@ if ( isort( (void*)ARRPTR( x ), ARRSIZE( x ) ) )\ x = _int_unique( x );\ } + +/* bigint defines */ +#define BITBYTE 8 +#define SIGLENINT 128 +#define SIGLEN ( sizeof(int)*SIGLENINT ) +#define SIGLENBIT (SIGLEN*BITBYTE) + +typedef char BITVEC[SIGLEN]; +typedef char* BITVECP; +#define SIGPTR(x) ( (BITVECP) ARR_DATA_PTR(x) ) + +#define NULLIFY(a) MemSet( a, 0, sizeof( BITVEC ) ) +#define NEWSIG(a) \ + a=(BITVECP) malloc( sizeof( BITVEC );\ + NULLIFY(a); + +#define LOOPBYTE(a) \ + for(i=0;i> ( i % BITBYTE ) & 0x01 ) + +#define union_sig(a,b,r) LOOPBYTE(r[i] = a[i] | b[i]) +#define inter_sig(a,b,r) LOOPBYTE(r[i] = a[i] & b[i]) + /* #define GIST_DEBUG #define GIST_QUERY_DEBUG @@ -58,17 +93,46 @@ static void printarr ( ArrayType * a, int num ) { } elog(NOTICE, "\t\t%s", bbb); } +static void printbitvec( BITVEC bv ) { + int i; + char str[ SIGLENBIT+1 ]; + str[ SIGLENBIT ] ='\0'; + LOOPBIT( str[i] = ( getbit(bv,i) ) ? '1' : '0' ); + + elog(NOTICE,"BV: %s", str); +} #endif +/* +** types for functions +*/ +typedef ArrayType * (*formarray) (ArrayType*, ArrayType*); +typedef void (*formfloat) (ArrayType*, float*); + /* ** usefull function */ -bool isort( int *a, const int len ); -ArrayType * new_intArrayType( int num ); -ArrayType * copy_intArrayType( ArrayType * a ); -ArrayType * resize_intArrayType( ArrayType * a, int num ); -int internal_size( int *a, int len ); -ArrayType * _int_unique( ArrayType * a ); +static bool isort( int *a, const int len ); +static ArrayType * new_intArrayType( int num ); +static ArrayType * copy_intArrayType( ArrayType * a ); +static ArrayType * resize_intArrayType( ArrayType * a, int num ); +static int internal_size( int *a, int len ); +static ArrayType * _int_unique( ArrayType * a ); + +/* common gist function*/ +static GIST_SPLITVEC * _int_common_picksplit(bytea *entryvec, + GIST_SPLITVEC *v, + formarray unionf, + formarray interf, + formfloat sizef); +static float * _int_common_penalty(GISTENTRY *origentry, + GISTENTRY *newentry, + float *result, + formarray unionf, + formfloat sizef); +static ArrayType * _int_common_union(bytea *entryvec, + int *sizep, + formarray unionf); /* ** GiST support methods @@ -78,7 +142,6 @@ GISTENTRY * g_int_compress(GISTENTRY *entry); GISTENTRY * g_int_decompress(GISTENTRY *entry); float * g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result); GIST_SPLITVEC * g_int_picksplit(bytea *entryvec, GIST_SPLITVEC *v); -bool g_int_internal_consistent(ArrayType *key, ArrayType *query, StrategyNumber strategy); ArrayType * g_int_union(bytea *entryvec, int *sizep); bool * g_int_same(ArrayType *b1, ArrayType *b2, bool *result); @@ -100,6 +163,23 @@ ArrayType * _int_union(ArrayType *a, ArrayType *b); ArrayType * _int_inter(ArrayType *a, ArrayType *b); void rt__int_size(ArrayType *a, float* sz); +/* +** _intbig methods +*/ +bool g_intbig_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy); +GISTENTRY * g_intbig_compress(GISTENTRY *entry); +GISTENTRY * g_intbig_decompress(GISTENTRY *entry); +float * g_intbig_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result); +GIST_SPLITVEC * g_intbig_picksplit(bytea *entryvec, GIST_SPLITVEC *v); +ArrayType * g_intbig_union(bytea *entryvec, int *sizep); +bool * g_intbig_same(ArrayType *a, ArrayType *b, bool *result); + +static bool _intbig_contains(ArrayType *a, ArrayType *b); +static bool _intbig_overlap(ArrayType *a, ArrayType *b); +static ArrayType * _intbig_union(ArrayType *a, ArrayType *b); +/*static ArrayType * _intbig_inter(ArrayType *a, ArrayType *b);*/ +static void rt__intbig_size(ArrayType *a, float* sz); +static void gensign(BITVEC sign, int * a, int len); /***************************************************************************** * GiST functions @@ -116,51 +196,33 @@ g_int_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy) { + bool retval; /* sort query for fast search, key is already sorted */ if ( ARRISNULL( query ) ) return FALSE; PREPAREARR( query ); - /* - ** if entry is not leaf, use g_int_internal_consistent, - ** else use g_int_leaf_consistent - */ - return(g_int_internal_consistent((ArrayType *)(entry->pred), query, strategy)); + + switch(strategy) { + case RTOverlapStrategyNumber: + retval = (bool)inner_int_overlap((ArrayType *)(entry->pred), query); + break; + case RTSameStrategyNumber: + case RTContainsStrategyNumber: + retval = (bool)inner_int_contains((ArrayType *)(entry->pred), query); + break; + case RTContainedByStrategyNumber: + retval = (bool)inner_int_overlap((ArrayType *)(entry->pred), query); + break; + default: + retval = FALSE; + } + return(retval); } -/* -** The GiST Union method for _intments -** returns the minimal set that encloses all the entries in entryvec -*/ ArrayType * g_int_union(bytea *entryvec, int *sizep) { - int numranges, i; - ArrayType *out = (ArrayType *)NULL; - ArrayType *tmp; - - numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY); - tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred; - -#ifdef GIST_DEBUG - elog(NOTICE, "union %d", numranges); -#endif - - for (i = 1; i < numranges; i++) { - out = inner_int_union(tmp, (ArrayType *) - (((GISTENTRY *)(VARDATA(entryvec)))[i]).pred); - if (i > 1 && tmp) pfree(tmp); - tmp = out; - } - - *sizep = VARSIZE( out ); -#ifdef GIST_DEBUG - elog(NOTICE, "\t ENDunion %d %d", *sizep, ARRSIZE( out ) ); -#endif - if ( *sizep == 0 ) { - pfree( out ); - return NULL; - } - return(out); + return _int_common_union( entryvec, sizep, inner_int_union ); } /* @@ -194,7 +256,6 @@ g_int_compress(GISTENTRY *entry) #ifdef GIST_DEBUG elog(NOTICE, "COMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, len); - /* printarr( r, len ); */ #endif if ( len >= 2*MAXNUMRANGE ) { /*compress*/ @@ -260,7 +321,6 @@ g_int_decompress(GISTENTRY *entry) #ifdef GIST_DEBUG elog(NOTICE, "DECOMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, lenin); - /* printarr( in, lenin ); */ #endif lenr = internal_size(din, lenin); @@ -287,180 +347,19 @@ g_int_decompress(GISTENTRY *entry) float * g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result) { - Datum ud; - float tmp1, tmp2; - -#ifdef GIST_DEBUG - elog(NOTICE, "penalty"); -#endif - ud = (Datum)inner_int_union((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred)); - rt__int_size((ArrayType *)ud, &tmp1); - rt__int_size((ArrayType *)(origentry->pred), &tmp2); - *result = tmp1 - tmp2; - pfree((char *)ud); - -#ifdef GIST_DEBUG - elog(NOTICE, "--penalty\t%g", *result); -#endif - - return(result); + return _int_common_penalty( origentry, newentry, result, inner_int_union, rt__int_size); } - -/* -** The GiST PickSplit method for _intments -** We use Guttman's poly time split algorithm -*/ GIST_SPLITVEC * g_int_picksplit(bytea *entryvec, GIST_SPLITVEC *v) { - OffsetNumber i, j; - ArrayType *datum_alpha, *datum_beta; - ArrayType *datum_l, *datum_r; - ArrayType *union_d, *union_dl, *union_dr; - ArrayType *inter_d; - bool firsttime; - float size_alpha, size_beta, size_union, size_inter; - float size_waste, waste; - float size_l, size_r; - int nbytes; - OffsetNumber seed_1 = 0, seed_2 = 0; - OffsetNumber *left, *right; - OffsetNumber maxoff; -#ifdef GIST_DEBUG - elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)); -#endif - - maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2; - nbytes = (maxoff + 2) * sizeof(OffsetNumber); - v->spl_left = (OffsetNumber *) palloc(nbytes); - v->spl_right = (OffsetNumber *) palloc(nbytes); - - firsttime = true; - waste = 0.0; - - for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { - datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); - for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { - datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred); - - /* compute the wasted space by unioning these guys */ - /* size_waste = size_union - size_inter; */ - union_d = (ArrayType *)inner_int_union(datum_alpha, datum_beta); - rt__int_size(union_d, &size_union); - inter_d = (ArrayType *)inner_int_inter(datum_alpha, datum_beta); - rt__int_size(inter_d, &size_inter); - size_waste = size_union - size_inter; - - pfree(union_d); - - if (inter_d != (ArrayType *) NULL) - pfree(inter_d); - - /* - * are these a more promising split that what we've - * already seen? - */ - - if (size_waste > waste || firsttime) { - waste = size_waste; - seed_1 = i; - seed_2 = j; - firsttime = false; - } - } - } - - left = v->spl_left; - v->spl_nleft = 0; - right = v->spl_right; - v->spl_nright = 0; - - datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred); - datum_l = copy_intArrayType( datum_alpha ); - rt__int_size((ArrayType *)datum_l, &size_l); - datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred); - datum_r = copy_intArrayType( datum_beta ); - rt__int_size((ArrayType *)datum_r, &size_r); - - /* - * Now split up the regions between the two seeds. An important - * property of this split algorithm is that the split vector v - * has the indices of items to be split in order in its left and - * right vectors. We exploit this property by doing a merge in - * the code that actually splits the page. - * - * For efficiency, we also place the new index tuple in this loop. - * This is handled at the very end, when we have placed all the - * existing tuples and i == maxoff + 1. - */ - - maxoff = OffsetNumberNext(maxoff); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - - - /* - * If we've already decided where to place this item, just - * put it on the right list. Otherwise, we need to figure - * out which page needs the least enlargement in order to - * store the item. - */ - - if (i == seed_1) { - *left++ = i; - v->spl_nleft++; - continue; - } else if (i == seed_2) { - *right++ = i; - v->spl_nright++; - continue; - } - - /* okay, which page needs least enlargement? */ - datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); - union_dl = (ArrayType *)inner_int_union(datum_l, datum_alpha); - union_dr = (ArrayType *)inner_int_union(datum_r, datum_alpha); - rt__int_size((ArrayType *)union_dl, &size_alpha); - rt__int_size((ArrayType *)union_dr, &size_beta); - - /* pick which page to add it to */ - if (size_alpha - size_l < size_beta - size_r) { - if ( datum_l ) pfree(datum_l); - if ( union_dr ) pfree(union_dr); - datum_l = union_dl; - size_l = size_alpha; - *left++ = i; - v->spl_nleft++; - } else { - if ( datum_r ) pfree(datum_r); - if ( union_dl ) pfree(union_dl); - datum_r = union_dr; - size_r = size_beta; - *right++ = i; - v->spl_nright++; - } - } - /**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */ - - if ( *(left-1) > *(right-1) ) { - *right = FirstOffsetNumber; - *(left-1) = InvalidOffsetNumber; - } else { - *left = FirstOffsetNumber; - *(right-1) = InvalidOffsetNumber; - } - - - v->spl_ldatum = (char *)datum_l; - v->spl_rdatum = (char *)datum_r; - -#ifdef GIST_DEBUG - elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright); -#endif - return v; + return _int_common_picksplit( entryvec, v, + inner_int_union, + inner_int_inter, + rt__int_size); } /* @@ -478,34 +377,6 @@ g_int_same(ArrayType *b1, ArrayType *b2, bool *result) return(result); } -bool -g_int_internal_consistent(ArrayType *key, - ArrayType *query, - StrategyNumber strategy) -{ - bool retval; - -#ifdef GIST_QUERY_DEBUG - elog(NOTICE, "internal_consistent, %d", strategy); -#endif - - switch(strategy) { - case RTOverlapStrategyNumber: - retval = (bool)inner_int_overlap(key, query); - break; - case RTSameStrategyNumber: - case RTContainsStrategyNumber: - retval = (bool)inner_int_contains(key, query); - break; - case RTContainedByStrategyNumber: - retval = (bool)inner_int_overlap(key, query); - break; - default: - retval = FALSE; - } - return(retval); -} - bool _int_contained(ArrayType *a, ArrayType *b) { @@ -653,7 +524,7 @@ inner_int_union ( ArrayType *a, ArrayType *b ) { int i,j; #ifdef GIST_DEBUG - /* elog(NOTICE, "inner_union %d %d", ARRISNULL( a ) , ARRISNULL( b ) ); */ + elog(NOTICE, "inner_union %d %d", ARRISNULL( a ) , ARRISNULL( b ) ); #endif if ( ARRISNULL( a ) && ARRISNULL( b ) ) return new_intArrayType(0); @@ -709,7 +580,7 @@ inner_int_inter ( ArrayType *a, ArrayType *b ) { int i,j; #ifdef GIST_DEBUG - /* elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) ); */ + elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) ); #endif if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL; @@ -756,7 +627,8 @@ rt__int_size(ArrayType *a, float *size) *****************************************************************************/ /* len >= 2 */ -bool isort ( int *a, int len ) { +static bool +isort ( int *a, int len ) { int tmp, index; int *cur, *end; bool r = FALSE; @@ -776,7 +648,8 @@ bool isort ( int *a, int len ) { return r; } -ArrayType * new_intArrayType( int num ) { +static ArrayType * +new_intArrayType( int num ) { ArrayType * r; int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; @@ -795,7 +668,8 @@ ArrayType * new_intArrayType( int num ) { return r; } -ArrayType * resize_intArrayType( ArrayType * a, int num ) { +static ArrayType * +resize_intArrayType( ArrayType * a, int num ) { int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; if ( num == ARRSIZE(a) ) return a; @@ -809,7 +683,8 @@ ArrayType * resize_intArrayType( ArrayType * a, int num ) { return a; } -ArrayType * copy_intArrayType( ArrayType * a ) { +static ArrayType * +copy_intArrayType( ArrayType * a ) { ArrayType * r; if ( ! a ) return NULL; r = new_intArrayType( ARRSIZE(a) ); @@ -818,7 +693,8 @@ ArrayType * copy_intArrayType( ArrayType * a ) { } /* num for compressed key */ -int internal_size (int *a, int len ) { +static int +internal_size (int *a, int len ) { int i,size=0; for(i=0;i 1 */ -ArrayType * _int_unique( ArrayType * r ) { +static ArrayType * +_int_unique( ArrayType * r ) { int *tmp, *dr, *data; int num = ARRSIZE(r); data = tmp = dr = ARRPTR( r ); @@ -839,4 +716,510 @@ ArrayType * _int_unique( ArrayType * r ) { else tmp++; return resize_intArrayType(r, dr + 1 - ARRPTR(r) ); -} +} + +/********************************************************************* +** intbig functions +*********************************************************************/ + +static void +gensign(BITVEC sign, int * a, int len) { + int i; + NULLIFY(sign); + for(i=0; ipred ) + in = (ArrayType *)PG_DETOAST_DATUM( entry->pred ); + else + in = NULL; +#else + in = (ArrayType *) entry->pred; +#endif + + if ( ! entry->leafkey ) return entry; + + retval = palloc(sizeof(GISTENTRY)); + if ( ! retval ) + elog(ERROR,"Can't allocate memory for compression"); + + if ( ARRISNULL( in ) ) { +#ifdef PGSQL71 + if ( in ) if ( (char*)in != (char*)entry->pred ) pfree(in); +#endif + gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset,0, FALSE); + return( retval ); + } + + r = new_intArrayType( SIGLENINT ); + gensign( SIGPTR( r ), + ARRPTR ( in ), + ARRSIZE( in ) ); + + gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE); + +#ifdef PGSQL71 + if ( in ) if ( (char*)in != (char*)entry->pred ) pfree(in); +#endif + + return(retval); +} + +GISTENTRY * +g_intbig_decompress(GISTENTRY *entry) { + return entry; +} + +GIST_SPLITVEC * +g_intbig_picksplit(bytea *entryvec, GIST_SPLITVEC *v) { + + OffsetNumber k; + ArrayType *datum_l, *datum_r, *datum_alpha; + ArrayType *unionarr; + float size_l, size_r; + int nbytes; + OffsetNumber *left, *right; + OffsetNumber maxoff; + +#ifdef GIST_DEBUG + elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)); +#endif + + maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2; + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + maxoff = OffsetNumberNext(maxoff); + datum_l = datum_r = NULL; + + for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k)) { + datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[k].pred); + + if ( k != FirstOffsetNumber ) { + unionarr = (ArrayType *)_intbig_union(datum_l, datum_alpha); + if ( datum_l ) pfree(datum_l); + datum_l = unionarr; + rt__intbig_size((ArrayType *)unionarr, &size_l); + *left++ = k; + v->spl_nleft++; + } else { + unionarr = (ArrayType *)_intbig_union(datum_r, datum_alpha); + if ( datum_r ) pfree(datum_r); + datum_r = unionarr; + rt__intbig_size((ArrayType *)unionarr, &size_r); + *right++ = k; + v->spl_nright++; + } + } + + if ( *(left-1) > *(right-1) ) { + *right = FirstOffsetNumber; + *(left-1) = InvalidOffsetNumber; + } else { + *left = FirstOffsetNumber; + *(right-1) = InvalidOffsetNumber; + } + + + v->spl_ldatum = (char *)datum_l; + v->spl_rdatum = (char *)datum_r; + +#ifdef GIST_DEBUG + elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright); +#endif + return v; +} + +ArrayType * +g_intbig_union(bytea *entryvec, int *sizep) { + return _int_common_union( entryvec, sizep, _intbig_union ); +} + +float * +g_intbig_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result){ + _int_common_penalty( origentry, newentry, result, _intbig_union, rt__intbig_size); + *result= SIGLENBIT - *result; + return result; +} + +bool +g_intbig_consistent(GISTENTRY *entry, ArrayType *query, StrategyNumber strategy) { + bool retval; + ArrayType * q = new_intArrayType( SIGLENINT ); + + if ( ARRISNULL( query ) ) return FALSE; + + gensign( SIGPTR( q ), + ARRPTR( query ), + ARRSIZE( query ) ); + + switch(strategy) { + case RTOverlapStrategyNumber: + retval = (bool)_intbig_overlap((ArrayType *)(entry->pred), q); + break; + case RTSameStrategyNumber: + case RTContainsStrategyNumber: + retval = (bool)_intbig_contains((ArrayType *)(entry->pred), q); + break; + case RTContainedByStrategyNumber: + retval = (bool)_intbig_overlap((ArrayType *)(entry->pred), q); + break; + default: + retval = FALSE; + } + pfree( q ); + return(retval); +} + +/***************************************************************** +** Common GiST Method +*****************************************************************/ + +/* +** The GiST Union method for _intments +** returns the minimal set that encloses all the entries in entryvec +*/ +ArrayType * +_int_common_union(bytea *entryvec, int *sizep, formarray unionf) { + int numranges, i; + ArrayType *out = (ArrayType *)NULL; + ArrayType *tmp; +#ifdef GIST_DEBUG + elog(NOTICE, "_int_common_union in"); +#endif + + numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY); + tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred; + + for (i = 1; i < numranges; i++) { + out = (*unionf)(tmp, (ArrayType *) + (((GISTENTRY *)(VARDATA(entryvec)))[i]).pred); + if (i > 1 && tmp) pfree(tmp); + tmp = out; + } + + *sizep = VARSIZE( out ); + if ( *sizep == 0 ) { + pfree( out ); +#ifdef GIST_DEBUG + elog(NOTICE, "_int_common_union out1"); +#endif + return NULL; + } +#ifdef GIST_DEBUG + elog(NOTICE, "_int_common_union out"); +#endif + return(out); + +} + +/* +** The GiST Penalty method for _intments +*/ +float * +_int_common_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result, + formarray unionf, + formfloat sizef) +{ + Datum ud; + float tmp1, tmp2; + +#ifdef GIST_DEBUG + elog(NOTICE, "penalty"); +#endif + ud = (Datum)(*unionf)((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred)); + (*sizef)((ArrayType *)ud, &tmp1); + (*sizef)((ArrayType *)(origentry->pred), &tmp2); + *result = tmp1 - tmp2; + pfree((char *)ud); + +#ifdef GIST_DEBUG + elog(NOTICE, "--penalty\t%g", *result); +#endif + + return(result); +} + +/* +** The GiST PickSplit method for _intments +** We use Guttman's poly time split algorithm +*/ +GIST_SPLITVEC * +_int_common_picksplit(bytea *entryvec, + GIST_SPLITVEC *v, + formarray unionf, + formarray interf, + formfloat sizef) +{ + OffsetNumber i, j; + ArrayType *datum_alpha, *datum_beta; + ArrayType *datum_l, *datum_r; + ArrayType *union_d, *union_dl, *union_dr; + ArrayType *inter_d; + bool firsttime; + float size_alpha, size_beta, size_union, size_inter; + float size_waste, waste; + float size_l, size_r; + int nbytes; + OffsetNumber seed_1 = 0, seed_2 = 0; + OffsetNumber *left, *right; + OffsetNumber maxoff; + +#ifdef GIST_DEBUG + elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)); +#endif + + maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2; + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + firsttime = true; + waste = 0.0; + + for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { + datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); + for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { + datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred); + + /* compute the wasted space by unioning these guys */ + /* size_waste = size_union - size_inter; */ + union_d = (*unionf)(datum_alpha, datum_beta); + (*sizef)(union_d, &size_union); + inter_d = (*interf)(datum_alpha, datum_beta); + (*sizef)(inter_d, &size_inter); + size_waste = size_union - size_inter; + + pfree(union_d); + + if (inter_d != (ArrayType *) NULL) + pfree(inter_d); + + /* + * are these a more promising split that what we've + * already seen? + */ + + if (size_waste > waste || firsttime) { + waste = size_waste; + seed_1 = i; + seed_2 = j; + firsttime = false; + } + } + } + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred); + datum_l = copy_intArrayType( datum_alpha ); + (*sizef)((ArrayType *)datum_l, &size_l); + datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred); + datum_r = copy_intArrayType( datum_beta ); + (*sizef)((ArrayType *)datum_r, &size_r); + + /* + * Now split up the regions between the two seeds. An important + * property of this split algorithm is that the split vector v + * has the indices of items to be split in order in its left and + * right vectors. We exploit this property by doing a merge in + * the code that actually splits the page. + * + * For efficiency, we also place the new index tuple in this loop. + * This is handled at the very end, when we have placed all the + * existing tuples and i == maxoff + 1. + */ + + maxoff = OffsetNumberNext(maxoff); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + + + /* + * If we've already decided where to place this item, just + * put it on the right list. Otherwise, we need to figure + * out which page needs the least enlargement in order to + * store the item. + */ + + if (i == seed_1) { + *left++ = i; + v->spl_nleft++; + continue; + } else if (i == seed_2) { + *right++ = i; + v->spl_nright++; + continue; + } + + /* okay, which page needs least enlargement? */ + datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred); + union_dl = (ArrayType *)(*unionf)(datum_l, datum_alpha); + union_dr = (ArrayType *)(*unionf)(datum_r, datum_alpha); + (*sizef)((ArrayType *)union_dl, &size_alpha); + (*sizef)((ArrayType *)union_dr, &size_beta); + + /* pick which page to add it to */ + if (size_alpha - size_l < size_beta - size_r) { + if ( datum_l ) pfree(datum_l); + if ( union_dr ) pfree(union_dr); + datum_l = union_dl; + size_l = size_alpha; + *left++ = i; + v->spl_nleft++; + } else { + if ( datum_r ) pfree(datum_r); + if ( union_dl ) pfree(union_dl); + datum_r = union_dr; + size_r = size_beta; + *right++ = i; + v->spl_nright++; + } + } + /**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */ + + if ( *(left-1) > *(right-1) ) { + *right = FirstOffsetNumber; + *(left-1) = InvalidOffsetNumber; + } else { + *left = FirstOffsetNumber; + *(right-1) = InvalidOffsetNumber; + } + + + v->spl_ldatum = (char *)datum_l; + v->spl_rdatum = (char *)datum_r; + +#ifdef GIST_DEBUG + elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright); +#endif + return v; +} diff --git a/contrib/intarray/_int.sql.in b/contrib/intarray/_int.sql.in index ffd384afb2..2214bef21f 100644 --- a/contrib/intarray/_int.sql.in +++ b/contrib/intarray/_int.sql.in @@ -1,4 +1,4 @@ --- Create the user-defined type for the 1-D frloating point indervals (_int4) +-- Create the user-defined type for the 1-D integer arrays (_int4) -- BEGIN TRANSACTION; @@ -208,4 +208,116 @@ INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) WHERE amname = 'gist' and opcname = 'gist__int_ops' and proname = 'g_int_same'; + +--------------------------------------------- +-- intbig +--------------------------------------------- +-- define the GiST support methods +CREATE FUNCTION g_intbig_consistent(opaque,_int4,int4) RETURNS bool + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_compress(opaque) RETURNS opaque + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_decompress(opaque) RETURNS opaque + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_penalty(opaque,opaque,opaque) RETURNS opaque + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_picksplit(opaque, opaque) RETURNS opaque + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_union(bytea, opaque) RETURNS _int4 + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +CREATE FUNCTION g_intbig_same(_int4, _int4, opaque) RETURNS opaque + AS 'MODULE_PATHNAME' LANGUAGE 'c'; + +-- register the default opclass for indexing +INSERT INTO pg_opclass (opcname, opcdeftype) + values ( 'gist__intbig_ops', 0 ); + + +-- get the comparators for _intments and store them in a tmp table +SELECT o.oid AS opoid, o.oprname +INTO TABLE _int_ops_tmp +FROM pg_operator o, pg_type t +WHERE o.oprleft = t.oid and o.oprright = t.oid + and t.typname = '_int4'; + +-- make sure we have the right operators +-- SELECT * from _int_ops_tmp; + +-- using the tmp table, generate the amop entries + +-- _int_overlap +INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) + SELECT am.oid, opcl.oid, c.opoid, 3 + FROM pg_am am, pg_opclass opcl, _int_ops_tmp c + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and c.oprname = '&&'; + +-- _int_contains +INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) + SELECT am.oid, opcl.oid, c.opoid, 7 + FROM pg_am am, pg_opclass opcl, _int_ops_tmp c + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and c.oprname = '@'; + +-- _int_contained +INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy) + SELECT am.oid, opcl.oid, c.opoid, 8 + FROM pg_am am, pg_opclass opcl, _int_ops_tmp c + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and c.oprname = '~'; + +DROP TABLE _int_ops_tmp; + + +-- add the entries to amproc for the support methods +-- note the amprocnum numbers associated with each are specific! + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 1 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_consistent'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 2 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_union'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 3 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_compress'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 4 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_decompress'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 5 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_penalty'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 6 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_picksplit'; + +INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum) + SELECT am.oid, opcl.oid, pro.oid, 7 + FROM pg_am am, pg_opclass opcl, pg_proc pro + WHERE amname = 'gist' and opcname = 'gist__intbig_ops' + and proname = 'g_intbig_same'; + END TRANSACTION; diff --git a/contrib/intarray/bench/create_test.pl b/contrib/intarray/bench/create_test.pl index 6434fd037d..ada064a270 100755 --- a/contrib/intarray/bench/create_test.pl +++ b/contrib/intarray/bench/create_test.pl @@ -49,7 +49,7 @@ print <