/******************************************************************************
  This file contains routines that can be bound to a Postgres backend and
  called by the backend in the process of processing queries.  The calling
  format for these routines is dictated by Postgres architecture.

  NOTE(review): this copy of the file has been through a filter that removed
  every span running from a '<' to the next '>'.  Where the loss falls inside
  a single function whose both ends survive, the code has been rebuilt and is
  marked "rebuilt".  Where the loss swallowed whole definitions, the surviving
  fragments are left exactly as found between NOTE(review) markers and must be
  restored from version control.
******************************************************************************/

#include "postgres.h"

/*
 * NOTE(review): the operands of the next two directives were lost.  The
 * visible code calls memmove() (and strchr()/sprintf() under GIST_DEBUG),
 * so the string header is presumably one of them -- confirm and restore.
 */
#include
#include

#include "access/gist.h"
#include "access/itup.h"
#include "access/rtree.h"
#include "utils/elog.h"
#include "utils/palloc.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "storage/bufpage.h"

/*
 * Keys with at least 2*MAXNUMRANGE elements are stored in range-compressed
 * form by g_int_compress().
 */
#define MAXNUMRANGE 100

#define max(a,b) ((a) > (b) ? (a) : (b))
#define min(a,b) ((a) <= (b) ? (a) : (b))
/* argument is parenthesised in every position so abs(x - y) negates the whole difference */
#define abs(a) ((a) < (0) ? (-(a)) : (a))

/* pointer to the integer payload of an array */
#define ARRPTR(x) ( (int4 *) ARR_DATA_PTR(x) )
/* number of elements in an array */
#ifdef PGSQL71
#define ARRSIZE(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x))
#else
#define ARRSIZE(x) getNitems( ARR_NDIM(x), ARR_DIMS(x))
#endif

/* only one-dimensional arrays are handled */
#define NDIM 1
/* true for a NULL pointer, an array that is not NDIM-dimensional, or an array with no elements */
#define ARRISNULL(x) ( (x) ? ( ( ARR_NDIM(x) == NDIM ) ? ( ( ARRSIZE( x ) ) ? 0 : 1 ) : 1 ) : 1 )

/* sort x in place; expands to a complete statement including its ';' */
#define SORT(x) if ( ARRSIZE( x ) > 1 ) isort( (void*)ARRPTR( x ), ARRSIZE( x ) );
/* sort x in place and, when isort() reports equal neighbours, drop the duplicates */
#define PREPAREARR(x) \
	if ( ARRSIZE( x ) > 1 ) {\
		if ( isort( (void*)ARRPTR( x ), ARRSIZE( x ) ) )\
			x = _int_unique( x );\
	}

/*
#define GIST_DEBUG
#define GIST_QUERY_DEBUG
*/

#ifdef GIST_DEBUG
/*
** Debug helper taking an array and a count.
*/
static void
printarr ( ArrayType * a, int num )
{
	char bbb[16384];
	char *cur;
	int l;
	int *d;

	d = ARRPTR( a );
	*bbb = '\0';
	cur = bbb;
	/*
	 * NOTE(review): source text is missing from inside the loop header
	 * below up to the tail of a later function.  The remainder of
	 * printarr(), the matching #endif, and every definition or declaration
	 * that stood between here and that tail are lost.  The fragment is
	 * kept exactly as found.
	 */
	for(l=0; lpred), query, strategy));
}
/* NOTE(review): end of damaged fragment. */

/*
** The GiST Union method for _intments
** returns the minimal set that encloses all the entries in entryvec
**
** entryvec is a varlena holding an array of GISTENTRY; *sizep receives
** VARSIZE of the result.  Returns NULL (with *sizep == 0) when there is no
** result array.
*/
ArrayType *
g_int_union(bytea *entryvec, int *sizep)
{
	int numranges, i;
	ArrayType *out = (ArrayType *)NULL;
	ArrayType *tmp;

	numranges = (VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY);
#ifdef GIST_DEBUG
	elog(NOTICE, "union %d", numranges);
#endif

	/* an empty vector has no entry 0 to read */
	if ( numranges < 1 ) {
		*sizep = 0;
		return NULL;
	}

	tmp = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[0]).pred;

	for (i = 1; i < numranges; i++) {
		out = inner_int_union(tmp, (ArrayType *)
			(((GISTENTRY *)(VARDATA(entryvec)))[i]).pred);
		/* entry 0 belongs to the caller; only our own intermediates are freed */
		if (i > 1 && tmp) pfree(tmp);
		tmp = out;
	}

	/*
	 * With a single entry the loop above never runs and out is still NULL;
	 * the union of one key is a copy of that key.
	 */
	if ( out == NULL && numranges == 1 )
		out = copy_intArrayType( tmp );

	/* never hand a NULL pointer to VARSIZE() */
	if ( out == NULL ) {
		*sizep = 0;
		return NULL;
	}

	*sizep = VARSIZE( out );
#ifdef GIST_DEBUG
	elog(NOTICE, "\t ENDunion %d %d", *sizep, ARRSIZE( out ) );
#endif
	if ( *sizep == 0 ) {
		pfree( out );
		return NULL;
	}
	return(out);
}

/*
** GiST Compress and Decompress methods
*/

/*
** Build the stored form of a key.
**
** A NULL/empty key becomes an entry with a NULL pred.  Otherwise the key is
** copied (and, for leaf keys, sorted and made unique).  A copy with at least
** 2*MAXNUMRANGE elements is rewritten as (low, high) pairs, and neighbouring
** pairs with the smallest gap are merged until at most 2*MAXNUMRANGE ints
** remain.
*/
GISTENTRY *
g_int_compress(GISTENTRY *entry)
{
	GISTENTRY *retval;
	ArrayType * r;
	int len;
	int *dr;
	int i,mingap,cand;

	retval = palloc(sizeof(GISTENTRY));
	if ( ! retval )
		elog(ERROR,"Can't allocate memory for compression");

	if ( ARRISNULL( (ArrayType *) entry->pred ) ) {
#ifdef GIST_DEBUG
		elog(NOTICE,"COMP IN: NULL");
#endif
		gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset, 0, FALSE);
		return( retval );
	}

	r = copy_intArrayType( (ArrayType *) entry->pred );
	if ( entry->leafkey ) PREPAREARR( r );
	len = ARRSIZE( r );

#ifdef GIST_DEBUG
	elog(NOTICE, "COMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, len);
	/* printarr( r, len ); */
#endif

	if ( len >= 2*MAXNUMRANGE ) { /*compress*/
		r = resize_intArrayType( r, 2*( len ) );

		dr = ARRPTR( r );

		/* walk backwards so each element is read before its slot is overwritten */
		for(i=len-1; i>=0;i--)
			dr[2*i] = dr[2*i+1] = dr[i];

		len *= 2;
		cand = 1;
		while( len > MAXNUMRANGE * 2 ) {
			mingap = 0x7fffffff;
			/*
			 * rebuilt: the loop header and the comparison were lost; the
			 * surviving body compares dr[i] - dr[i-1] (gap between one
			 * pair's high and the next pair's low) and records i, so i
			 * steps over the pair starts.
			 */
			for( i=2; i<len; i+=2 )
				if ( mingap > (dr[i] - dr[i-1]) ) {
					mingap = (dr[i] - dr[i-1]);
					cand = i;
				}
			/* drop dr[cand-1] and dr[cand]: the two neighbouring pairs become one */
			memmove( (void*)&dr[cand-1], (void*)&dr[cand+1], (len - cand - 1)*sizeof(int) );
			len -= 2;
		}
		r = resize_intArrayType(r, len );
	}

	gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE);

	return(retval);
}

/*
** Inverse of g_int_compress().
**
** A NULL/undersized key yields a fresh entry with a NULL pred.  A key with
** fewer than 2*MAXNUMRANGE ints is not compressed and is returned in the
** caller's own entry.  Anything else is treated as (low, high) pairs and
** expanded into a new array.
*/
GISTENTRY *
g_int_decompress(GISTENTRY *entry)
{
	GISTENTRY *retval;
	ArrayType * r;
	int *dr, lenr;
	ArrayType * in;
	int lenin;
	int *din;
	int i,j;

	if ( entry->bytes < ARR_OVERHEAD( NDIM ) || ARRISNULL( (ArrayType *) entry->pred ) ) {
		retval = palloc(sizeof(GISTENTRY));
		if ( ! retval )
			elog(ERROR,"Can't allocate memory for decompression");
		gistentryinit(*retval, (char *)NULL, entry->rel, entry->page, entry->offset, 0, FALSE);
#ifdef GIST_DEBUG
		elog(NOTICE,"DECOMP IN: NULL");
#endif
		return( retval );
	}

	in = (ArrayType *) entry->pred;
	lenin = ARRSIZE(in);
	din = ARRPTR(in);

	if ( lenin < 2*MAXNUMRANGE ) { /*not compressed value*/
		/* sometimes strange bytesize */
		gistentryinit(*entry, (char *)in, entry->rel, entry->page, entry->offset, VARSIZE( in ), FALSE);
		return (entry);
	}

#ifdef GIST_DEBUG
	elog(NOTICE, "DECOMP IN: %d leaf; %d rel; %d page; %d offset; %d bytes; %d elems", entry->leafkey, (int)entry->rel, (int)entry->page, (int)entry->offset, (int)entry->bytes, lenin);
	/* printarr( in, lenin ); */
#endif

	lenr = internal_size(din, lenin);

	r = new_intArrayType( lenr );
	dr = ARRPTR( r );

	/*
	 * rebuilt: everything from the loop condition up to the first
	 * arguments of the final gistentryinit() was lost.  Each (low, high)
	 * pair is expanded; a value equal to the one just written is skipped,
	 * and writes never go past the lenr slots allocated above.
	 * NOTE(review): verify against version control.
	 */
	for( i=0; i+1<lenin; i+=2 )
		for( j=din[i]; j<=din[i+1] && dr - ARRPTR(r) < lenr; j++ ) {
			if ( dr == ARRPTR(r) || *(dr-1) != j )
				*dr++ = j;
			if ( j == 0x7fffffff )
				break; /* j++ would overflow */
		}
	r = resize_intArrayType( r, dr - ARRPTR(r) );

	retval = palloc(sizeof(GISTENTRY));
	if ( ! retval )
		elog(ERROR,"Can't allocate memory for decompression");
	gistentryinit(*retval, (char *)r, entry->rel, entry->page, entry->offset, VARSIZE( r ), FALSE);

	return(retval);
}

/*
** The GiST Penalty method for _intments
**
** *result = size(union(orig, new)) - size(orig), with sizes as reported by
** rt__int_size().  Returns result.
*/
float *
g_int_penalty(GISTENTRY *origentry, GISTENTRY *newentry, float *result)
{
	Datum ud;
	float tmp1, tmp2;

#ifdef GIST_DEBUG
	elog(NOTICE, "penalty");
#endif
	ud = (Datum)inner_int_union((ArrayType *)(origentry->pred), (ArrayType *)(newentry->pred));
	rt__int_size((ArrayType *)ud, &tmp1);
	rt__int_size((ArrayType *)(origentry->pred), &tmp2);
	*result = tmp1 - tmp2;
	/* guarded like the union results in g_int_picksplit() */
	if ( ud )
		pfree((char *)ud);

#ifdef GIST_DEBUG
	elog(NOTICE, "--penalty\t%g", *result);
#endif

	return(result);
}

/*
** The GiST PickSplit method for _intments
** We use Guttman's poly time split algorithm
**
** Picks as seeds the pair of entries with the largest
** size(union) - size(intersection), then assigns every other entry to the
** side whose union grows least.  Fills v->spl_left/spl_right (palloc'd
** here), their counts, and v->spl_ldatum/spl_rdatum; returns v.
*/
GIST_SPLITVEC *
g_int_picksplit(bytea *entryvec,
				GIST_SPLITVEC *v)
{
	OffsetNumber i, j;
	ArrayType *datum_alpha, *datum_beta;
	ArrayType *datum_l, *datum_r;
	ArrayType *union_d, *union_dl, *union_dr;
	ArrayType *inter_d;
	bool firsttime;
	float size_alpha, size_beta, size_union, size_inter;
	float size_waste, waste;
	float size_l, size_r;
	int nbytes;
	OffsetNumber seed_1 = 0, seed_2 = 0;
	OffsetNumber *left, *right;
	OffsetNumber maxoff;

#ifdef GIST_DEBUG
	elog(NOTICE, "--------picksplit %d",(VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY));
#endif

	maxoff = ((VARSIZE(entryvec) - VARHDRSZ)/sizeof(GISTENTRY)) - 2;
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
	v->spl_left = (OffsetNumber *) palloc(nbytes);
	v->spl_right = (OffsetNumber *) palloc(nbytes);

	firsttime = true;
	waste = 0.0;

	for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
		datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
		for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) {
			datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[j].pred);

			/* compute the wasted space by unioning these guys */
			/* size_waste = size_union - size_inter; */
			union_d = (ArrayType *)inner_int_union(datum_alpha, datum_beta);
			rt__int_size(union_d, &size_union);
			inter_d = (ArrayType *)inner_int_inter(datum_alpha, datum_beta);
			rt__int_size(inter_d, &size_inter);
			size_waste = size_union - size_inter;

			/* guarded the same way as union_dl/union_dr further down */
			if (union_d != (ArrayType *) NULL)
				pfree(union_d);

			if (inter_d != (ArrayType *) NULL)
				pfree(inter_d);

			/*
			 * are these a more promising split than what we've
			 * already seen?
			 */
			if (size_waste > waste || firsttime) {
				waste = size_waste;
				seed_1 = i;
				seed_2 = j;
				firsttime = false;
			}
		}
	}

	left = v->spl_left;
	v->spl_nleft = 0;
	right = v->spl_right;
	v->spl_nright = 0;

	datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_1].pred);
	datum_l = copy_intArrayType( datum_alpha );
	rt__int_size((ArrayType *)datum_l, &size_l);
	datum_beta = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[seed_2].pred);
	datum_r = copy_intArrayType( datum_beta );
	rt__int_size((ArrayType *)datum_r, &size_r);

	/*
	 * Now split up the regions between the two seeds.  An important
	 * property of this split algorithm is that the split vector v
	 * has the indices of items to be split in order in its left and
	 * right vectors.  We exploit this property by doing a merge in
	 * the code that actually splits the page.
	 *
	 * For efficiency, we also place the new index tuple in this loop.
	 * This is handled at the very end, when we have placed all the
	 * existing tuples and i == maxoff + 1.
	 */

	maxoff = OffsetNumberNext(maxoff);
	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {

		/*
		 * If we've already decided where to place this item, just
		 * put it on the right list.  Otherwise, we need to figure
		 * out which page needs the least enlargement in order to
		 * store the item.
		 */

		if (i == seed_1) {
			*left++ = i;
			v->spl_nleft++;
			continue;
		} else if (i == seed_2) {
			*right++ = i;
			v->spl_nright++;
			continue;
		}

		/* okay, which page needs least enlargement? */
		datum_alpha = (ArrayType *)(((GISTENTRY *)(VARDATA(entryvec)))[i].pred);
		union_dl = (ArrayType *)inner_int_union(datum_l, datum_alpha);
		union_dr = (ArrayType *)inner_int_union(datum_r, datum_alpha);
		rt__int_size((ArrayType *)union_dl, &size_alpha);
		rt__int_size((ArrayType *)union_dr, &size_beta);

		/* pick which page to add it to */
		if (size_alpha - size_l < size_beta - size_r) {
			/* the left union replaces datum_l; the unused right union is discarded */
			if ( datum_l ) pfree(datum_l);
			if ( union_dr ) pfree(union_dr);
			datum_l = union_dl;
			size_l = size_alpha;
			*left++ = i;
			v->spl_nleft++;
		} else {
			/* the right union replaces datum_r; the unused left union is discarded */
			if ( datum_r ) pfree(datum_r);
			if ( union_dl ) pfree(union_dl);
			datum_r = union_dr;
			size_r = size_beta;
			*right++ = i;
			v->spl_nright++;
		}
	}
	/**left = *right = FirstOffsetNumber;*/ /* sentinel value, see dosplit() */
	/*
	 * Whichever list ends with the larger offset has that last slot
	 * overwritten with InvalidOffsetNumber (its count is left unchanged);
	 * the other list gets FirstOffsetNumber appended as a terminator.
	 */
	if ( *(left-1) > *(right-1) ) {
		*right = FirstOffsetNumber;
		*(left-1) = InvalidOffsetNumber;
	} else {
		*left = FirstOffsetNumber;
		*(right-1) = InvalidOffsetNumber;
	}

	v->spl_ldatum = (char *)datum_l;
	v->spl_rdatum = (char *)datum_r;

#ifdef GIST_DEBUG
	elog(NOTICE, "--------ENDpicksplit %d %d",v->spl_nleft, v->spl_nright);
#endif
	return v;
}

/*
** Equality methods
**
** Stores TRUE in *result when _int_same() reports b1 and b2 equal, FALSE
** otherwise, and returns result.
*/
bool *
g_int_same(ArrayType *b1, ArrayType *b2, bool *result)
{
	if (_int_same(b1, b2))
		*result = TRUE;
	else
		*result = FALSE;

	return(result);
}

/*
** Decide whether key can satisfy query under the given strategy:
** overlap and contained-by use inner_int_overlap(); same and contains use
** inner_int_contains(); any other strategy yields FALSE.
*/
bool
g_int_internal_consistent(ArrayType *key,
						  ArrayType *query,
						  StrategyNumber strategy)
{
	bool retval;

#ifdef GIST_QUERY_DEBUG
	elog(NOTICE, "internal_consistent, %d", strategy);
#endif

	switch(strategy) {
		case RTOverlapStrategyNumber:
			retval = (bool)inner_int_overlap(key, query);
			break;
		case RTSameStrategyNumber:
		case RTContainsStrategyNumber:
			retval = (bool)inner_int_contains(key, query);
			break;
		case RTContainedByStrategyNumber:
			retval = (bool)inner_int_overlap(key, query);
			break;
		default:
			retval = FALSE;
	}
	return(retval);
}

/*
** a is contained in b: _int_contains() with the arguments swapped.
*/
bool
_int_contained(ArrayType *a, ArrayType *b)
{
	return ( _int_contains(b, a) );
}

/*
** a contains b.  Returns FALSE if either argument is NULL/empty.  Works on
** sorted, de-duplicated private copies, so the arguments are not modified.
*/
bool
_int_contains ( ArrayType *a, ArrayType *b )
{
	bool res;
	ArrayType *an, *bn;

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;

	an = copy_intArrayType( a );
	bn = copy_intArrayType( b );

	PREPAREARR(an);
	PREPAREARR(bn);

	res = inner_int_contains( an, bn );
	pfree( an ); pfree( bn );
	return res;
}

/*
** Worker for _int_contains(); called there on arrays already passed
** through PREPAREARR.  Returns FALSE if either argument is NULL/empty.
*/
bool
inner_int_contains ( ArrayType *a, ArrayType *b )
{
	int na, nb;
	int i,j, n;
	int *da, *db;

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return FALSE;

	na = ARRSIZE( a );
	nb = ARRSIZE( b );
	da = ARRPTR( a );
	db = ARRPTR( b );

#ifdef GIST_DEBUG
	elog(NOTICE, "contains %d %d", na, nb);
#endif

	i = j = n = 0;
	/*
	 * NOTE(review): source text is missing from inside the loop condition
	 * below up to the last statements of a later function that returns an
	 * array after passing it through _int_unique().  The rest of
	 * inner_int_contains() and every definition in between are lost; other
	 * code in this file calls _int_same(), inner_int_overlap() and
	 * inner_int_union(), none of which is defined in the surviving text.
	 * The fragment is kept exactly as found.
	 */
	while( i 1 )
		r = _int_unique( r );

	return r;
}
/* NOTE(review): end of damaged fragment. */

/*
** Intersection of a and b.  Returns NULL if either argument is NULL/empty
** or if the intersection is empty.  Like _int_contains(), this sorts
** private copies rather than the caller's arrays.
*/
ArrayType *
_int_inter ( ArrayType *a, ArrayType *b )
{
	ArrayType *an, *bn, *res;

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;

	an = copy_intArrayType( a );
	bn = copy_intArrayType( b );

	SORT(an);
	SORT(bn);

	/* inner_int_inter() builds its result in a fresh array, so the copies can go */
	res = inner_int_inter( an, bn );
	pfree( an ); pfree( bn );
	return res;
}

/*
** Worker for _int_inter(): merge-intersects two ascending arrays into a
** newly allocated array, emitting each common value once.  Returns NULL if
** either argument is NULL/empty or no value is common.
*/
ArrayType *
inner_int_inter ( ArrayType *a, ArrayType *b )
{
	ArrayType * r;
	int na , nb ;
	int *da, *db, *dr;
	int i,j;

#ifdef GIST_DEBUG
	/* elog(NOTICE, "inner_inter %d %d", ARRISNULL( a ), ARRISNULL( b ) ); */
#endif

	if ( ARRISNULL( a ) || ARRISNULL( b ) ) return NULL;

	na = ARRSIZE( a );
	nb = ARRSIZE( b );
	da = ARRPTR( a );
	db = ARRPTR( b );
	/* the intersection cannot be larger than the smaller input */
	r = new_intArrayType( min(na, nb) );
	dr = ARRPTR( r );

	i = j = 0;
	/*
	 * rebuilt: the loop condition, the first two branches and the start of
	 * the duplicate check were lost.  *(dr-1) is only consulted once
	 * something has been written, so the int in front of the data area is
	 * never mistaken for a previous element.
	 * NOTE(review): verify against version control.
	 */
	while( i<na && j<nb )
		if ( da[i] < db[j] )
			i++;
		else if ( da[i] == db[j] ) {
			if ( dr == ARRPTR(r) || *(dr-1) != db[j] )
				*dr++ = db[j];
			i++; j++;
		} else
			j++;

	if ( (dr - ARRPTR(r)) == 0 ) {
		pfree( r );
		return NULL;
	} else
		return resize_intArrayType(r, dr - ARRPTR(r) );
}

/*
** Store the "size" of a in *size: 0.0 for a NULL/empty array, otherwise its
** element count.
*/
void
rt__int_size(ArrayType *a, float *size)
{
	if ( ARRISNULL( a ) )
		*size = 0.0;
	else
		*size = (float)ARRSIZE( a );

	return;
}
/***************************************************************************** * Miscellaneous operators and functions *****************************************************************************/ /* len >= 2 */ bool isort ( int *a, int len ) { int tmp, index; int *cur, *end; bool r = FALSE; end = a + len; do { index = 0; cur = a + 1; while( cur < end ) { if( *(cur-1) > *cur ) { tmp=*(cur-1); *(cur-1) = *cur; *cur=tmp; index = 1; } else if ( ! r && *(cur-1) == *cur ) r = TRUE; cur++; } } while( index ); return r; } ArrayType * new_intArrayType( int num ) { ArrayType * r; int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; r = (ArrayType *) palloc( nbytes ); if ( ! r ) elog(ERROR, "Can't allocate memory for new array"); MemSet(r, 0, nbytes); r->size = nbytes; r->ndim = NDIM; #ifndef PGSQL71 SET_LO_FLAG(false, r); #endif *( (int*)ARR_DIMS(r) ) = num; *( (int*)ARR_LBOUND(r) ) = 1; return r; } ArrayType * resize_intArrayType( ArrayType * a, int num ) { int nbytes = ARR_OVERHEAD( NDIM ) + sizeof(int)*num; if ( num == ARRSIZE(a) ) return a; a = (ArrayType *) repalloc( a, nbytes ); if ( ! a ) elog(ERROR, "Can't reallocate memory for new array"); a->size = nbytes; *( (int*)ARR_DIMS(a) ) = num; return a; } ArrayType * copy_intArrayType( ArrayType * a ) { ArrayType * r; if ( ! a ) return NULL; r = new_intArrayType( ARRSIZE(a) ); memmove(r,a,VARSIZE(a)); return r; } /* num for compressed key */ int internal_size (int *a, int len ) { int i,size=0; for(i=0;i 1 */ ArrayType * _int_unique( ArrayType * r ) { int *tmp, *dr, *data; int num = ARRSIZE(r); data = tmp = dr = ARRPTR( r ); while( tmp - data < num ) if ( *tmp != *dr ) *(++dr) = *tmp++; else tmp++; return resize_intArrayType(r, dr + 1 - ARRPTR(r) ); }