Make hashjoin give the right answer with toasted input data.

This commit is contained in:
Tom Lane 2001-08-13 19:50:11 +00:00
parent 95f8901a96
commit 38cfc95865
1 changed file with 32 additions and 18 deletions

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* *
* $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $ * $Id: nodeHash.c,v 1.59 2001/08/13 19:50:11 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -533,19 +533,23 @@ ExecHashGetBucket(HashJoinTable hashtable,
int bucketno; int bucketno;
Datum keyval; Datum keyval;
bool isNull; bool isNull;
MemoryContext oldContext;
/* /*
* Get the join attribute value of the tuple * We reset the eval context each time to reclaim any memory leaked
* * in the hashkey expression or hashFunc itself.
* We reset the eval context each time to avoid any possibility of memory
* leaks in the hash function.
*/ */
ResetExprContext(econtext); ResetExprContext(econtext);
keyval = ExecEvalExprSwitchContext(hashkey, econtext, &isNull, NULL); oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
/* /*
* compute the hash function * Get the join attribute value of the tuple
*/
keyval = ExecEvalExpr(hashkey, econtext, &isNull, NULL);
/*
* Compute the hash function
*/ */
if (isNull) if (isNull)
bucketno = 0; bucketno = 0;
@ -564,6 +568,8 @@ ExecHashGetBucket(HashJoinTable hashtable,
printf("hash(%ld) = %d\n", (long) keyval, bucketno); printf("hash(%ld) = %d\n", (long) keyval, bucketno);
#endif #endif
MemoryContextSwitchTo(oldContext);
return bucketno; return bucketno;
} }
@ -624,17 +630,18 @@ ExecScanHashBucket(HashJoinState *hjstate,
* hashFunc * hashFunc
* *
* the hash function, copied from Margo * the hash function, copied from Margo
*
* XXX this probably ought to be replaced with datatype-specific
* hash functions, such as those already implemented for hash indexes.
* ---------------------------------------------------------------- * ----------------------------------------------------------------
*/ */
static int static int
hashFunc(Datum key, int len, bool byVal) hashFunc(Datum key, int len, bool byVal)
{ {
unsigned int h = 0; unsigned int h = 0;
unsigned char *k;
if (byVal) if (byVal)
{ {
/* /*
* If it's a by-value data type, use the 'len' least significant * If it's a by-value data type, use the 'len' least significant
* bytes of the Datum value. This should do the right thing on * bytes of the Datum value. This should do the right thing on
@ -649,22 +656,29 @@ hashFunc(Datum key, int len, bool byVal)
} }
else else
{ {
/* /*
* If this is a variable length type, then 'k' points to a "struct * If this is a variable length type, then 'key' points to a "struct
* varlena" and len == -1. NOTE: VARSIZE returns the "real" data * varlena" and len == -1. NOTE: VARSIZE returns the "real" data
* length plus the sizeof the "vl_len" attribute of varlena (the * length plus the sizeof the "vl_len" attribute of varlena (the
* length information). 'k' points to the beginning of the varlena * length information). 'key' points to the beginning of the varlena
* struct, so we have to use "VARDATA" to find the beginning of * struct, so we have to use "VARDATA" to find the beginning of
* the "real" data. * the "real" data. Also, we have to be careful to detoast the
* datum if it's toasted. (We don't worry about freeing the detoasted
* copy; that happens for free when the per-tuple memory context
* is reset in ExecHashGetBucket.)
*/ */
if (len == -1) unsigned char *k;
if (len < 0)
{ {
len = VARSIZE(key) - VARHDRSZ; struct varlena *vkey = PG_DETOAST_DATUM(key);
k = (unsigned char *) VARDATA(key);
len = VARSIZE(vkey) - VARHDRSZ;
k = (unsigned char *) VARDATA(vkey);
} }
else else
k = (unsigned char *) key; k = (unsigned char *) DatumGetPointer(key);
while (len-- > 0) while (len-- > 0)
h = (h * PRIME1) ^ (*k++); h = (h * PRIME1) ^ (*k++);
} }