diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index f03d738619..fd54c3d03c 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -718,6 +718,55 @@ ExecFetchSlotTuple(TupleTableSlot *slot) return ExecMaterializeSlot(slot); } +/* -------------------------------- + * ExecFetchSlotMinimalTuple + * Fetch the slot's minimal physical tuple. + * + * If the slot contains a virtual tuple, we convert it to minimal + * physical form. The slot retains ownership of the physical tuple. + * Likewise, if it contains a regular tuple we convert to minimal form. + * + * As above, the result must be treated as read-only. + * -------------------------------- + */ +MinimalTuple +ExecFetchSlotMinimalTuple(TupleTableSlot *slot) +{ + MinimalTuple newTuple; + MemoryContext oldContext; + + /* + * sanity checks + */ + Assert(slot != NULL); + Assert(!slot->tts_isempty); + + /* + * If we have a minimal physical tuple then just return it. + */ + if (slot->tts_mintuple) + return slot->tts_mintuple; + + /* + * Otherwise, build a minimal tuple, and then store it as the new slot + * value. (Note: tts_nvalid will be reset to zero here. There are cases + * in which this could be optimized but it's probably not worth worrying + * about.) + * + * We may be called in a context that is shorter-lived than the tuple + * slot, but we have to ensure that the materialized tuple will survive + * anyway. + */ + oldContext = MemoryContextSwitchTo(slot->tts_mcxt); + newTuple = ExecCopySlotMinimalTuple(slot); + MemoryContextSwitchTo(oldContext); + + ExecStoreMinimalTuple(newTuple, slot, true); + + Assert(slot->tts_mintuple); + return slot->tts_mintuple; +} + /* -------------------------------- * ExecMaterializeSlot * Force a slot into the "materialized" state. diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 5710afb2fc..3c8de3f5e7 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,7 +92,7 @@ MultiExecHash(HashState *node) /* We have to compute the hash value */ econtext->ecxt_innertuple = slot; hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys); - ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue); + ExecHashTableInsert(hashtable, slot, hashvalue); } /* must provide our own instrumentation support */ @@ -358,8 +358,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, * does not allow for any palloc overhead. The manipulations of spaceUsed * don't count palloc overhead either. */ - tupsize = MAXALIGN(sizeof(HashJoinTupleData)) + - MAXALIGN(sizeof(HeapTupleHeaderData)) + + tupsize = HJTUPLE_OVERHEAD + + MAXALIGN(sizeof(MinimalTupleData)) + MAXALIGN(tupwidth); inner_rel_bytes = ntuples * tupsize; @@ -548,7 +548,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) { /* dump it out */ Assert(batchno > curbatch); - ExecHashJoinSaveTuple(&tuple->htup, tuple->hashvalue, + ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple), + tuple->hashvalue, &hashtable->innerBatchFile[batchno]); /* and remove from hash table */ if (prevtuple) @@ -557,7 +558,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) hashtable->buckets[i] = nexttuple; /* prevtuple doesn't change */ hashtable->spaceUsed -= - MAXALIGN(sizeof(HashJoinTupleData)) + tuple->htup.t_len; + HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len; pfree(tuple); nfreed++; } @@ -592,12 +593,19 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) * ExecHashTableInsert * insert a tuple into the hash table depending on the hash value * it may just go to a temp file for later batches + * + * Note: the passed TupleTableSlot may contain a regular, minimal, or virtual + * tuple; the minimal case in particular is certain to happen while reloading + * tuples from batch files. We could save some cycles in the regular-tuple + * case by not forcing the slot contents into minimal form; not clear if it's + * worth the messiness required. */ void ExecHashTableInsert(HashJoinTable hashtable, - HeapTuple tuple, + TupleTableSlot *slot, uint32 hashvalue) { + MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot); int bucketno; int batchno; @@ -615,18 +623,11 @@ ExecHashTableInsert(HashJoinTable hashtable, HashJoinTuple hashTuple; int hashTupleSize; - hashTupleSize = MAXALIGN(sizeof(HashJoinTupleData)) + tuple->t_len; + hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt, hashTupleSize); hashTuple->hashvalue = hashvalue; - memcpy((char *) &hashTuple->htup, - (char *) tuple, - sizeof(hashTuple->htup)); - hashTuple->htup.t_data = (HeapTupleHeader) - (((char *) hashTuple) + MAXALIGN(sizeof(HashJoinTupleData))); - memcpy((char *) hashTuple->htup.t_data, - (char *) tuple->t_data, - tuple->t_len); + memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); hashTuple->next = hashtable->buckets[bucketno]; hashtable->buckets[bucketno] = hashTuple; hashtable->spaceUsed += hashTupleSize; @@ -639,7 +640,8 @@ ExecHashTableInsert(HashJoinTable hashtable, * put the tuple into a temp file for later batches */ Assert(batchno > hashtable->curbatch); - ExecHashJoinSaveTuple(tuple, hashvalue, + ExecHashJoinSaveTuple(tuple, + hashvalue, &hashtable->innerBatchFile[batchno]); } } @@ -749,7 +751,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, * * The current outer tuple must be stored in econtext->ecxt_outertuple. */ -HeapTuple +HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext) { @@ -771,14 +773,12 @@ ExecScanHashBucket(HashJoinState *hjstate, { if (hashTuple->hashvalue == hashvalue) { - HeapTuple heapTuple = &hashTuple->htup; TupleTableSlot *inntuple; /* insert hashtable's tuple into exec slot so ExecQual sees it */ - inntuple = ExecStoreTuple(heapTuple, - hjstate->hj_HashTupleSlot, - InvalidBuffer, - false); /* do not pfree */ + inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple), + hjstate->hj_HashTupleSlot, + false); /* do not pfree */ econtext->ecxt_innertuple = inntuple; /* reset temp memory each time to avoid leaks from qual expr */ @@ -787,7 +787,7 @@ ExecScanHashBucket(HashJoinState *hjstate, if (ExecQual(hjclauses, econtext, false)) { hjstate->hj_CurTuple = hashTuple; - return heapTuple; + return hashTuple; } } diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 097343fd88..572aa1a591 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,7 +54,7 @@ ExecHashJoin(HashJoinState *node) ExprContext *econtext; ExprDoneCond isDone; HashJoinTable hashtable; - HeapTuple curtuple; + HashJoinTuple curtuple; TupleTableSlot *outerTupleSlot; uint32 hashvalue; int batchno; @@ -224,7 +224,7 @@ ExecHashJoin(HashJoinState *node) * in the corresponding outer-batch file. */ Assert(batchno > hashtable->curbatch); - ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot), + ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot), hashvalue, &hashtable->outerBatchFile[batchno]); node->hj_NeedNewOuter = true; @@ -244,10 +244,9 @@ ExecHashJoin(HashJoinState *node) /* * we've got a match, but still need to test non-hashed quals */ - inntuple = ExecStoreTuple(curtuple, - node->hj_HashTupleSlot, - InvalidBuffer, - false); /* don't pfree this tuple */ + inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple), + node->hj_HashTupleSlot, + false); /* don't pfree */ econtext->ecxt_innertuple = inntuple; /* reset temp memory each time to avoid leaks from qual expr */ @@ -706,9 +705,7 @@ start_over: * NOTE: some tuples may be sent to future batches. Also, it is * possible for hashtable->nbatch to be increased here! */ - ExecHashTableInsert(hashtable, - ExecFetchSlotTuple(slot), - hashvalue); + ExecHashTableInsert(hashtable, slot, hashvalue); } /* @@ -741,15 +738,14 @@ start_over: * save a tuple to a batch file. * * The data recorded in the file for each tuple is its hash value, - * then an image of its HeapTupleData (with meaningless t_data pointer) - * followed by the HeapTupleHeader and tuple data. + * then the tuple in MinimalTuple format. * * Note: it is important always to call this in the regular executor * context, not in a shorter-lived context; else the temp file buffers * will get messed up. */ void -ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue, +ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr) { BufFile *file = *fileptr; @@ -768,14 +764,8 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue, (errcode_for_file_access(), errmsg("could not write to hash-join temporary file: %m"))); - written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData)); - if (written != sizeof(HeapTupleData)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to hash-join temporary file: %m"))); - - written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len); - if (written != (size_t) heapTuple->t_len) + written = BufFileWrite(file, (void *) tuple, tuple->t_len); + if (written != tuple->t_len) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to hash-join temporary file: %m"))); @@ -794,32 +784,36 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate, uint32 *hashvalue, TupleTableSlot *tupleSlot) { - HeapTupleData htup; + uint32 header[2]; size_t nread; - HeapTuple heapTuple; + MinimalTuple tuple; - nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32)); - if (nread == 0) - return NULL; /* end of file */ - if (nread != sizeof(uint32)) + /* + * Since both the hash value and the MinimalTuple length word are + * uint32, we can read them both in one BufFileRead() call without + * any type cheating. + */ + nread = BufFileRead(file, (void *) header, sizeof(header)); + if (nread == 0) /* end of file */ + { + ExecClearTuple(tupleSlot); + return NULL; + } + if (nread != sizeof(header)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from hash-join temporary file: %m"))); - nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData)); - if (nread != sizeof(HeapTupleData)) + *hashvalue = header[0]; + tuple = (MinimalTuple) palloc(header[1]); + tuple->t_len = header[1]; + nread = BufFileRead(file, + (void *) ((char *) tuple + sizeof(uint32)), + header[1] - sizeof(uint32)); + if (nread != header[1] - sizeof(uint32)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from hash-join temporary file: %m"))); - heapTuple = palloc(HEAPTUPLESIZE + htup.t_len); - memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData)); - heapTuple->t_data = (HeapTupleHeader) - ((char *) heapTuple + HEAPTUPLESIZE); - nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len); - if (nread != (size_t) htup.t_len) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read from hash-join temporary file: %m"))); - return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true); + return ExecStoreMinimalTuple(tuple, tupleSlot, true); } diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index 38cae6251e..c4e6e460fe 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,9 +65,14 @@ typedef struct HashJoinTupleData { struct HashJoinTupleData *next; /* link to next tuple in same bucket */ uint32 hashvalue; /* tuple's hash code */ - HeapTupleData htup; /* tuple header */ + /* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */ } HashJoinTupleData; +#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData)) +#define HJTUPLE_MINTUPLE(hjtup) \ + ((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD)) + + typedef struct HashJoinTableData { int nbuckets; /* # buckets in the in-memory hash table */ diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 9a413827d7..0e0a9b5ec5 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,7 @@ extern void ExecReScanHash(HashState *node, ExprContext *exprCtxt); extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators); extern void ExecHashTableDestroy(HashJoinTable hashtable); extern void ExecHashTableInsert(HashJoinTable hashtable, - HeapTuple tuple, + TupleTableSlot *slot, uint32 hashvalue); extern uint32 ExecHashGetHashValue(HashJoinTable hashtable, ExprContext *econtext, @@ -35,7 +35,7 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable, uint32 hashvalue, int *bucketno, int *batchno); -extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, +extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext); extern void ExecHashTableReset(HashJoinTable hashtable); extern void ExecChooseHashTableSize(double ntuples, int tupwidth, diff --git a/src/include/executor/nodeHashjoin.h b/src/include/executor/nodeHashjoin.h index 84f07d3644..cbbb76230b 100644 --- a/src/include/executor/nodeHashjoin.h +++ b/src/include/executor/nodeHashjoin.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.32 2006/03/05 15:58:56 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.33 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node); extern void ExecEndHashJoin(HashJoinState *node); extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt); -extern void ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue, +extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, BufFile **fileptr); #endif /* NODEHASHJOIN_H */ diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h index 8531835134..6d5bc02b93 100644 --- a/src/include/executor/tuptable.h +++ b/src/include/executor/tuptable.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.32 2006/06/27 02:51:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.33 2006/06/27 21:31:20 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -159,6 +159,7 @@ extern TupleTableSlot *ExecStoreAllNullTuple(TupleTableSlot *slot); extern HeapTuple ExecCopySlotTuple(TupleTableSlot *slot); extern MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot); extern HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot); +extern MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot); extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot); extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot, TupleTableSlot *srcslot);