diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 9e0e4f0b8c..bea8963099 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,9 +59,6 @@ ExecHash(HashState *node)
 	outerNode = outerPlanState(node);
 
 	hashtable = node->hashtable;
-	if (hashtable == NULL)
-		elog(ERROR, "ExecHash: hash table is NULL.");
-
 	nbatch = hashtable->nbatch;
 
 	if (nbatch > 0)
@@ -284,20 +281,13 @@ ExecHashTableCreate(Hash *node)
 		 * allocate and initialize the file arrays in hashCxt
 		 */
 		hashtable->innerBatchFile = (BufFile **)
-			palloc(nbatch * sizeof(BufFile *));
+			palloc0(nbatch * sizeof(BufFile *));
 		hashtable->outerBatchFile = (BufFile **)
-			palloc(nbatch * sizeof(BufFile *));
+			palloc0(nbatch * sizeof(BufFile *));
 		hashtable->innerBatchSize = (long *)
-			palloc(nbatch * sizeof(long));
+			palloc0(nbatch * sizeof(long));
 		hashtable->outerBatchSize = (long *)
-			palloc(nbatch * sizeof(long));
-		for (i = 0; i < nbatch; i++)
-		{
-			hashtable->innerBatchFile[i] = NULL;
-			hashtable->outerBatchFile[i] = NULL;
-			hashtable->innerBatchSize[i] = 0;
-			hashtable->outerBatchSize[i] = 0;
-		}
+			palloc0(nbatch * sizeof(long));
 		/* The files will not be opened until later... */
 	}
 
@@ -308,13 +298,7 @@ ExecHashTableCreate(Hash *node)
 	MemoryContextSwitchTo(hashtable->batchCxt);
 
 	hashtable->buckets = (HashJoinTuple *)
-		palloc(nbuckets * sizeof(HashJoinTuple));
-
-	if (hashtable->buckets == NULL)
-		elog(ERROR, "Insufficient memory for hash table.");
-
-	for (i = 0; i < nbuckets; i++)
-		hashtable->buckets[i] = NULL;
+		palloc0(nbuckets * sizeof(HashJoinTuple));
 
 	MemoryContextSwitchTo(oldcxt);
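
Annotation (not part of the patch): the palloc-then-zero loops above collapse into palloc0(), which allocates and zero-fills in one step, and since palloc()/palloc0() report out-of-memory via elog() rather than returning NULL, the removed NULL checks were dead code. A minimal sketch of the equivalence, assuming the usual postgres.h and storage/buffile.h includes; the two helper names are hypothetical, everything they call is real:

/* alloc_batch_files_old()/..._new() are hypothetical illustration names. */
static BufFile **
alloc_batch_files_old(int nbatch)
{
	BufFile   **files = (BufFile **) palloc(nbatch * sizeof(BufFile *));
	int			i;

	for (i = 0; i < nbatch; i++)
		files[i] = NULL;		/* clear every slot by hand */
	return files;
}

static BufFile **
alloc_batch_files_new(int nbatch)
{
	/* palloc0 = palloc + zero fill; both elog() on failure, never NULL */
	return (BufFile **) palloc0(nbatch * sizeof(BufFile *));
}
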
@@ -414,15 +398,14 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 		 * totalbuckets/nbuckets; in fact, it is the number of groups we
 		 * will use for the part of the data that doesn't fall into the
 		 * first nbuckets hash buckets. We try to set it to make all the
-		 * batches the same size. But we have to keep nbatch small
-		 * enough to avoid integer overflow in ExecHashJoinGetBatch().
+		 * batches the same size.
 		 */
 		dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
 					hash_table_bytes);
-		if (dtmp < INT_MAX / totalbuckets)
+		if (dtmp < INT_MAX)
 			nbatch = (int) dtmp;
 		else
-			nbatch = INT_MAX / totalbuckets;
+			nbatch = INT_MAX;
 		if (nbatch <= 0)
 			nbatch = 1;
 	}
@@ -481,13 +464,14 @@ ExecHashTableInsert(HashJoinTable hashtable,
 					List *hashkeys)
 {
 	int			bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
+	int			batchno = ExecHashGetBatch(bucketno, hashtable);
 	TupleTableSlot *slot = econtext->ecxt_innertuple;
 	HeapTuple	heapTuple = slot->val;
 
 	/*
 	 * decide whether to put the tuple in the hash table or a tmp file
 	 */
-	if (bucketno < hashtable->nbuckets)
+	if (batchno < 0)
 	{
 		/*
 		 * put the tuple in hash table
@@ -498,8 +482,6 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
 		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
 													   hashTupleSize);
-		if (hashTuple == NULL)
-			elog(ERROR, "Insufficient memory for hash table.");
 		memcpy((char *) &hashTuple->htup,
 			   (char *) heapTuple,
 			   sizeof(hashTuple->htup));
@@ -515,11 +497,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 	else
 	{
 		/*
-		 * put the tuple into a tmp file for other batches
+		 * put the tuple into a tmp file for later batches
 		 */
-		int			batchno = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-		(hashtable->totalbuckets - hashtable->nbuckets);
-
 		hashtable->innerBatchSize[batchno]++;
 		ExecHashJoinSaveTuple(heapTuple,
 							  hashtable->innerBatchFile[batchno]);
@@ -592,6 +571,24 @@ ExecHashGetBucket(HashJoinTable hashtable,
 	return bucketno;
 }
 
+/* ----------------------------------------------------------------
+ *		ExecHashGetBatch
+ *
+ *		determine the batch number for a bucketno
+ *
+ * Returns -1 if bucket belongs to initial (or current) batch,
+ * else 0..nbatch-1 corresponding to external batch file number for bucket.
+ * ----------------------------------------------------------------
+ */
+int
+ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
+{
+	if (bucketno < hashtable->nbuckets)
+		return -1;
+
+	return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
+}
+
 /* ----------------------------------------------------------------
  *		ExecScanHashBucket
  *
@@ -727,7 +724,6 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 {
 	MemoryContext oldcxt;
 	int			nbuckets = hashtable->nbuckets;
-	int			i;
 
 	/*
 	 * Release all the hash buckets and tuples acquired in the prior pass,
@@ -750,13 +746,7 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 
 	/*
 	 * Reallocate and reinitialize the hash bucket headers.
 	 */
 	hashtable->buckets = (HashJoinTuple *)
-		palloc(nbuckets * sizeof(HashJoinTuple));
-
-	if (hashtable->buckets == NULL)
-		elog(ERROR, "Insufficient memory for hash table.");
-
-	for (i = 0; i < nbuckets; i++)
-		hashtable->buckets[i] = NULL;
+		palloc0(nbuckets * sizeof(HashJoinTuple));
 
 	MemoryContextSwitchTo(oldcxt);
 }
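
Annotation (not part of the patch): this is the behavioral core of the change. Overflow buckets are now dealt round-robin across batch files by a bare modulo, exposed as ExecHashGetBatch(), instead of being carved into contiguous ranges by the old multiply-then-divide rule in the private ExecHashJoinGetBatch(). Because nothing multiplies nbatch by a bucket count anymore, ExecChooseHashTableSize() no longer needs to clamp nbatch to INT_MAX / totalbuckets. A stand-alone sketch under stand-in sizes (nbuckets = 4, totalbuckets = 12, nbatch = 2); the two helper names are hypothetical:

#include <assert.h>

/*
 * Old rule: proportional, contiguous ranges. The nbatch * (bucketno -
 * nbuckets) product is what could overflow for large nbatch.
 */
static int
old_get_batch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
{
	if (bucketno < nbuckets || nbatch == 0)
		return 0;				/* 0 meant "in memory"; files were 1..nbatch */
	return (nbatch * (bucketno - nbuckets)) / (totalbuckets - nbuckets) + 1;
}

/* New rule (ExecHashGetBatch): -1 means "in memory"; files are 0..nbatch-1. */
static int
new_get_batch(int bucketno, int nbuckets, int nbatch)
{
	if (bucketno < nbuckets)
		return -1;
	return (bucketno - nbuckets) % nbatch;
}

int
main(void)
{
	assert(old_get_batch(3, 4, 12, 2) == 0);	/* in-memory under old rule */
	assert(new_get_batch(3, 4, 2) == -1);		/* in-memory under new rule */
	assert(new_get_batch(5, 4, 2) == 1);		/* bucket 5 -> batch file 1 */
	assert(new_get_batch(6, 4, 2) == 0);		/* bucket 6 -> batch file 0 */
	return 0;
}
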
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8f899b577d..48cf30c21f 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,7 +27,6 @@ static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
 						  BufFile *file,
 						  TupleTableSlot *tupleSlot);
-static int	ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
 static int	ExecHashJoinNewBatch(HashJoinState *hjstate);
 
 
@@ -179,17 +178,15 @@ ExecHashJoin(HashJoinState *node)
 			 */
 			if (hashtable->curbatch == 0)
 			{
-				int			batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
-														 hashtable);
+				int			batchno = ExecHashGetBatch(node->hj_CurBucketNo,
+													   hashtable);
 
-				if (batch > 0)
+				if (batchno >= 0)
 				{
 					/*
 					 * Need to postpone this outer tuple to a later batch.
 					 * Save it in the corresponding outer-batch file.
 					 */
-					int			batchno = batch - 1;
-
 					hashtable->outerBatchSize[batchno]++;
 					ExecHashJoinSaveTuple(outerTupleSlot->val,
 									  hashtable->outerBatchFile[batchno]);
@@ -640,28 +637,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
 	return newbatch;
 }
 
-/* ----------------------------------------------------------------
- *		ExecHashJoinGetBatch
- *
- *		determine the batch number for a bucketno
- *		+----------------+-------+-------+ ... +-------+
- *		0           nbuckets                totalbuckets
- *		batch	       0       1       2   ...
- * ----------------------------------------------------------------
- */
-static int
-ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
-{
-	int			b;
-
-	if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
-		return 0;
-
-	b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-		(hashtable->totalbuckets - hashtable->nbuckets);
-	return b + 1;
-}
-
 /* ----------------------------------------------------------------
  *		ExecHashJoinSaveTuple
  *
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a19dd92c82..29b23948df 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,7 +85,8 @@ bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
 
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+						 int nbuckets);
 static bool cost_qual_eval_walker(Node *node, Cost *total);
 static Selectivity approx_selectivity(Query *root, List *quals);
 static void set_rel_width(Query *root, RelOptInfo *rel);
@@ -882,7 +883,9 @@ cost_hashjoin(Path *path, Query *root,
 											   outer_path->parent->width);
 	double		innerbytes = relation_byte_size(inner_path->parent->rows,
 											   inner_path->parent->width);
-	long		hashtablebytes = SortMem * 1024L;
+	int			virtualbuckets;
+	int			physicalbuckets;
+	int			numbatches;
 	Selectivity innerbucketsize;
 	List	   *hcl;
 
@@ -898,6 +901,13 @@ cost_hashjoin(Path *path, Query *root,
 	startup_cost += cpu_operator_cost * inner_path->parent->rows;
 	run_cost += cpu_operator_cost * outer_path->parent->rows;
 
+	/* Get hash table size that executor would use for inner relation */
+	ExecChooseHashTableSize(inner_path->parent->rows,
+							inner_path->parent->width,
+							&virtualbuckets,
+							&physicalbuckets,
+							&numbatches);
+
 	/*
 	 * Determine bucketsize fraction for inner relation. We use the
 	 * smallest bucketsize estimated for any individual hashclause;
@@ -931,7 +941,8 @@ cost_hashjoin(Path *path, Query *root,
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, right);
+				thisbucketsize = estimate_hash_bucketsize(root, right,
+														  virtualbuckets);
 				restrictinfo->right_bucketsize = thisbucketsize;
 			}
 		}
@@ -943,7 +954,8 @@ cost_hashjoin(Path *path, Query *root,
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, left);
+				thisbucketsize = estimate_hash_bucketsize(root, left,
+														  virtualbuckets);
 				restrictinfo->left_bucketsize = thisbucketsize;
 			}
 		}
@@ -982,7 +994,7 @@ cost_hashjoin(Path *path, Query *root,
 	 * should be nice and sequential...). Writing the inner rel counts as
 	 * startup cost, all the rest as run cost.
 	 */
-	if (innerbytes > hashtablebytes)
+	if (numbatches)
 	{
 		double		outerpages = page_size(outer_path->parent->rows,
 										   outer_path->parent->width);
@@ -1019,7 +1031,7 @@ cost_hashjoin(Path *path, Query *root,
 * smart enough to figure out how the restrict clauses might change the
 * distribution, so this will have to do for now.
 *
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
 * input relation. If the data were perfectly distributed, with the same
 * number of tuples going into each available bucket, then the bucketsize
 * fraction would be 1/nbuckets. But this happy state of affairs will occur
@@ -1039,13 +1051,10 @@ cost_hashjoin(Path *path, Query *root,
 * inner rel is well-dispersed (or the alternatives seem much worse).
 */
static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
{
	Oid			relid;
	RelOptInfo *rel;
-	int			virtualbuckets;
-	int			physicalbuckets;
-	int			numbatches;
	HeapTuple	tuple;
	Form_pg_statistic stats;
	double		estfract,
@@ -1071,12 +1080,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
	if (rel->tuples <= 0.0 || rel->rows <= 0.0)
		return 0.1;				/* ensure we can divide below */

-	/* Get hash table size that executor would use for this relation */
-	ExecChooseHashTableSize(rel->rows, rel->width,
-							&virtualbuckets,
-							&physicalbuckets,
-							&numbatches);
-
	tuple = SearchSysCache(STATRELATT,
						   ObjectIdGetDatum(relid),
						   Int16GetDatum(var->varattno),
@@ -1093,7 +1096,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
			case ObjectIdAttributeNumber:
			case SelfItemPointerAttributeNumber:
				/* these are unique, so buckets should be well-distributed */
-				return 1.0 / (double) virtualbuckets;
+				return 1.0 / (double) nbuckets;
			case TableOidAttributeNumber:
				/* hashing this is a terrible idea... */
				return 1.0;
@@ -1134,8 +1137,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
	 * the number of buckets is less than the expected number of distinct
	 * values; otherwise it is 1/ndistinct.
	 */
-	if (ndistinct > (double) virtualbuckets)
-		estfract = 1.0 / (double) virtualbuckets;
+	if (ndistinct > (double) nbuckets)
+		estfract = 1.0 / (double) nbuckets;
	else
		estfract = 1.0 / ndistinct;
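
Annotation (not part of the patch): cost_hashjoin() now asks ExecChooseHashTableSize() once for the bucket/batch shape of the inner relation, hands the virtual bucket count down to estimate_hash_bucketsize() for both sides of each hash clause, and keys the batching-cost branch off numbatches instead of re-deriving "will it spill" from SortMem. The distilled bucket-fraction rule, as a runnable sketch; the real function also adjusts for most-common-value skew, nulls, and restriction selectivity, and bucketsize_fraction() is a hypothetical name:

#include <stdio.h>

static double
bucketsize_fraction(double ndistinct, int nbuckets)
{
	if (ndistinct > (double) nbuckets)
		return 1.0 / (double) nbuckets;		/* more values than buckets */
	return 1.0 / ndistinct;					/* buckets outnumber values */
}

int
main(void)
{
	/* e.g. 10000 distinct values hashed into 1024 virtual buckets */
	printf("%g\n", bucketsize_fraction(10000.0, 1024));	/* ~0.000977 */
	/* e.g. only 10 distinct values: each holds ~1/10 of the relation */
	printf("%g\n", bucketsize_fraction(10.0, 1024));		/* 0.1 */
	return 0;
}
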
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index c30073ec8c..02e5635526 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $Id: nodeHash.h,v 1.27 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -30,6 +30,7 @@ extern void ExecHashTableInsert(HashJoinTable hashtable,
 extern int	ExecHashGetBucket(HashJoinTable hashtable,
				  ExprContext *econtext,
				  List *hashkeys);
+extern int	ExecHashGetBatch(int bucketno, HashJoinTable hashtable);
 extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
					 ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);
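
Annotation (not part of the patch): with ExecHashGetBatch() exported here, the inner (build) side in nodeHash.c and the outer (probe) side in nodeHashjoin.c share one routing rule. A hedged caller-side sketch; route_tuple() is a hypothetical wrapper, while the identifiers inside it are the real ones from this patch:

static void
route_tuple(HashJoinTable hashtable, ExprContext *econtext,
			List *hashkeys, HeapTuple tuple)
{
	int			bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
	int			batchno = ExecHashGetBatch(bucketno, hashtable);

	if (batchno < 0)
	{
		/* current batch: work with the in-memory table (insert or probe) */
	}
	else
	{
		/* later batch: spool the tuple to that batch's temp file */
		hashtable->innerBatchSize[batchno]++;
		ExecHashJoinSaveTuple(tuple, hashtable->innerBatchFile[batchno]);
	}
}
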