From 25442d8d2fd35389813062f523488821f4fc31d4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 18 Apr 2000 05:43:02 +0000 Subject: [PATCH] Correct oversight in hashjoin cost estimation: nodeHash sizes its hash table for an average of NTUP_PER_BUCKET tuples/bucket, but cost_hashjoin was assuming a target load of one tuple/bucket. This was causing a noticeable underestimate of hashjoin costs. --- src/backend/executor/nodeHash.c | 3 +-- src/backend/optimizer/path/costsize.c | 16 +++++++++++----- src/include/executor/nodeHash.h | 5 ++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index bee4a10f35..1d841576fe 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * - * $Id: nodeHash.c,v 1.44 2000/01/26 05:56:22 momjian Exp $ + * $Id: nodeHash.c,v 1.45 2000/04/18 05:43:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -221,7 +221,6 @@ ExecEndHash(Hash *node) * create a hashtable in shared memory for hashjoin. 
* ---------------------------------------------------------------- */ -#define NTUP_PER_BUCKET 10 #define FUDGE_FAC 2.0 HashJoinTable diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 6ecfb2a471..df3c6d5c42 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.57 2000/04/12 17:15:19 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.58 2000/04/18 05:43:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -51,6 +51,7 @@ #include +#include "executor/nodeHash.h" #include "miscadmin.h" #include "nodes/plannodes.h" #include "optimizer/clauses.h" @@ -604,12 +605,17 @@ cost_hashjoin(Path *path, run_cost += cpu_operator_cost * outer_path->parent->rows; /* - * the number of tuple comparisons needed is the number of outer - * tuples times the typical hash bucket size, which we estimate - * conservatively as the inner disbursion times the inner tuple count. + * The number of tuple comparisons needed is the number of outer + * tuples times the typical hash bucket size. nodeHash.c tries for + * average bucket loading of NTUP_PER_BUCKET, but that goal will + * be reached only if data values are uniformly distributed among + * the buckets. To be conservative, we scale up the target bucket + * size by the number of inner rows times inner disbursion, giving + * an estimate of the typical number of duplicates of each value. + * We then charge one cpu_operator_cost per tuple comparison. 
*/ run_cost += cpu_operator_cost * outer_path->parent->rows * - ceil(inner_path->parent->rows * innerdisbursion); + NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdisbursion); /* * Estimate the number of tuples that get through the hashing filter diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h index 0460368d8b..b61ced7cdc 100644 --- a/src/include/executor/nodeHash.h +++ b/src/include/executor/nodeHash.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nodeHash.h,v 1.15 2000/01/26 05:58:05 momjian Exp $ + * $Id: nodeHash.h,v 1.16 2000/04/18 05:43:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,9 @@ #include "nodes/plannodes.h" +/* NTUP_PER_BUCKET is exported because planner wants to see it */ +#define NTUP_PER_BUCKET 10 + extern TupleTableSlot *ExecHash(Hash *node); extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent); extern int ExecCountSlotsHash(Hash *node);