From 4f06c688c7b4726ac9f5279d4a9f32408eec5356 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 22 Jan 2007 20:00:40 +0000 Subject: [PATCH] Put back planner's ability to cache the results of mergejoinscansel(), which I had removed in the first cut of the EquivalenceClass rewrite to simplify that patch a little. But it's still important --- in a four-way join problem mergejoinscansel() was eating about 40% of the planning time according to gprof. Also, improve the EquivalenceClass code to re-use join RestrictInfos rather than generating fresh ones for each join considered. This saves some memory space but more importantly improves the effectiveness of caching planning info in RestrictInfos. --- src/backend/nodes/copyfuncs.c | 6 +- src/backend/nodes/outfuncs.c | 5 +- src/backend/optimizer/path/costsize.c | 75 +++++++-- src/backend/optimizer/path/equivclass.c | 195 +++++++++++++++++----- src/backend/optimizer/prep/prepunion.c | 5 +- src/backend/optimizer/util/restrictinfo.c | 5 +- src/backend/utils/adt/selfuncs.c | 31 +++- src/include/nodes/relation.h | 24 ++- src/include/utils/selfuncs.h | 4 +- 9 files changed, 285 insertions(+), 65 deletions(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 7b003dc095..1237dc7fe6 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.362 2007/01/20 20:45:38 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.363 2007/01/22 20:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1326,6 +1326,10 @@ _copyRestrictInfo(RestrictInfo *from) /* EquivalenceClasses are never copied, so shallow-copy the pointers */ COPY_SCALAR_FIELD(left_ec); COPY_SCALAR_FIELD(right_ec); + COPY_SCALAR_FIELD(left_em); + COPY_SCALAR_FIELD(right_em); + /* MergeScanSelCache isn't a Node, so hard to copy; just reset cache */ + newnode->scansel_cache = NIL; COPY_SCALAR_FIELD(outer_is_left); COPY_SCALAR_FIELD(hashjoinoperator); COPY_SCALAR_FIELD(left_bucketsize); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index f0b72ea0f6..b79b7d1a2d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.294 2007/01/20 20:45:38 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.295 2007/01/22 20:00:39 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -1304,6 +1304,7 @@ _outEquivalenceClass(StringInfo str, EquivalenceClass *node) WRITE_NODE_FIELD(ec_opfamilies); WRITE_NODE_FIELD(ec_members); WRITE_NODE_FIELD(ec_sources); + WRITE_NODE_FIELD(ec_derives); WRITE_BITMAPSET_FIELD(ec_relids); WRITE_BOOL_FIELD(ec_has_const); WRITE_BOOL_FIELD(ec_has_volatile); @@ -1354,6 +1355,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node) WRITE_NODE_FIELD(mergeopfamilies); WRITE_NODE_FIELD(left_ec); WRITE_NODE_FIELD(right_ec); + WRITE_NODE_FIELD(left_em); + WRITE_NODE_FIELD(right_em); WRITE_BOOL_FIELD(outer_is_left); WRITE_OID_FIELD(hashjoinoperator); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 2d241e774d..422ef92322 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.176 2007/01/22 01:35:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.177 2007/01/22 20:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,9 @@ bool enable_mergejoin = true; bool enable_hashjoin = true; +static MergeScanSelCache *cached_scansel(PlannerInfo *root, + RestrictInfo *rinfo, + PathKey *pathkey); static bool cost_qual_eval_walker(Node *node, QualCost *total); static Selectivity approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype); @@ -1349,9 +1352,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) * (unless it's an outer join, in which case the outer side has to be * scanned all the way anyway). Estimate fraction of the left and right * inputs that will actually need to be scanned. We use only the first - * (most significant) merge clause for this purpose. - * - * XXX mergejoinscansel is a bit expensive, can we cache its results? + * (most significant) merge clause for this purpose. Since + * mergejoinscansel() is a fairly expensive computation, we cache the + * results in the merge clause RestrictInfo. */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { @@ -1360,8 +1363,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) List *ipathkeys; PathKey *opathkey; PathKey *ipathkey; - Selectivity leftscansel, - rightscansel; + MergeScanSelCache *cache; /* Get the input pathkeys to determine the sort-order details */ opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; @@ -1376,22 +1378,21 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) opathkey->pk_nulls_first != ipathkey->pk_nulls_first) elog(ERROR, "left and right pathkeys do not match in mergejoin"); - mergejoinscansel(root, (Node *) firstclause->clause, - opathkey->pk_opfamily, opathkey->pk_strategy, - &leftscansel, &rightscansel); + /* Get the selectivity with caching */ + cache = cached_scansel(root, firstclause, opathkey); if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids)) { /* left side of clause is outer */ - outerscansel = leftscansel; - innerscansel = rightscansel; + outerscansel = cache->leftscansel; + innerscansel = cache->rightscansel; } else { /* left side of clause is inner */ - outerscansel = rightscansel; - innerscansel = leftscansel; + outerscansel = cache->rightscansel; + innerscansel = cache->leftscansel; } if (path->jpath.jointype == JOIN_LEFT) outerscansel = 1.0; @@ -1493,6 +1494,54 @@ cost_mergejoin(MergePath *path, PlannerInfo *root) path->jpath.path.total_cost = startup_cost + run_cost; } +/* + * run mergejoinscansel() with caching + */ +static MergeScanSelCache * +cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) +{ + MergeScanSelCache *cache; + ListCell *lc; + Selectivity leftscansel, + rightscansel; + MemoryContext oldcontext; + + /* Do we have this result already? */ + foreach(lc, rinfo->scansel_cache) + { + cache = (MergeScanSelCache *) lfirst(lc); + if (cache->opfamily == pathkey->pk_opfamily && + cache->strategy == pathkey->pk_strategy && + cache->nulls_first == pathkey->pk_nulls_first) + return cache; + } + + /* Nope, do the computation */ + mergejoinscansel(root, + (Node *) rinfo->clause, + pathkey->pk_opfamily, + pathkey->pk_strategy, + pathkey->pk_nulls_first, + &leftscansel, + &rightscansel); + + /* Cache the result in suitably long-lived workspace */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + cache = (MergeScanSelCache *) palloc(sizeof(MergeScanSelCache)); + cache->opfamily = pathkey->pk_opfamily; + cache->strategy = pathkey->pk_strategy; + cache->nulls_first = pathkey->pk_nulls_first; + cache->leftscansel = leftscansel; + cache->rightscansel = rightscansel; + + rinfo->scansel_cache = lappend(rinfo->scansel_cache, cache); + + MemoryContextSwitchTo(oldcontext); + + return cache; +} + /* * cost_hashjoin * Determines and returns the cost of joining two relations using the diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 063e8d5d01..b6503ef193 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.1 2007/01/20 20:45:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.2 2007/01/22 20:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,8 +26,9 @@ #include "utils/lsyscache.h" -static void add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, - bool is_child, Oid datatype); +static EquivalenceMember *add_eq_member(EquivalenceClass *ec, + Expr *expr, Relids relids, + bool is_child, Oid datatype); static void generate_base_implied_equalities_const(PlannerInfo *root, EquivalenceClass *ec); static void generate_base_implied_equalities_no_const(PlannerInfo *root, @@ -46,6 +47,11 @@ static List *generate_join_implied_equalities_broken(PlannerInfo *root, RelOptInfo *inner_rel); static Oid select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype); +static RestrictInfo *create_join_clause(PlannerInfo *root, + EquivalenceClass *ec, Oid opno, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec); static void reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, bool outer_on_left); @@ -95,6 +101,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, List *opfamilies; EquivalenceClass *ec1, *ec2; + EquivalenceMember *em1, + *em2; ListCell *lc1; /* Extract info from given clause */ @@ -152,6 +160,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, * there is no shortcut here for item1 and item2 equal.) */ ec1 = ec2 = NULL; + em1 = em2 = NULL; foreach(lc1, root->eq_classes) { EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1); @@ -188,6 +197,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, equal(item1, cur_em->em_expr)) { ec1 = cur_ec; + em1 = cur_em; if (ec2) break; } @@ -197,6 +207,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, equal(item2, cur_em->em_expr)) { ec2 = cur_ec; + em2 = cur_em; if (ec1) break; } @@ -215,6 +226,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, { ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as usable with this pair of EMs */ + /* NB: can't set left_ec/right_ec until merging is finished */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; return true; } @@ -227,6 +242,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, */ ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members); ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources); + ec1->ec_derives = list_concat(ec1->ec_derives, ec2->ec_derives); ec1->ec_relids = bms_join(ec1->ec_relids, ec2->ec_relids); ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ @@ -236,23 +252,33 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, /* just to avoid debugging confusion w/ dangling pointers: */ ec2->ec_members = NIL; ec2->ec_sources = NIL; + ec2->ec_derives = NIL; ec2->ec_relids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; } else if (ec1) { /* Case 3: add item2 to ec1 */ - add_eq_member(ec1, item2, item2_relids, false, item2_type); + em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; } else if (ec2) { /* Case 3: add item1 to ec2 */ - add_eq_member(ec2, item1, item1_relids, false, item1_type); + em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; } else { @@ -262,16 +288,21 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec->ec_opfamilies = opfamilies; ec->ec_members = NIL; ec->ec_sources = list_make1(restrictinfo); + ec->ec_derives = NIL; ec->ec_relids = NULL; ec->ec_has_const = false; ec->ec_has_volatile = false; ec->ec_below_outer_join = below_outer_join; ec->ec_broken = false; ec->ec_merged = NULL; - add_eq_member(ec, item1, item1_relids, false, item1_type); - add_eq_member(ec, item2, item2_relids, false, item2_type); + em1 = add_eq_member(ec, item1, item1_relids, false, item1_type); + em2 = add_eq_member(ec, item2, item2_relids, false, item2_type); root->eq_classes = lappend(root->eq_classes, ec); + + /* mark the RI as usable with this pair of EMs */ + restrictinfo->left_em = em1; + restrictinfo->right_em = em2; } return true; @@ -280,7 +311,7 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, /* * add_eq_member - build a new EquivalenceMember and add it to an EC */ -static void +static EquivalenceMember * add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, bool is_child, Oid datatype) { @@ -312,6 +343,8 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids, ec->ec_relids = bms_add_members(ec->ec_relids, relids); } ec->ec_members = lappend(ec->ec_members, em); + + return em; } @@ -337,6 +370,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, List *opfamilies) { EquivalenceClass *newec; + EquivalenceMember *newem; ListCell *lc1; MemoryContext oldcontext; @@ -383,14 +417,15 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_opfamilies = list_copy(opfamilies); newec->ec_members = NIL; newec->ec_sources = NIL; + newec->ec_derives = NIL; newec->ec_relids = NULL; newec->ec_has_const = false; newec->ec_has_volatile = contain_volatile_functions((Node *) expr); newec->ec_below_outer_join = false; newec->ec_broken = false; newec->ec_merged = NULL; - add_eq_member(newec, expr, pull_varnos((Node *) expr), - false, expr_datatype); + newem = add_eq_member(newec, expr, pull_varnos((Node *) expr), + false, expr_datatype); /* * add_eq_member doesn't check for volatile functions or aggregates, @@ -402,7 +437,7 @@ get_eclass_for_sort_expr(PlannerInfo *root, if (newec->ec_has_volatile || contain_agg_clause((Node *) expr)) { newec->ec_has_const = false; - ((EquivalenceMember *) linitial(newec->ec_members))->em_is_const = false; + newem->em_is_const = false; } } @@ -455,6 +490,12 @@ get_eclass_for_sort_expr(PlannerInfo *root, * process_implied_equality (in plan/initsplan.c) to be inserted into the * restrictinfo datastructures. Note that this must be called after initial * scanning of the quals and before Path construction begins. + * + * We make no attempt to avoid generating duplicate RestrictInfos here: we + * don't search ec_sources for matches, nor put the created RestrictInfos + * into ec_derives. Doing so would require some slightly ugly changes in + * initsplan.c's API, and there's no real advantage, because the clauses + * generated here can't duplicate anything we will generate for joins anyway. */ void generate_base_implied_equalities(PlannerInfo *root) @@ -664,6 +705,13 @@ generate_base_implied_equalities_broken(PlannerInfo *root, * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes * its own selections of clauses to use, and if the ones we pick here are * redundant with those, the extras will be eliminated in createplan.c. + * + * Because the same join clauses are likely to be needed multiple times as + * we consider different join paths, we avoid generating multiple copies: + * whenever we select a particular pair of EquivalenceMembers to join, + * we check to see if the pair matches any original clause (in ec_sources) + * or previously-built clause (in ec_derives). This saves memory and allows + * re-use of information cached in RestrictInfos. */ List * generate_join_implied_equalities(PlannerInfo *root, @@ -818,15 +866,13 @@ generate_join_implied_equalities_normal(PlannerInfo *root, return NIL; } - rinfo = build_implied_join_equality(best_eq_op, - best_outer_em->em_expr, - best_inner_em->em_expr, - ec->ec_relids); - /* mark restrictinfo as redundant with other joinclauses */ - rinfo->parent_ec = ec; - /* we can set these too, rather than letting them be looked up later */ - rinfo->left_ec = ec; - rinfo->right_ec = ec; + /* + * Create clause, setting parent_ec to mark it as redundant with other + * joinclauses + */ + rinfo = create_join_clause(root, ec, best_eq_op, + best_outer_em, best_inner_em, + ec); result = lappend(result, rinfo); } @@ -867,16 +913,10 @@ generate_join_implied_equalities_normal(PlannerInfo *root, ec->ec_broken = true; return NIL; } - rinfo = build_implied_join_equality(eq_op, - prev_em->em_expr, - cur_em->em_expr, - ec->ec_relids); - /* do NOT set parent_ec, this qual is not redundant! */ - - /* we can set these, though */ - rinfo->left_ec = ec; - rinfo->right_ec = ec; + rinfo = create_join_clause(root, ec, eq_op, + prev_em, cur_em, + NULL); result = lappend(result, rinfo); } @@ -941,6 +981,86 @@ select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) } +/* + * create_join_clause + * Find or make a RestrictInfo comparing the two given EC members + * with the given operator. + * + * parent_ec is either equal to ec (if the clause is a potentially-redundant + * join clause) or NULL (if not). We have to treat this as part of the + * match requirements --- it's possible that a clause comparing the same two + * EMs is a join clause in one join path and a restriction clause in another. + */ +static RestrictInfo * +create_join_clause(PlannerInfo *root, + EquivalenceClass *ec, Oid opno, + EquivalenceMember *leftem, + EquivalenceMember *rightem, + EquivalenceClass *parent_ec) +{ + RestrictInfo *rinfo; + ListCell *lc; + MemoryContext oldcontext; + + /* + * Search to see if we already built a RestrictInfo for this pair of + * EquivalenceMembers. We can use either original source clauses or + * previously-derived clauses. The check on opno is probably redundant, + * but be safe ... + */ + foreach(lc, ec->ec_sources) + { + rinfo = (RestrictInfo *) lfirst(lc); + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec && + opno == ((OpExpr *) rinfo->clause)->opno) + return rinfo; + } + + foreach(lc, ec->ec_derives) + { + rinfo = (RestrictInfo *) lfirst(lc); + if (rinfo->left_em == leftem && + rinfo->right_em == rightem && + rinfo->parent_ec == parent_ec && + opno == ((OpExpr *) rinfo->clause)->opno) + return rinfo; + } + + /* + * Not there, so build it, in planner context so we can re-use it. + * (Not important in normal planning, but definitely so in GEQO.) + */ + oldcontext = MemoryContextSwitchTo(root->planner_cxt); + + rinfo = build_implied_join_equality(opno, + leftem->em_expr, + rightem->em_expr, + ec->ec_relids); + + /* Mark the clause as redundant, or not */ + rinfo->parent_ec = parent_ec; + + /* + * We can set these now, rather than letting them be looked up later, + * since this is only used after EC merging is complete. + */ + rinfo->left_ec = ec; + rinfo->right_ec = ec; + + /* Mark it as usable with these EMs */ + rinfo->left_em = leftem; + rinfo->right_em = rightem; + /* and save it for possible re-use */ + ec->ec_derives = lappend(ec->ec_derives, rinfo); + + MemoryContextSwitchTo(oldcontext); + + return rinfo; +} + + /* * reconsider_outer_join_clauses * Re-examine any outer-join clauses that were set aside by @@ -1364,8 +1484,8 @@ add_child_rel_equivalences(PlannerInfo *root, child_expr = (Expr *) adjust_appendrel_attrs((Node *) cur_em->em_expr, appinfo); - add_eq_member(cur_ec, child_expr, child_rel->relids, - true, cur_em->em_datatype); + (void) add_eq_member(cur_ec, child_expr, child_rel->relids, + true, cur_em->em_datatype); } } } @@ -1451,15 +1571,10 @@ find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel, /* Found a suitable joinclause */ RestrictInfo *rinfo; - rinfo = build_implied_join_equality(best_eq_op, - cur_em->em_expr, - best_outer_em->em_expr, - cur_ec->ec_relids); - /* mark restrictinfo as redundant with other joinclauses */ - rinfo->parent_ec = cur_ec; - /* we can set these too */ - rinfo->left_ec = cur_ec; - rinfo->right_ec = cur_ec; + /* set parent_ec to mark as redundant with other joinclauses */ + rinfo = create_join_clause(root, cur_ec, best_eq_op, + cur_em, best_outer_em, + cur_ec); result = lappend(result, rinfo); /* diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index ec93b0dad0..5ee03b75cf 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.136 2007/01/20 20:45:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.137 2007/01/22 20:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1197,6 +1197,9 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context) newinfo->this_selec = -1; newinfo->left_ec = NULL; newinfo->right_ec = NULL; + newinfo->left_em = NULL; + newinfo->right_em = NULL; + newinfo->scansel_cache = NIL; newinfo->left_bucketsize = -1; newinfo->right_bucketsize = -1; diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index ea8bb5c970..8251e75d65 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.52 2007/01/20 20:45:40 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.53 2007/01/22 20:00:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -344,6 +344,9 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->left_ec = NULL; restrictinfo->right_ec = NULL; + restrictinfo->left_em = NULL; + restrictinfo->right_em = NULL; + restrictinfo->scansel_cache = NIL; restrictinfo->outer_is_left = false; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 493df17b6c..85e66a3b1c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.220 2007/01/20 20:45:40 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.221 2007/01/22 20:00:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2112,8 +2112,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS) * we can estimate how much of the input will actually be read. This * can have a considerable impact on the cost when using indexscans. * - * clause should be a clause already known to be mergejoinable. opfamily and - * strategy specify the sort ordering being used. + * clause should be a clause already known to be mergejoinable. opfamily, + * strategy, and nulls_first specify the sort ordering being used. * * *leftscan is set to the fraction of the left-hand variable expected * to be scanned (0 to 1), and similarly *rightscan for the right-hand @@ -2121,7 +2121,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS) */ void mergejoinscansel(PlannerInfo *root, Node *clause, - Oid opfamily, int strategy, + Oid opfamily, int strategy, bool nulls_first, Selectivity *leftscan, Selectivity *rightscan) { @@ -2214,18 +2214,39 @@ mergejoinscansel(PlannerInfo *root, Node *clause, /* * Now, the fraction of the left variable that will be scanned is the * fraction that's <= the right-side maximum value. But only believe - * non-default estimates, else stick with our 1.0. + * non-default estimates, else stick with our 1.0. Also, if the sort + * order is nulls-first, we're going to have to read over any nulls too. */ selec = scalarineqsel(root, leop, false, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) + { + if (nulls_first && HeapTupleIsValid(leftvar.statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple); + selec += stats->stanullfrac; + CLAMP_PROBABILITY(selec); + } *leftscan = selec; + } /* And similarly for the right variable. */ selec = scalarineqsel(root, revleop, false, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) + { + if (nulls_first && HeapTupleIsValid(rightvar.statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple); + selec += stats->stanullfrac; + CLAMP_PROBABILITY(selec); + } *rightscan = selec; + } /* * Only one of the two fractions can really be less than 1.0; believe the diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index a83a20d21b..c67c067a5f 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.133 2007/01/20 20:45:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.134 2007/01/22 20:00:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -397,6 +397,7 @@ typedef struct EquivalenceClass List *ec_opfamilies; /* btree operator family OIDs */ List *ec_members; /* list of EquivalenceMembers */ List *ec_sources; /* list of generating RestrictInfos */ + List *ec_derives; /* list of derived RestrictInfos */ Relids ec_relids; /* all relids appearing in ec_members */ bool ec_has_const; /* any pseudoconstants in ec_members? */ bool ec_has_volatile; /* the (sole) member is a volatile expr */ @@ -890,6 +891,9 @@ typedef struct RestrictInfo /* cache space for mergeclause processing; NULL if not yet set */ EquivalenceClass *left_ec; /* EquivalenceClass containing lefthand */ EquivalenceClass *right_ec; /* EquivalenceClass containing righthand */ + EquivalenceMember *left_em; /* EquivalenceMember for lefthand */ + EquivalenceMember *right_em; /* EquivalenceMember for righthand */ + List *scansel_cache; /* list of MergeScanSelCache structs */ /* transient workspace for use while considering a specific join path */ bool outer_is_left; /* T = outer var on left, F = on right */ @@ -902,6 +906,24 @@ typedef struct RestrictInfo Selectivity right_bucketsize; /* avg bucketsize of right side */ } RestrictInfo; +/* + * Since mergejoinscansel() is a relatively expensive function, and would + * otherwise be invoked many times while planning a large join tree, + * we go out of our way to cache its results. Each mergejoinable + * RestrictInfo carries a list of the specific sort orderings that have + * been considered for use with it, and the resulting selectivities. + */ +typedef struct MergeScanSelCache +{ + /* Ordering details (cache lookup key) */ + Oid opfamily; /* btree opfamily defining the ordering */ + int strategy; /* sort direction (ASC or DESC) */ + bool nulls_first; /* do NULLs come before normal values? */ + /* Results */ + Selectivity leftscansel; /* scan fraction for clause left side */ + Selectivity rightscansel; /* scan fraction for clause right side */ +} MergeScanSelCache; + /* * Inner indexscan info. * diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 84549ca1d3..f0c6f20427 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.38 2007/01/05 22:19:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.39 2007/01/22 20:00:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -160,7 +160,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root, int varRelid, JoinType jointype); extern void mergejoinscansel(PlannerInfo *root, Node *clause, - Oid opfamily, int strategy, + Oid opfamily, int strategy, bool nulls_first, Selectivity *leftscan, Selectivity *rightscan);