1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* relnode.c
|
2000-02-07 05:41:04 +01:00
|
|
|
* Relation-node lookup/construction routines
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2016-01-02 19:33:40 +01:00
|
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/optimizer/util/relnode.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2016-01-28 20:05:36 +01:00
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "catalog/pg_class.h"
|
|
|
|
#include "foreign/foreign.h"
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
#include "optimizer/clauses.h"
|
2000-02-07 05:41:04 +01:00
|
|
|
#include "optimizer/cost.h"
|
1999-07-16 05:14:30 +02:00
|
|
|
#include "optimizer/pathnode.h"
|
2007-01-20 21:45:41 +01:00
|
|
|
#include "optimizer/paths.h"
|
2008-10-21 22:42:53 +02:00
|
|
|
#include "optimizer/placeholder.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "optimizer/plancat.h"
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
#include "optimizer/restrictinfo.h"
|
2005-06-09 01:02:05 +02:00
|
|
|
#include "utils/hsearch.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
|
|
|
|
|
2005-06-09 01:02:05 +02:00
|
|
|
/*
 * Entry type for the auxiliary join-relation hash table (see
 * build_join_rel_hash): maps a set of base-rel RT indexes to the
 * RelOptInfo already built for that join.
 */
typedef struct JoinHashEntry
{
	Relids		join_relids;	/* hash key --- MUST BE FIRST */
	RelOptInfo *join_rel;		/* the joinrel identified by join_relids */
} JoinHashEntry;
|
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
|
2005-10-15 04:49:52 +02:00
|
|
|
RelOptInfo *input_rel);
|
2005-06-06 00:32:58 +02:00
|
|
|
static List *build_joinrel_restrictlist(PlannerInfo *root,
|
2007-11-15 22:14:46 +01:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel);
|
2000-02-07 05:41:04 +01:00
|
|
|
static void build_joinrel_joinlist(RelOptInfo *joinrel,
|
2000-04-12 19:17:23 +02:00
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel);
|
2000-02-07 05:41:04 +01:00
|
|
|
static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
|
2007-01-20 21:45:41 +01:00
|
|
|
List *joininfo_list,
|
|
|
|
List *new_restrictlist);
|
|
|
|
static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel,
|
|
|
|
List *joininfo_list,
|
|
|
|
List *new_joininfo);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2011-09-03 21:35:12 +02:00
|
|
|
/*
|
|
|
|
* setup_simple_rel_arrays
|
|
|
|
* Prepare the arrays we use for quickly accessing base relations.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
setup_simple_rel_arrays(PlannerInfo *root)
|
|
|
|
{
|
|
|
|
Index rti;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Arrays are accessed using RT indexes (1..N) */
|
|
|
|
root->simple_rel_array_size = list_length(root->parse->rtable) + 1;
|
|
|
|
|
|
|
|
/* simple_rel_array is initialized to all NULLs */
|
|
|
|
root->simple_rel_array = (RelOptInfo **)
|
|
|
|
palloc0(root->simple_rel_array_size * sizeof(RelOptInfo *));
|
|
|
|
|
|
|
|
/* simple_rte_array is an array equivalent of the rtable list */
|
|
|
|
root->simple_rte_array = (RangeTblEntry **)
|
|
|
|
palloc0(root->simple_rel_array_size * sizeof(RangeTblEntry *));
|
|
|
|
rti = 1;
|
|
|
|
foreach(lc, root->parse->rtable)
|
|
|
|
{
|
|
|
|
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
|
|
|
|
|
|
|
|
root->simple_rte_array[rti++] = rte;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
 * build_simple_rel
 *	  Construct a new RelOptInfo for a base relation or 'other' relation.
 *
 * 'relid' is the RT index of the relation; a RangeTblEntry must already be
 * present in root->simple_rte_array for it, and no RelOptInfo may exist yet
 * (we elog(ERROR) otherwise).  'reloptkind' distinguishes ordinary baserels
 * from "other" member rels such as appendrel children.
 *
 * The finished struct is stored into root->simple_rel_array[relid] and also
 * returned.  If the RTE has inh set, we recurse to build "other rel"
 * RelOptInfos for all of its appendrel children.
 */
RelOptInfo *
build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
{
	RelOptInfo *rel;
	RangeTblEntry *rte;

	/* Rel should not exist already */
	Assert(relid > 0 && relid < root->simple_rel_array_size);
	if (root->simple_rel_array[relid] != NULL)
		elog(ERROR, "rel %d already exists", relid);

	/* Fetch RTE for relation */
	rte = root->simple_rte_array[relid];
	Assert(rte != NULL);

	/* Initialize every field to its "empty" state; some are filled below */
	rel = makeNode(RelOptInfo);
	rel->reloptkind = reloptkind;
	rel->relids = bms_make_singleton(relid);
	rel->rows = 0;
	rel->width = 0;
	/* cheap startup cost is interesting iff not all tuples to be retrieved */
	rel->consider_startup = (root->tuple_fraction > 0);
	rel->consider_param_startup = false;		/* might get changed later */
	rel->consider_parallel = false; /* might get changed later */
	rel->reltargetlist = NIL;
	rel->pathlist = NIL;
	rel->ppilist = NIL;
	rel->partial_pathlist = NIL;
	rel->cheapest_startup_path = NULL;
	rel->cheapest_total_path = NULL;
	rel->cheapest_unique_path = NULL;
	rel->cheapest_parameterized_paths = NIL;
	rel->direct_lateral_relids = NULL;
	rel->lateral_relids = NULL;
	rel->relid = relid;
	rel->rtekind = rte->rtekind;
	/* min_attr, max_attr, attr_needed, attr_widths are set below */
	rel->lateral_vars = NIL;
	rel->lateral_referencers = NULL;
	rel->indexlist = NIL;
	rel->pages = 0;
	rel->tuples = 0;
	rel->allvisfrac = 0;
	rel->subplan = NULL;
	rel->subroot = NULL;
	rel->subplan_params = NIL;
	rel->serverid = InvalidOid;
	/* NOTE(review): umid is unconditionally reassigned further down, making
	 * this initialization (and the else branch below) redundant but harmless */
	rel->umid = InvalidOid;
	rel->fdwroutine = NULL;
	rel->fdw_private = NULL;
	rel->baserestrictinfo = NIL;
	rel->baserestrictcost.startup = 0;
	rel->baserestrictcost.per_tuple = 0;
	rel->joininfo = NIL;
	rel->has_eclass_joins = false;

	/* Check type of rtable entry */
	switch (rte->rtekind)
	{
		case RTE_RELATION:
			/* Table --- retrieve statistics from the system catalogs */
			get_relation_info(root, rte->relid, rte->inh, rel);
			break;
		case RTE_SUBQUERY:
		case RTE_FUNCTION:
		case RTE_VALUES:
		case RTE_CTE:

			/*
			 * Subquery, function, or values list --- set up attr range and
			 * arrays
			 *
			 * Note: 0 is included in range to support whole-row Vars
			 */
			rel->min_attr = 0;
			rel->max_attr = list_length(rte->eref->colnames);
			rel->attr_needed = (Relids *)
				palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(Relids));
			rel->attr_widths = (int32 *)
				palloc0((rel->max_attr - rel->min_attr + 1) * sizeof(int32));
			break;
		default:
			elog(ERROR, "unrecognized RTE kind: %d",
				 (int) rte->rtekind);
			break;
	}

	/* For foreign tables get the user mapping */
	if (rte->relkind == RELKIND_FOREIGN_TABLE)
	{
		/*
		 * This should match what ExecCheckRTEPerms() does.
		 *
		 * Note that if the plan ends up depending on the user OID in any
		 * way - e.g. if it depends on the computed user mapping OID - we must
		 * ensure that it gets invalidated in the case of a user OID change.
		 * See RevalidateCachedQuery and more generally the hasForeignJoin
		 * flags in PlannerGlobal and PlannedStmt.
		 */
		Oid			userid;

		/* Use the checkAsUser if set (e.g. inside a view), else current user */
		userid = OidIsValid(rte->checkAsUser) ? rte->checkAsUser : GetUserId();
		rel->umid = GetUserMappingId(userid, rel->serverid);
	}
	else
		rel->umid = InvalidOid;

	/* Save the finished struct in the query's simple_rel_array */
	root->simple_rel_array[relid] = rel;

	/*
	 * If this rel is an appendrel parent, recurse to build "other rel"
	 * RelOptInfos for its children.  They are "other rels" because they are
	 * not in the main join tree, but we will need RelOptInfos to plan access
	 * to them.
	 */
	if (rte->inh)
	{
		ListCell   *l;

		foreach(l, root->append_rel_list)
		{
			AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);

			/* append_rel_list contains all append rels; ignore others */
			if (appinfo->parent_relid != relid)
				continue;

			(void) build_simple_rel(root, appinfo->child_relid,
									RELOPT_OTHER_MEMBER_REL);
		}
	}

	return rel;
}
|
|
|
|
|
2001-05-20 22:28:20 +02:00
|
|
|
/*
|
|
|
|
* find_base_rel
|
2005-06-06 06:13:36 +02:00
|
|
|
* Find a base or other relation entry, which must already exist.
|
2001-05-20 22:28:20 +02:00
|
|
|
*/
|
|
|
|
RelOptInfo *
|
2005-06-06 00:32:58 +02:00
|
|
|
find_base_rel(PlannerInfo *root, int relid)
|
2001-05-20 22:28:20 +02:00
|
|
|
{
|
|
|
|
RelOptInfo *rel;
|
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
Assert(relid > 0);
|
2001-05-20 22:28:20 +02:00
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
if (relid < root->simple_rel_array_size)
|
2001-05-20 22:28:20 +02:00
|
|
|
{
|
2006-01-31 22:39:25 +01:00
|
|
|
rel = root->simple_rel_array[relid];
|
2005-06-06 06:13:36 +02:00
|
|
|
if (rel)
|
2001-05-20 22:28:20 +02:00
|
|
|
return rel;
|
|
|
|
}
|
|
|
|
|
2003-07-25 02:01:09 +02:00
|
|
|
elog(ERROR, "no relation entry for relid %d", relid);
|
2001-05-20 22:28:20 +02:00
|
|
|
|
|
|
|
return NULL; /* keep compiler quiet */
|
|
|
|
}
|
|
|
|
|
2005-06-09 01:02:05 +02:00
|
|
|
/*
|
|
|
|
* build_join_rel_hash
|
|
|
|
* Construct the auxiliary hash table for join relations.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
build_join_rel_hash(PlannerInfo *root)
|
|
|
|
{
|
|
|
|
HTAB *hashtab;
|
|
|
|
HASHCTL hash_ctl;
|
|
|
|
ListCell *l;
|
|
|
|
|
|
|
|
/* Create the hash table */
|
|
|
|
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
|
|
|
|
hash_ctl.keysize = sizeof(Relids);
|
|
|
|
hash_ctl.entrysize = sizeof(JoinHashEntry);
|
|
|
|
hash_ctl.hash = bitmap_hash;
|
|
|
|
hash_ctl.match = bitmap_match;
|
|
|
|
hash_ctl.hcxt = CurrentMemoryContext;
|
|
|
|
hashtab = hash_create("JoinRelHashTable",
|
|
|
|
256L,
|
|
|
|
&hash_ctl,
|
2005-10-15 04:49:52 +02:00
|
|
|
HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
|
2005-06-09 01:02:05 +02:00
|
|
|
|
|
|
|
/* Insert all the already-existing joinrels */
|
|
|
|
foreach(l, root->join_rel_list)
|
|
|
|
{
|
|
|
|
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
|
|
|
|
JoinHashEntry *hentry;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
hentry = (JoinHashEntry *) hash_search(hashtab,
|
|
|
|
&(rel->relids),
|
|
|
|
HASH_ENTER,
|
|
|
|
&found);
|
|
|
|
Assert(!found);
|
|
|
|
hentry->join_rel = rel;
|
|
|
|
}
|
|
|
|
|
|
|
|
root->join_rel_hash = hashtab;
|
|
|
|
}
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
|
|
|
* find_join_rel
|
2003-02-08 21:20:55 +01:00
|
|
|
* Returns relation entry corresponding to 'relids' (a set of RT indexes),
|
2000-02-07 05:41:04 +01:00
|
|
|
* or NULL if none exists. This is for join relations.
|
|
|
|
*/
|
2004-02-17 01:52:53 +01:00
|
|
|
RelOptInfo *
|
2005-06-06 00:32:58 +02:00
|
|
|
find_join_rel(PlannerInfo *root, Relids relids)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 01:02:05 +02:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Switch to using hash lookup when list grows "too long". The threshold
|
2005-06-09 01:02:05 +02:00
|
|
|
* is arbitrary and is known only here.
|
|
|
|
*/
|
|
|
|
if (!root->join_rel_hash && list_length(root->join_rel_list) > 32)
|
|
|
|
build_join_rel_hash(root);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 01:02:05 +02:00
|
|
|
/*
|
|
|
|
* Use either hashtable lookup or linear search, as appropriate.
|
|
|
|
*
|
2005-11-22 19:17:34 +01:00
|
|
|
* Note: the seemingly redundant hashkey variable is used to avoid taking
|
|
|
|
* the address of relids; unless the compiler is exceedingly smart, doing
|
|
|
|
* so would force relids out of a register and thus probably slow down the
|
2005-10-15 04:49:52 +02:00
|
|
|
* list-search case.
|
2005-06-09 01:02:05 +02:00
|
|
|
*/
|
|
|
|
if (root->join_rel_hash)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 01:02:05 +02:00
|
|
|
Relids hashkey = relids;
|
|
|
|
JoinHashEntry *hentry;
|
|
|
|
|
|
|
|
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
|
|
|
|
&hashkey,
|
|
|
|
HASH_FIND,
|
|
|
|
NULL);
|
|
|
|
if (hentry)
|
|
|
|
return hentry->join_rel;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ListCell *l;
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 01:02:05 +02:00
|
|
|
foreach(l, root->join_rel_list)
|
|
|
|
{
|
|
|
|
RelOptInfo *rel = (RelOptInfo *) lfirst(l);
|
|
|
|
|
|
|
|
if (bms_equal(rel->relids, relids))
|
|
|
|
return rel;
|
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2001-05-20 22:28:20 +02:00
|
|
|
* build_join_rel
|
2000-02-07 05:41:04 +01:00
|
|
|
* Returns relation entry corresponding to the union of two given rels,
|
|
|
|
* creating a new relation entry if none already exists.
|
|
|
|
*
|
2003-02-08 21:20:55 +01:00
|
|
|
* 'joinrelids' is the Relids set that uniquely identifies the join
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
|
|
|
|
* joined
|
2008-08-14 20:48:00 +02:00
|
|
|
* 'sjinfo': join context info
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr
|
|
|
|
* receives the list of RestrictInfo nodes that apply to this
|
|
|
|
* particular pair of joinable relations.
|
|
|
|
*
|
|
|
|
* restrictlist_ptr makes the routine's API a little grotty, but it saves
|
|
|
|
* duplicated calculation of the restrictlist...
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-07-18 06:22:52 +02:00
|
|
|
RelOptInfo *
|
2005-06-06 00:32:58 +02:00
|
|
|
build_join_rel(PlannerInfo *root,
|
2003-02-08 21:20:55 +01:00
|
|
|
Relids joinrelids,
|
2001-05-20 22:28:20 +02:00
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel,
|
2008-08-14 20:48:00 +02:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2001-05-20 22:28:20 +02:00
|
|
|
List **restrictlist_ptr)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-02-07 05:41:04 +01:00
|
|
|
RelOptInfo *joinrel;
|
|
|
|
List *restrictlist;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* See if we already have a joinrel for this set of base rels.
|
|
|
|
*/
|
|
|
|
joinrel = find_join_rel(root, joinrelids);
|
|
|
|
|
|
|
|
if (joinrel)
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Yes, so we only need to figure the restrictlist for this particular
|
|
|
|
* pair of component relations.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
|
|
|
if (restrictlist_ptr)
|
2001-10-18 18:11:42 +02:00
|
|
|
*restrictlist_ptr = build_joinrel_restrictlist(root,
|
|
|
|
joinrel,
|
2000-02-07 05:41:04 +01:00
|
|
|
outer_rel,
|
2007-01-20 21:45:41 +01:00
|
|
|
inner_rel);
|
2000-02-07 05:41:04 +01:00
|
|
|
return joinrel;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Nope, so make one.
|
|
|
|
*/
|
|
|
|
joinrel = makeNode(RelOptInfo);
|
2002-03-12 01:52:10 +01:00
|
|
|
joinrel->reloptkind = RELOPT_JOINREL;
|
2003-02-08 21:20:55 +01:00
|
|
|
joinrel->relids = bms_copy(joinrelids);
|
2000-02-07 05:41:04 +01:00
|
|
|
joinrel->rows = 0;
|
|
|
|
joinrel->width = 0;
|
2012-09-02 00:16:24 +02:00
|
|
|
/* cheap startup cost is interesting iff not all tuples to be retrieved */
|
|
|
|
joinrel->consider_startup = (root->tuple_fraction > 0);
|
Fix planner's cost estimation for SEMI/ANTI joins with inner indexscans.
When the inner side of a nestloop SEMI or ANTI join is an indexscan that
uses all the join clauses as indexquals, it can be presumed that both
matched and unmatched outer rows will be processed very quickly: for
matched rows, we'll stop after fetching one row from the indexscan, while
for unmatched rows we'll have an indexscan that finds no matching index
entries, which should also be quick. The planner already knew about this,
but it was nonetheless charging for at least one full run of the inner
indexscan, as a consequence of concerns about the behavior of materialized
inner scans --- but those concerns don't apply in the fast case. If the
inner side has low cardinality (many matching rows) this could make an
indexscan plan look far more expensive than it actually is. To fix,
rearrange the work in initial_cost_nestloop/final_cost_nestloop so that we
don't add the inner scan cost until we've inspected the indexquals, and
then we can add either the full-run cost or just the first tuple's cost as
appropriate.
Experimentation with this fix uncovered another problem: add_path and
friends were coded to disregard cheap startup cost when considering
parameterized paths. That's usually okay (and desirable, because it thins
the path herd faster); but in this fast case for SEMI/ANTI joins, it could
result in throwing away the desired plain indexscan path in favor of a
bitmap scan path before we ever get to the join costing logic. In the
many-matching-rows cases of interest here, a bitmap scan will do a lot more
work than required, so this is a problem. To fix, add a per-relation flag
consider_param_startup that works like the existing consider_startup flag,
but applies to parameterized paths, and set it for relations that are the
inside of a SEMI or ANTI join.
To make this patch reasonably safe to back-patch, care has been taken to
avoid changing the planner's behavior except in the very narrow case of
SEMI/ANTI joins with inner indexscans. There are places in
compare_path_costs_fuzzily and add_path_precheck that are not terribly
consistent with the new approach, but changing them will affect planner
decisions at the margins in other cases, so we'll leave that for a
HEAD-only fix.
Back-patch to 9.3; before that, the consider_startup flag didn't exist,
meaning that the second aspect of the patch would be too invasive.
Per a complaint from Peter Holzer and analysis by Tomas Vondra.
2015-06-03 17:58:47 +02:00
|
|
|
joinrel->consider_param_startup = false;
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
joinrel->consider_parallel = false;
|
2004-06-01 05:03:05 +02:00
|
|
|
joinrel->reltargetlist = NIL;
|
2000-02-07 05:41:04 +01:00
|
|
|
joinrel->pathlist = NIL;
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
joinrel->ppilist = NIL;
|
2016-01-20 20:29:22 +01:00
|
|
|
joinrel->partial_pathlist = NIL;
|
2000-02-15 21:49:31 +01:00
|
|
|
joinrel->cheapest_startup_path = NULL;
|
|
|
|
joinrel->cheapest_total_path = NULL;
|
2003-01-20 19:55:07 +01:00
|
|
|
joinrel->cheapest_unique_path = NULL;
|
2012-01-28 01:26:38 +01:00
|
|
|
joinrel->cheapest_parameterized_paths = NIL;
|
2015-12-11 21:52:16 +01:00
|
|
|
/* init direct_lateral_relids from children; we'll finish it up below */
|
|
|
|
joinrel->direct_lateral_relids =
|
|
|
|
bms_union(outer_rel->direct_lateral_relids,
|
|
|
|
inner_rel->direct_lateral_relids);
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
joinrel->lateral_relids = min_join_parameterization(root, joinrel->relids,
|
|
|
|
outer_rel, inner_rel);
|
2003-02-08 21:20:55 +01:00
|
|
|
joinrel->relid = 0; /* indicates not a baserel */
|
2002-05-12 22:10:05 +02:00
|
|
|
joinrel->rtekind = RTE_JOIN;
|
2003-06-30 01:05:05 +02:00
|
|
|
joinrel->min_attr = 0;
|
|
|
|
joinrel->max_attr = 0;
|
|
|
|
joinrel->attr_needed = NULL;
|
|
|
|
joinrel->attr_widths = NULL;
|
2012-08-27 04:48:55 +02:00
|
|
|
joinrel->lateral_vars = NIL;
|
2013-08-18 02:22:37 +02:00
|
|
|
joinrel->lateral_referencers = NULL;
|
2001-05-20 22:28:20 +02:00
|
|
|
joinrel->indexlist = NIL;
|
2000-02-07 05:41:04 +01:00
|
|
|
joinrel->pages = 0;
|
|
|
|
joinrel->tuples = 0;
|
2011-10-14 23:23:01 +02:00
|
|
|
joinrel->allvisfrac = 0;
|
2000-09-29 20:21:41 +02:00
|
|
|
joinrel->subplan = NULL;
|
2011-09-03 21:35:12 +02:00
|
|
|
joinrel->subroot = NULL;
|
Fix PARAM_EXEC assignment mechanism to be safe in the presence of WITH.
The planner previously assumed that parameter Vars having the same absolute
query level, varno, and varattno could safely be assigned the same runtime
PARAM_EXEC slot, even though they might be different Vars appearing in
different subqueries. This was (probably) safe before the introduction of
CTEs, but the lazy-evaluation mechanism used for CTEs means that a CTE can
be executed during execution of some other subquery, causing the lifespan
of Params at the same syntactic nesting level as the CTE to overlap with
use of the same slots inside the CTE. In 9.1 we created additional hazards
by using the same parameter-assignment technology for nestloop inner scan
parameters, but it was broken before that, as illustrated by the added
regression test.
To fix, restructure the planner's management of PlannerParamItems so that
items having different semantic lifespans are kept rigorously separated.
This will probably result in complex queries using more runtime PARAM_EXEC
slots than before, but the slots are cheap enough that this hardly matters.
Also, stop generating PlannerParamItems containing Params for subquery
outputs: all we really need to do is reserve the PARAM_EXEC slot number,
and that now only takes incrementing a counter. The planning code is
simpler and probably faster than before, as well as being more correct.
Per report from Vik Reykja.
These changes will mostly also need to be made in the back branches, but
I'm going to hold off on that until after 9.2.0 wraps.
2012-09-05 18:54:03 +02:00
|
|
|
joinrel->subplan_params = NIL;
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
joinrel->serverid = InvalidOid;
|
2016-01-28 20:05:36 +01:00
|
|
|
joinrel->umid = InvalidOid;
|
Revise FDW planning API, again.
Further reflection shows that a single callback isn't very workable if we
desire to let FDWs generate multiple Paths, because that forces the FDW to
do all work necessary to generate a valid Plan node for each Path. Instead
split the former PlanForeignScan API into three steps: GetForeignRelSize,
GetForeignPaths, GetForeignPlan. We had already bit the bullet of breaking
the 9.1 FDW API for 9.2, so this shouldn't cause very much additional pain,
and it's substantially more flexible for complex FDWs.
Add an fdw_private field to RelOptInfo so that the new functions can save
state there rather than possibly having to recalculate information two or
three times.
In addition, we'd not thought through what would be needed to allow an FDW
to set up subexpressions of its choice for runtime execution. We could
treat ForeignScan.fdw_private as an executable expression but that seems
likely to break existing FDWs unnecessarily (in particular, it would
restrict the set of node types allowable in fdw_private to those supported
by expression_tree_walker). Instead, invent a separate field fdw_exprs
which will receive the postprocessing appropriate for expression trees.
(One field is enough since it can be a list of expressions; also, we assume
the corresponding expression state tree(s) will be held within fdw_state,
so we don't need to add anything to ForeignScanState.)
Per review of Hanada Shigeru's pgsql_fdw patch. We may need to tweak this
further as we continue to work on that patch, but to me it feels a lot
closer to being right now.
2012-03-09 18:48:48 +01:00
|
|
|
joinrel->fdwroutine = NULL;
|
|
|
|
joinrel->fdw_private = NULL;
|
2000-02-07 05:41:04 +01:00
|
|
|
joinrel->baserestrictinfo = NIL;
|
2003-01-12 23:35:29 +01:00
|
|
|
joinrel->baserestrictcost.startup = 0;
|
|
|
|
joinrel->baserestrictcost.per_tuple = 0;
|
2000-02-07 05:41:04 +01:00
|
|
|
joinrel->joininfo = NIL;
|
2007-01-20 21:45:41 +01:00
|
|
|
joinrel->has_eclass_joins = false;
|
2000-02-07 05:41:04 +01:00
|
|
|
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
/*
|
|
|
|
* Set up foreign-join fields if outer and inner relation are foreign
|
2016-01-28 20:05:36 +01:00
|
|
|
* tables (or joins) belonging to the same server and using the same
|
|
|
|
* user mapping.
|
|
|
|
*
|
|
|
|
* Otherwise those fields are left invalid, so FDW API will not be called
|
|
|
|
* for the join relation.
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
*/
|
|
|
|
if (OidIsValid(outer_rel->serverid) &&
|
2016-01-28 20:05:36 +01:00
|
|
|
inner_rel->serverid == outer_rel->serverid &&
|
|
|
|
inner_rel->umid == outer_rel->umid)
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
{
|
2016-01-28 20:05:36 +01:00
|
|
|
Assert(OidIsValid(outer_rel->umid));
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
joinrel->serverid = outer_rel->serverid;
|
2016-01-28 20:05:36 +01:00
|
|
|
joinrel->umid = outer_rel->umid;
|
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
2015-05-10 20:36:30 +02:00
|
|
|
joinrel->fdwroutine = outer_rel->fdwroutine;
|
|
|
|
}
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Create a new tlist containing just the vars that need to be output from
|
|
|
|
* this join (ie, are needed for higher joinclauses or final output).
|
2005-06-06 06:13:36 +02:00
|
|
|
*
|
2005-10-15 04:49:52 +02:00
|
|
|
* NOTE: the tlist order for a join rel will depend on which pair of outer
|
|
|
|
* and inner rels we first try to build it from. But the contents should
|
|
|
|
* be the same regardless.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2005-06-06 06:13:36 +02:00
|
|
|
build_joinrel_tlist(root, joinrel, outer_rel);
|
|
|
|
build_joinrel_tlist(root, joinrel, inner_rel);
|
2008-10-21 22:42:53 +02:00
|
|
|
add_placeholders_to_joinrel(root, joinrel);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2015-12-11 21:52:16 +01:00
|
|
|
/*
|
|
|
|
* add_placeholders_to_joinrel also took care of adding the ph_lateral
|
|
|
|
* sets of any PlaceHolderVars computed here to direct_lateral_relids, so
|
|
|
|
* now we can finish computing that. This is much like the computation of
|
|
|
|
* the transitively-closed lateral_relids in min_join_parameterization,
|
|
|
|
* except that here we *do* have to consider the added PHVs.
|
|
|
|
*/
|
|
|
|
joinrel->direct_lateral_relids =
|
|
|
|
bms_del_members(joinrel->direct_lateral_relids, joinrel->relids);
|
|
|
|
if (bms_is_empty(joinrel->direct_lateral_relids))
|
|
|
|
joinrel->direct_lateral_relids = NULL;
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2000-04-12 19:17:23 +02:00
|
|
|
* Construct restrict and join clause lists for the new joinrel. (The
|
2005-10-15 04:49:52 +02:00
|
|
|
* caller might or might not need the restrictlist, but I need it anyway
|
|
|
|
* for set_joinrel_size_estimates().)
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
restrictlist = build_joinrel_restrictlist(root, joinrel,
|
|
|
|
outer_rel, inner_rel);
|
2000-02-07 05:41:04 +01:00
|
|
|
if (restrictlist_ptr)
|
|
|
|
*restrictlist_ptr = restrictlist;
|
|
|
|
build_joinrel_joinlist(joinrel, outer_rel, inner_rel);
|
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
/*
|
|
|
|
* This is also the right place to check whether the joinrel has any
|
|
|
|
* pending EquivalenceClass joins.
|
|
|
|
*/
|
|
|
|
joinrel->has_eclass_joins = has_relevant_eclass_joinclause(root, joinrel);
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
|
|
|
* Set estimates of the joinrel's size.
|
|
|
|
*/
|
|
|
|
set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
|
2008-08-14 20:48:00 +02:00
|
|
|
sjinfo, restrictlist);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
/*
|
|
|
|
* Set the consider_parallel flag if this joinrel could potentially be
|
|
|
|
* scanned within a parallel worker. If this flag is false for either
|
|
|
|
* inner_rel or outer_rel, then it must be false for the joinrel also.
|
2015-12-08 00:56:14 +01:00
|
|
|
* Even if both are true, there might be parallel-restricted quals at our
|
|
|
|
* level.
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
*
|
2015-12-08 00:56:14 +01:00
|
|
|
* Note that if there are more than two rels in this relation, they could
|
|
|
|
 * be divided between inner_rel and outer_rel in any arbitrary way. We
|
|
|
|
* assume this doesn't matter, because we should hit all the same baserels
|
|
|
|
* and joinclauses while building up to this joinrel no matter which we
|
|
|
|
* take; therefore, we should make the same decision here however we get
|
|
|
|
* here.
|
Generate parallel sequential scan plans in simple cases.
Add a new flag, consider_parallel, to each RelOptInfo, indicating
whether a plan for that relation could conceivably be run inside of
a parallel worker. Right now, we're pretty conservative: for example,
it might be possible to defer applying a parallel-restricted qual
in a worker, and later do it in the leader, but right now we just
don't try to parallelize access to that relation. That's probably
the right decision in most cases, anyway.
Using the new flag, generate parallel sequential scan plans for plain
baserels, meaning that we now have parallel sequential scan in
PostgreSQL. The logic here is pretty unsophisticated right now: the
costing model probably isn't right in detail, and we can't push joins
beneath Gather nodes, so the number of plans that can actually benefit
from this is pretty limited right now. Lots more work is needed.
Nevertheless, it seems time to enable this functionality so that all
this code can actually be tested easily by users and developers.
Note that, if you wish to test this functionality, it will be
necessary to set max_parallel_degree to a value greater than the
default of 0. Once a few more loose ends have been tidied up here, we
might want to consider changing the default value of this GUC, but
I'm leaving it alone for now.
Along the way, fix a bug in cost_gather: the previous coding thought
that a Gather node's transfer overhead should be costed on the basis of
the relation size rather than the number of tuples that actually need
to be passed off to the leader.
Patch by me, reviewed in earlier versions by Amit Kapila.
2015-11-11 15:02:52 +01:00
|
|
|
*/
|
|
|
|
if (inner_rel->consider_parallel && outer_rel->consider_parallel &&
|
|
|
|
!has_parallel_hazard((Node *) restrictlist, false))
|
|
|
|
joinrel->consider_parallel = true;
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Add the joinrel to the query's joinrel list, and store it into the
|
|
|
|
* auxiliary hashtable if there is one. NB: GEQO requires us to append
|
|
|
|
* the new joinrel to the end of the list!
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2005-06-09 01:02:05 +02:00
|
|
|
root->join_rel_list = lappend(root->join_rel_list, joinrel);
|
|
|
|
|
|
|
|
if (root->join_rel_hash)
|
|
|
|
{
|
|
|
|
JoinHashEntry *hentry;
|
|
|
|
bool found;
|
|
|
|
|
|
|
|
hentry = (JoinHashEntry *) hash_search(root->join_rel_hash,
|
|
|
|
&(joinrel->relids),
|
|
|
|
HASH_ENTER,
|
|
|
|
&found);
|
|
|
|
Assert(!found);
|
|
|
|
hentry->join_rel = joinrel;
|
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2009-11-28 01:46:19 +01:00
|
|
|
/*
|
|
|
|
* Also, if dynamic-programming join search is active, add the new joinrel
|
2014-05-06 18:12:18 +02:00
|
|
|
* to the appropriate sublist. Note: you might think the Assert on number
|
2010-02-26 03:01:40 +01:00
|
|
|
* of members should be for equality, but some of the level 1 rels might
|
|
|
|
* have been joinrels already, so we can only assert <=.
|
2009-11-28 01:46:19 +01:00
|
|
|
*/
|
|
|
|
if (root->join_rel_level)
|
|
|
|
{
|
|
|
|
Assert(root->join_cur_level > 0);
|
|
|
|
Assert(root->join_cur_level <= bms_num_members(joinrel->relids));
|
|
|
|
root->join_rel_level[root->join_cur_level] =
|
|
|
|
lappend(root->join_rel_level[root->join_cur_level], joinrel);
|
|
|
|
}
|
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
return joinrel;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2015-12-07 23:41:45 +01:00
|
|
|
/*
|
|
|
|
* min_join_parameterization
|
|
|
|
*
|
|
|
|
* Determine the minimum possible parameterization of a joinrel, that is, the
|
|
|
|
* set of other rels it contains LATERAL references to. We save this value in
|
|
|
|
* the join's RelOptInfo. This function is split out of build_join_rel()
|
|
|
|
* because join_is_legal() needs the value to check a prospective join.
|
|
|
|
*/
|
|
|
|
Relids
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
min_join_parameterization(PlannerInfo *root,
|
|
|
|
Relids joinrelids,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel)
|
2015-12-07 23:41:45 +01:00
|
|
|
{
|
|
|
|
Relids result;
|
|
|
|
|
|
|
|
/*
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
* Basically we just need the union of the inputs' lateral_relids, less
|
|
|
|
* whatever is already in the join.
|
|
|
|
*
|
|
|
|
* It's not immediately obvious that this is a valid way to compute the
|
|
|
|
* result, because it might seem that we're ignoring possible lateral refs
|
|
|
|
* of PlaceHolderVars that are due to be computed at the join but not in
|
|
|
|
* either input. However, because create_lateral_join_info() already
|
|
|
|
* charged all such PHV refs to each member baserel of the join, they'll
|
|
|
|
* be accounted for already in the inputs' lateral_relids. Likewise, we
|
|
|
|
* do not need to worry about doing transitive closure here, because that
|
|
|
|
* was already accounted for in the original baserel lateral_relids.
|
2015-12-07 23:41:45 +01:00
|
|
|
*/
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 20:22:20 +01:00
|
|
|
result = bms_union(outer_rel->lateral_relids, inner_rel->lateral_relids);
|
2015-12-07 23:41:45 +01:00
|
|
|
result = bms_del_members(result, joinrelids);
|
|
|
|
|
|
|
|
/* Maintain invariant that result is exactly NULL if empty */
|
|
|
|
if (bms_is_empty(result))
|
|
|
|
result = NULL;
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
1997-09-07 07:04:48 +02:00
|
|
|
/*
|
2003-06-30 01:05:05 +02:00
|
|
|
* build_joinrel_tlist
|
2008-10-21 22:42:53 +02:00
|
|
|
* Builds a join relation's target list from an input relation.
|
|
|
|
* (This is invoked twice to handle the two input relations.)
|
1997-09-07 07:04:48 +02:00
|
|
|
*
|
2003-06-30 01:05:05 +02:00
|
|
|
* The join's targetlist includes all Vars of its member relations that
|
2005-06-06 06:13:36 +02:00
|
|
|
* will still be needed above the join. This subroutine adds all such
|
|
|
|
* Vars from the specified input rel's tlist to the join rel's tlist.
|
2000-02-07 05:41:04 +01:00
|
|
|
*
|
2003-06-30 01:05:05 +02:00
|
|
|
* We also compute the expected width of the join's output, making use
|
|
|
|
* of data that was cached at the baserel level by set_rel_width().
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2003-06-30 01:05:05 +02:00
|
|
|
static void
|
2005-06-06 06:13:36 +02:00
|
|
|
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
|
|
|
|
RelOptInfo *input_rel)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2003-06-30 01:05:05 +02:00
|
|
|
Relids relids = joinrel->relids;
|
2005-06-06 06:13:36 +02:00
|
|
|
ListCell *vars;
|
1999-08-16 04:17:58 +02:00
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
foreach(vars, input_rel->reltargetlist)
|
2003-06-30 01:05:05 +02:00
|
|
|
{
|
2012-08-27 04:48:55 +02:00
|
|
|
Var *var = (Var *) lfirst(vars);
|
2005-06-06 06:13:36 +02:00
|
|
|
RelOptInfo *baserel;
|
|
|
|
int ndx;
|
|
|
|
|
2008-10-21 22:42:53 +02:00
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* Ignore PlaceHolderVars in the input tlists; we'll make our own
|
|
|
|
* decisions about whether to copy them.
|
2008-10-21 22:42:53 +02:00
|
|
|
*/
|
2012-08-27 04:48:55 +02:00
|
|
|
if (IsA(var, PlaceHolderVar))
|
2008-10-21 22:42:53 +02:00
|
|
|
continue;
|
|
|
|
|
2006-01-31 22:39:25 +01:00
|
|
|
/*
|
2012-08-27 04:48:55 +02:00
|
|
|
* Otherwise, anything in a baserel or joinrel targetlist ought to be
|
|
|
|
* a Var. (More general cases can only appear in appendrel child
|
|
|
|
* rels, which will never be seen here.)
|
2006-01-31 22:39:25 +01:00
|
|
|
*/
|
2012-08-27 04:48:55 +02:00
|
|
|
if (!IsA(var, Var))
|
|
|
|
elog(ERROR, "unexpected node type in reltargetlist: %d",
|
|
|
|
(int) nodeTag(var));
|
2003-06-30 01:05:05 +02:00
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
/* Get the Var's original base rel */
|
|
|
|
baserel = find_base_rel(root, var->varno);
|
2003-06-30 01:05:05 +02:00
|
|
|
|
2005-06-06 06:13:36 +02:00
|
|
|
/* Is it still needed above this joinrel? */
|
|
|
|
ndx = var->varattno - baserel->min_attr;
|
|
|
|
if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
|
2003-06-30 01:05:05 +02:00
|
|
|
{
|
2005-06-06 06:13:36 +02:00
|
|
|
/* Yup, add it to the output */
|
2012-08-27 04:48:55 +02:00
|
|
|
joinrel->reltargetlist = lappend(joinrel->reltargetlist, var);
|
2005-06-06 06:13:36 +02:00
|
|
|
joinrel->width += baserel->attr_widths[ndx];
|
2003-06-30 01:05:05 +02:00
|
|
|
}
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* build_joinrel_restrictlist
|
|
|
|
* build_joinrel_joinlist
|
|
|
|
* These routines build lists of restriction and join clauses for a
|
|
|
|
* join relation from the joininfo lists of the relations it joins.
|
|
|
|
*
|
|
|
|
* These routines are separate because the restriction list must be
|
|
|
|
* built afresh for each pair of input sub-relations we consider, whereas
|
2005-06-09 06:19:00 +02:00
|
|
|
* the join list need only be computed once for any join RelOptInfo.
|
|
|
|
* The join list is fully determined by the set of rels making up the
|
2000-02-07 05:41:04 +01:00
|
|
|
* joinrel, so we should get the same results (up to ordering) from any
|
2014-05-06 18:12:18 +02:00
|
|
|
* candidate pair of sub-relations. But the restriction list is whatever
|
2000-02-07 05:41:04 +01:00
|
|
|
* is not handled in the sub-relations, so it depends on which
|
|
|
|
* sub-relations are considered.
|
|
|
|
*
|
|
|
|
* If a join clause from an input relation refers to base rels still not
|
|
|
|
* present in the joinrel, then it is still a join clause for the joinrel;
|
2005-06-09 06:19:00 +02:00
|
|
|
* we put it into the joininfo list for the joinrel. Otherwise,
|
2000-02-07 05:41:04 +01:00
|
|
|
* the clause is now a restrict clause for the joined relation, and we
|
|
|
|
* return it to the caller of build_joinrel_restrictlist() to be stored in
|
2014-05-06 18:12:18 +02:00
|
|
|
* join paths made from this pair of sub-relations. (It will not need to
|
2000-02-07 05:41:04 +01:00
|
|
|
* be considered further up the join tree.)
|
|
|
|
*
|
2007-01-20 21:45:41 +01:00
|
|
|
* In many case we will find the same RestrictInfos in both input
|
|
|
|
* relations' joinlists, so be careful to eliminate duplicates.
|
|
|
|
* Pointer equality should be a sufficient test for dups, since all
|
|
|
|
* the various joinlist entries ultimately refer to RestrictInfos
|
|
|
|
* pushed into them by distribute_restrictinfo_to_rels().
|
2001-10-18 18:11:42 +02:00
|
|
|
*
|
2000-02-07 05:41:04 +01:00
|
|
|
* 'joinrel' is a join relation node
|
|
|
|
* 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
|
|
|
|
* to form joinrel.
|
|
|
|
*
|
|
|
|
* build_joinrel_restrictlist() returns a list of relevant restrictinfos,
|
|
|
|
* whereas build_joinrel_joinlist() stores its results in the joinrel's
|
2005-06-09 06:19:00 +02:00
|
|
|
* joininfo list. One or the other must accept each given clause!
|
2000-02-07 05:41:04 +01:00
|
|
|
*
|
|
|
|
* NB: Formerly, we made deep(!) copies of each input RestrictInfo to pass
|
|
|
|
* up to the join relation. I believe this is no longer necessary, because
|
2014-05-06 18:12:18 +02:00
|
|
|
* RestrictInfo nodes are no longer context-dependent. Instead, just include
|
2000-02-07 05:41:04 +01:00
|
|
|
* the original nodes in the lists made for the join relation.
|
|
|
|
*/
|
|
|
|
static List *
|
2005-06-06 00:32:58 +02:00
|
|
|
build_joinrel_restrictlist(PlannerInfo *root,
|
2001-10-18 18:11:42 +02:00
|
|
|
RelOptInfo *joinrel,
|
2000-02-07 05:41:04 +01:00
|
|
|
RelOptInfo *outer_rel,
|
2007-01-20 21:45:41 +01:00
|
|
|
RelOptInfo *inner_rel)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2002-11-24 22:52:15 +01:00
|
|
|
List *result;
|
2001-10-18 18:11:42 +02:00
|
|
|
|
|
|
|
/*
|
2007-01-20 21:45:41 +01:00
|
|
|
* Collect all the clauses that syntactically belong at this level,
|
|
|
|
* eliminating any duplicates (important since we will see many of the
|
|
|
|
* same clauses arriving from both input relations).
|
2001-10-18 18:11:42 +02:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
result = subbuild_joinrel_restrictlist(joinrel, outer_rel->joininfo, NIL);
|
|
|
|
result = subbuild_joinrel_restrictlist(joinrel, inner_rel->joininfo, result);
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2000-02-07 05:41:04 +01:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Add on any clauses derived from EquivalenceClasses. These cannot be
|
2007-01-20 21:45:41 +01:00
|
|
|
* redundant with the clauses in the joininfo lists, so don't bother
|
|
|
|
* checking.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
result = list_concat(result,
|
|
|
|
generate_join_implied_equalities(root,
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
joinrel->relids,
|
|
|
|
outer_rel->relids,
|
2007-01-20 21:45:41 +01:00
|
|
|
inner_rel));
|
2001-10-18 18:11:42 +02:00
|
|
|
|
|
|
|
return result;
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
build_joinrel_joinlist(RelOptInfo *joinrel,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel)
|
|
|
|
{
|
2007-01-20 21:45:41 +01:00
|
|
|
List *result;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Collect all the clauses that syntactically belong above this level,
|
|
|
|
* eliminating any duplicates (important since we will see many of the
|
|
|
|
* same clauses arriving from both input relations).
|
|
|
|
*/
|
|
|
|
result = subbuild_joinrel_joinlist(joinrel, outer_rel->joininfo, NIL);
|
|
|
|
result = subbuild_joinrel_joinlist(joinrel, inner_rel->joininfo, result);
|
|
|
|
|
|
|
|
joinrel->joininfo = result;
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static List *
|
|
|
|
subbuild_joinrel_restrictlist(RelOptInfo *joinrel,
|
2007-01-20 21:45:41 +01:00
|
|
|
List *joininfo_list,
|
|
|
|
List *new_restrictlist)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 06:19:00 +02:00
|
|
|
ListCell *l;
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 06:19:00 +02:00
|
|
|
foreach(l, joininfo_list)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 06:19:00 +02:00
|
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 06:19:00 +02:00
|
|
|
if (bms_is_subset(rinfo->required_relids, joinrel->relids))
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* This clause becomes a restriction clause for the joinrel, since
|
2007-01-20 21:45:41 +01:00
|
|
|
* it refers to no outside rels. Add it to the list, being
|
|
|
|
* careful to eliminate duplicates. (Since RestrictInfo nodes in
|
|
|
|
* different joinlists will have been multiply-linked rather than
|
|
|
|
* copied, pointer equality should be a sufficient test.)
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo);
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* This clause is still a join clause at this level, so we ignore
|
|
|
|
* it in this routine.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
return new_restrictlist;
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
|
|
|
|
2007-01-20 21:45:41 +01:00
|
|
|
static List *
|
2000-02-07 05:41:04 +01:00
|
|
|
subbuild_joinrel_joinlist(RelOptInfo *joinrel,
|
2007-01-20 21:45:41 +01:00
|
|
|
List *joininfo_list,
|
|
|
|
List *new_joininfo)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 06:19:00 +02:00
|
|
|
ListCell *l;
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 06:19:00 +02:00
|
|
|
foreach(l, joininfo_list)
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
2005-06-09 06:19:00 +02:00
|
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
2000-02-07 05:41:04 +01:00
|
|
|
|
2005-06-09 06:19:00 +02:00
|
|
|
if (bms_is_subset(rinfo->required_relids, joinrel->relids))
|
2000-02-07 05:41:04 +01:00
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* This clause becomes a restriction clause for the joinrel, since
|
|
|
|
* it refers to no outside rels. So we can ignore it in this
|
|
|
|
* routine.
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* This clause is still a join clause at this level, so add it to
|
2007-11-15 22:14:46 +01:00
|
|
|
* the new joininfo list, being careful to eliminate duplicates.
|
|
|
|
* (Since RestrictInfo nodes in different joinlists will have been
|
|
|
|
* multiply-linked rather than copied, pointer equality should be
|
|
|
|
* a sufficient test.)
|
2000-02-07 05:41:04 +01:00
|
|
|
*/
|
2007-01-20 21:45:41 +01:00
|
|
|
new_joininfo = list_append_unique_ptr(new_joininfo, rinfo);
|
2000-02-07 05:41:04 +01:00
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2007-01-20 21:45:41 +01:00
|
|
|
|
|
|
|
return new_joininfo;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
|
|
|
|
|
Simplify query_planner's API by having it return the top-level RelOptInfo.
Formerly, query_planner returned one or possibly two Paths for the topmost
join relation, so that grouping_planner didn't see the join RelOptInfo
(at least not directly; it didn't have any hesitation about examining
cheapest_path->parent, though). However, correct selection of the Paths
involved a significant amount of coupling between query_planner and
grouping_planner, a problem which has gotten worse over time. It seems
best to give up on this API choice and instead return the topmost
RelOptInfo explicitly. Then grouping_planner can pull out the Paths it
wants from the rel's path list. In this way we can remove all knowledge
of grouping behaviors from query_planner.
The only real benefit of the old way is that in the case of an empty
FROM clause, we never made any RelOptInfos at all, just a Path. Now
we have to gin up a dummy RelOptInfo to represent the empty FROM clause.
That's not a very big deal though.
While at it, simplify query_planner's API a bit more by having the caller
set up root->tuple_fraction and root->limit_tuples, rather than passing
those values as separate parameters. Since query_planner no longer does
anything with either value, requiring it to fill the PlannerInfo fields
seemed pretty arbitrary.
This patch just rearranges code; it doesn't (intentionally) change any
behaviors. Followup patches will do more interesting things.
2013-08-05 21:00:57 +02:00
|
|
|
/*
|
|
|
|
* build_empty_join_rel
|
|
|
|
* Build a dummy join relation describing an empty set of base rels.
|
|
|
|
*
|
|
|
|
* This is used for queries with empty FROM clauses, such as "SELECT 2+2" or
|
|
|
|
* "INSERT INTO foo VALUES(...)". We don't try very hard to make the empty
|
|
|
|
* joinrel completely valid, since no real planning will be done with it ---
|
|
|
|
* we just need it to carry a simple Result path out of query_planner().
|
|
|
|
*/
|
|
|
|
RelOptInfo *
|
|
|
|
build_empty_join_rel(PlannerInfo *root)
|
|
|
|
{
|
|
|
|
RelOptInfo *joinrel;
|
|
|
|
|
|
|
|
/* The dummy join relation should be the only one ... */
|
|
|
|
Assert(root->join_rel_list == NIL);
|
|
|
|
|
|
|
|
joinrel = makeNode(RelOptInfo);
|
|
|
|
joinrel->reloptkind = RELOPT_JOINREL;
|
|
|
|
joinrel->relids = NULL; /* empty set */
|
|
|
|
joinrel->rows = 1; /* we produce one row for such cases */
|
|
|
|
joinrel->width = 0; /* it contains no Vars */
|
|
|
|
joinrel->rtekind = RTE_JOIN;
|
|
|
|
|
|
|
|
root->join_rel_list = lappend(root->join_rel_list, joinrel);
|
|
|
|
|
|
|
|
return joinrel;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/*
|
|
|
|
* find_childrel_appendrelinfo
|
|
|
|
* Get the AppendRelInfo associated with an appendrel child rel.
|
|
|
|
*
|
|
|
|
* This search could be eliminated by storing a link in child RelOptInfos,
|
Fix some more problems with nested append relations.
As of commit a87c72915 (which later got backpatched as far as 9.1),
we're explicitly supporting the notion that append relations can be
nested; this can occur when UNION ALL constructs are nested, or when
a UNION ALL contains a table with inheritance children.
Bug #11457 from Nelson Page, as well as an earlier report from Elvis
Pranskevichus, showed that there were still nasty bugs associated with such
cases: in particular the EquivalenceClass mechanism could try to generate
"join" clauses connecting an appendrel child to some grandparent appendrel,
which would result in assertion failures or bogus plans.
Upon investigation I concluded that all current callers of
find_childrel_appendrelinfo() need to be fixed to explicitly consider
multiple levels of parent appendrels. The most complex fix was in
processing of "broken" EquivalenceClasses, which are ECs for which we have
been unable to generate all the derived equality clauses we would like to
because of missing cross-type equality operators in the underlying btree
operator family. That code path is more or less entirely untested by
the regression tests to date, because no standard opfamilies have such
holes in them. So I wrote a new regression test script to try to exercise
it a bit, which turned out to be quite a worthwhile activity as it exposed
existing bugs in all supported branches.
The present patch is essentially the same as far back as 9.2, which is
where parameterized paths were introduced. In 9.0 and 9.1, we only need
to back-patch a small fragment of commit 5b7b5518d, which fixes failure to
propagate out the original WHERE clauses when a broken EC contains constant
members. (The regression test case results show that these older branches
are noticeably stupider than 9.2+ in terms of the quality of the plans
generated; but we don't really care about plan quality in such cases,
only that the plan not be outright wrong. A more invasive fix in the
older branches would not be a good idea anyway from a plan-stability
standpoint.)
2014-10-02 01:30:24 +02:00
|
|
|
* but for now it doesn't seem performance-critical. (Also, it might be
|
|
|
|
* difficult to maintain such a link during mutation of the append_rel_list.)
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
*/
|
|
|
|
AppendRelInfo *
|
|
|
|
find_childrel_appendrelinfo(PlannerInfo *root, RelOptInfo *rel)
|
|
|
|
{
|
|
|
|
Index relid = rel->relid;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Should only be called on child rels */
|
|
|
|
Assert(rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
|
|
|
|
|
|
|
foreach(lc, root->append_rel_list)
|
|
|
|
{
|
|
|
|
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
|
|
|
|
|
|
|
|
if (appinfo->child_relid == relid)
|
|
|
|
return appinfo;
|
|
|
|
}
|
|
|
|
/* should have found the entry ... */
|
|
|
|
elog(ERROR, "child rel %d not found in append_rel_list", relid);
|
|
|
|
return NULL; /* not reached */
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Fix some more problems with nested append relations.
As of commit a87c72915 (which later got backpatched as far as 9.1),
we're explicitly supporting the notion that append relations can be
nested; this can occur when UNION ALL constructs are nested, or when
a UNION ALL contains a table with inheritance children.
Bug #11457 from Nelson Page, as well as an earlier report from Elvis
Pranskevichus, showed that there were still nasty bugs associated with such
cases: in particular the EquivalenceClass mechanism could try to generate
"join" clauses connecting an appendrel child to some grandparent appendrel,
which would result in assertion failures or bogus plans.
Upon investigation I concluded that all current callers of
find_childrel_appendrelinfo() need to be fixed to explicitly consider
multiple levels of parent appendrels. The most complex fix was in
processing of "broken" EquivalenceClasses, which are ECs for which we have
been unable to generate all the derived equality clauses we would like to
because of missing cross-type equality operators in the underlying btree
operator family. That code path is more or less entirely untested by
the regression tests to date, because no standard opfamilies have such
holes in them. So I wrote a new regression test script to try to exercise
it a bit, which turned out to be quite a worthwhile activity as it exposed
existing bugs in all supported branches.
The present patch is essentially the same as far back as 9.2, which is
where parameterized paths were introduced. In 9.0 and 9.1, we only need
to back-patch a small fragment of commit 5b7b5518d, which fixes failure to
propagate out the original WHERE clauses when a broken EC contains constant
members. (The regression test case results show that these older branches
are noticeably stupider than 9.2+ in terms of the quality of the plans
generated; but we don't really care about plan quality in such cases,
only that the plan not be outright wrong. A more invasive fix in the
older branches would not be a good idea anyway from a plan-stability
standpoint.)
2014-10-02 01:30:24 +02:00
|
|
|
/*
|
|
|
|
* find_childrel_top_parent
|
|
|
|
* Fetch the topmost appendrel parent rel of an appendrel child rel.
|
|
|
|
*
|
|
|
|
* Since appendrels can be nested, a child could have multiple levels of
|
|
|
|
* appendrel ancestors. This function locates the topmost ancestor,
|
|
|
|
* which will be a regular baserel not an otherrel.
|
|
|
|
*/
|
|
|
|
RelOptInfo *
|
|
|
|
find_childrel_top_parent(PlannerInfo *root, RelOptInfo *rel)
|
|
|
|
{
|
|
|
|
do
|
|
|
|
{
|
|
|
|
AppendRelInfo *appinfo = find_childrel_appendrelinfo(root, rel);
|
|
|
|
Index prelid = appinfo->parent_relid;
|
|
|
|
|
|
|
|
/* traverse up to the parent rel, loop if it's also a child rel */
|
|
|
|
rel = find_base_rel(root, prelid);
|
|
|
|
} while (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
|
|
|
|
|
|
|
Assert(rel->reloptkind == RELOPT_BASEREL);
|
|
|
|
|
|
|
|
return rel;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* find_childrel_parents
|
|
|
|
* Compute the set of parent relids of an appendrel child rel.
|
|
|
|
*
|
|
|
|
* Since appendrels can be nested, a child could have multiple levels of
|
|
|
|
* appendrel ancestors. This function computes a Relids set of all the
|
|
|
|
* parent relation IDs.
|
|
|
|
*/
|
|
|
|
Relids
|
|
|
|
find_childrel_parents(PlannerInfo *root, RelOptInfo *rel)
|
|
|
|
{
|
|
|
|
Relids result = NULL;
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
AppendRelInfo *appinfo = find_childrel_appendrelinfo(root, rel);
|
|
|
|
Index prelid = appinfo->parent_relid;
|
|
|
|
|
|
|
|
result = bms_add_member(result, prelid);
|
|
|
|
|
|
|
|
/* traverse up to the parent rel, loop if it's also a child rel */
|
|
|
|
rel = find_base_rel(root, prelid);
|
|
|
|
} while (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
|
|
|
|
|
|
|
|
Assert(rel->reloptkind == RELOPT_BASEREL);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
/*
 * get_baserel_parampathinfo
 *		Get the ParamPathInfo for a parameterized path for a base relation,
 *		constructing one if we don't have one already.
 *
 * This centralizes estimating the rowcounts for parameterized paths.
 * We need to cache those to be sure we use the same rowcount for all paths
 * of the same parameterization for a given rel.  This is also a convenient
 * place to determine which movable join clauses the parameterized path will
 * be responsible for evaluating.
 */
ParamPathInfo *
get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
						  Relids required_outer)
{
	ParamPathInfo *ppi;
	Relids		joinrelids;
	List	   *pclauses;
	double		rows;
	ListCell   *lc;

	/* Unparameterized paths have no ParamPathInfo */
	if (bms_is_empty(required_outer))
		return NULL;

	/* A rel may not be parameterized by any of its own relids */
	Assert(!bms_overlap(baserel->relids, required_outer));

	/* If we already have a PPI for this parameterization, just return it */
	foreach(lc, baserel->ppilist)
	{
		ppi = (ParamPathInfo *) lfirst(lc);
		if (bms_equal(ppi->ppi_req_outer, required_outer))
			return ppi;
	}

	/*
	 * Identify all joinclauses that are movable to this base rel given this
	 * parameterization.
	 */
	joinrelids = bms_union(baserel->relids, required_outer);
	pclauses = NIL;
	foreach(lc, baserel->joininfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);

		if (join_clause_is_movable_into(rinfo,
										baserel->relids,
										joinrelids))
			pclauses = lappend(pclauses, rinfo);
	}

	/*
	 * Add in joinclauses generated by EquivalenceClasses, too.  (These
	 * necessarily satisfy join_clause_is_movable_into.)
	 */
	pclauses = list_concat(pclauses,
						   generate_join_implied_equalities(root,
															joinrelids,
															required_outer,
															baserel));

	/*
	 * Estimate the number of rows returned by the parameterized scan.  This
	 * must be done after pclauses is complete, since the estimate depends on
	 * the full set of clauses the path will enforce.
	 */
	rows = get_parameterized_baserel_size(root, baserel, pclauses);

	/* And now we can build the ParamPathInfo, and cache it on the rel */
	ppi = makeNode(ParamPathInfo);
	ppi->ppi_req_outer = required_outer;
	ppi->ppi_rows = rows;
	ppi->ppi_clauses = pclauses;
	baserel->ppilist = lappend(baserel->ppilist, ppi);

	return ppi;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_joinrel_parampathinfo
|
|
|
|
* Get the ParamPathInfo for a parameterized path for a join relation,
|
|
|
|
* constructing one if we don't have one already.
|
|
|
|
*
|
|
|
|
* This centralizes estimating the rowcounts for parameterized paths.
|
|
|
|
* We need to cache those to be sure we use the same rowcount for all paths
|
|
|
|
* of the same parameterization for a given rel. This is also a convenient
|
|
|
|
* place to determine which movable join clauses the parameterized path will
|
|
|
|
* be responsible for evaluating.
|
|
|
|
*
|
|
|
|
* outer_path and inner_path are a pair of input paths that can be used to
|
|
|
|
* construct the join, and restrict_clauses is the list of regular join
|
|
|
|
* clauses (including clauses derived from EquivalenceClasses) that must be
|
|
|
|
* applied at the join node when using these inputs.
|
|
|
|
*
|
|
|
|
* Unlike the situation for base rels, the set of movable join clauses to be
|
|
|
|
* enforced at a join varies with the selected pair of input paths, so we
|
|
|
|
* must calculate that and pass it back, even if we already have a matching
|
|
|
|
* ParamPathInfo. We handle this by adding any clauses moved down to this
|
|
|
|
* join to *restrict_clauses, which is an in/out parameter. (The addition
|
|
|
|
* is done in such a way as to not modify the passed-in List structure.)
|
|
|
|
*
|
|
|
|
* Note: when considering a nestloop join, the caller must have removed from
|
|
|
|
* restrict_clauses any movable clauses that are themselves scheduled to be
|
|
|
|
* pushed into the right-hand path. We do not do that here since it's
|
|
|
|
* unnecessary for other join types.
|
|
|
|
*/
|
|
|
|
ParamPathInfo *
|
|
|
|
get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
|
|
|
|
Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
SpecialJoinInfo *sjinfo,
|
|
|
|
Relids required_outer,
|
|
|
|
List **restrict_clauses)
|
|
|
|
{
|
|
|
|
ParamPathInfo *ppi;
|
|
|
|
Relids join_and_req;
|
|
|
|
Relids outer_and_req;
|
|
|
|
Relids inner_and_req;
|
|
|
|
List *pclauses;
|
|
|
|
List *eclauses;
|
|
|
|
double rows;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Unparameterized paths have no ParamPathInfo or extra join clauses */
|
|
|
|
if (bms_is_empty(required_outer))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
Assert(!bms_overlap(joinrel->relids, required_outer));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Identify all joinclauses that are movable to this join rel given this
|
|
|
|
* parameterization. These are the clauses that are movable into this
|
|
|
|
* join, but not movable into either input path. Treat an unparameterized
|
|
|
|
* input path as not accepting parameterized clauses (because it won't,
|
|
|
|
* per the shortcut exit above), even though the joinclause movement rules
|
|
|
|
* might allow the same clauses to be moved into a parameterized path for
|
|
|
|
* that rel.
|
|
|
|
*/
|
|
|
|
join_and_req = bms_union(joinrel->relids, required_outer);
|
|
|
|
if (outer_path->param_info)
|
|
|
|
outer_and_req = bms_union(outer_path->parent->relids,
|
|
|
|
PATH_REQ_OUTER(outer_path));
|
|
|
|
else
|
2012-06-10 21:20:04 +02:00
|
|
|
outer_and_req = NULL; /* outer path does not accept parameters */
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
if (inner_path->param_info)
|
|
|
|
inner_and_req = bms_union(inner_path->parent->relids,
|
|
|
|
PATH_REQ_OUTER(inner_path));
|
|
|
|
else
|
2012-06-10 21:20:04 +02:00
|
|
|
inner_and_req = NULL; /* inner path does not accept parameters */
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
|
|
|
|
pclauses = NIL;
|
|
|
|
foreach(lc, joinrel->joininfo)
|
|
|
|
{
|
|
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
|
|
|
|
|
|
|
if (join_clause_is_movable_into(rinfo,
|
|
|
|
joinrel->relids,
|
|
|
|
join_and_req) &&
|
|
|
|
!join_clause_is_movable_into(rinfo,
|
|
|
|
outer_path->parent->relids,
|
|
|
|
outer_and_req) &&
|
|
|
|
!join_clause_is_movable_into(rinfo,
|
|
|
|
inner_path->parent->relids,
|
|
|
|
inner_and_req))
|
|
|
|
pclauses = lappend(pclauses, rinfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Consider joinclauses generated by EquivalenceClasses, too */
|
|
|
|
eclauses = generate_join_implied_equalities(root,
|
|
|
|
join_and_req,
|
|
|
|
required_outer,
|
|
|
|
joinrel);
|
|
|
|
/* We only want ones that aren't movable to lower levels */
|
|
|
|
foreach(lc, eclauses)
|
|
|
|
{
|
|
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
|
|
|
|
2015-07-28 19:20:39 +02:00
|
|
|
/*
|
|
|
|
* In principle, join_clause_is_movable_into() should accept anything
|
|
|
|
* returned by generate_join_implied_equalities(); but because its
|
|
|
|
* analysis is only approximate, sometimes it doesn't. So we
|
|
|
|
* currently cannot use this Assert; instead just assume it's okay to
|
|
|
|
* apply the joinclause at this level.
|
|
|
|
*/
|
|
|
|
#ifdef NOT_USED
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
Assert(join_clause_is_movable_into(rinfo,
|
|
|
|
joinrel->relids,
|
|
|
|
join_and_req));
|
2015-07-28 19:20:39 +02:00
|
|
|
#endif
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
if (!join_clause_is_movable_into(rinfo,
|
|
|
|
outer_path->parent->relids,
|
|
|
|
outer_and_req) &&
|
|
|
|
!join_clause_is_movable_into(rinfo,
|
|
|
|
inner_path->parent->relids,
|
|
|
|
inner_and_req))
|
|
|
|
pclauses = lappend(pclauses, rinfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now, attach the identified moved-down clauses to the caller's
|
|
|
|
* restrict_clauses list. By using list_concat in this order, we leave
|
|
|
|
* the original list structure of restrict_clauses undamaged.
|
|
|
|
*/
|
|
|
|
*restrict_clauses = list_concat(pclauses, *restrict_clauses);
|
|
|
|
|
|
|
|
/* If we already have a PPI for this parameterization, just return it */
|
|
|
|
foreach(lc, joinrel->ppilist)
|
|
|
|
{
|
|
|
|
ppi = (ParamPathInfo *) lfirst(lc);
|
|
|
|
if (bms_equal(ppi->ppi_req_outer, required_outer))
|
|
|
|
return ppi;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Estimate the number of rows returned by the parameterized join */
|
|
|
|
rows = get_parameterized_joinrel_size(root, joinrel,
|
|
|
|
outer_path->rows,
|
|
|
|
inner_path->rows,
|
|
|
|
sjinfo,
|
|
|
|
*restrict_clauses);
|
|
|
|
|
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* And now we can build the ParamPathInfo. No point in saving the
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* input-pair-dependent clause list, though.
|
|
|
|
*
|
|
|
|
* Note: in GEQO mode, we'll be called in a temporary memory context, but
|
|
|
|
* the joinrel structure is there too, so no problem.
|
|
|
|
*/
|
|
|
|
ppi = makeNode(ParamPathInfo);
|
|
|
|
ppi->ppi_req_outer = required_outer;
|
|
|
|
ppi->ppi_rows = rows;
|
|
|
|
ppi->ppi_clauses = NIL;
|
|
|
|
joinrel->ppilist = lappend(joinrel->ppilist, ppi);
|
|
|
|
|
|
|
|
return ppi;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get_appendrel_parampathinfo
|
|
|
|
* Get the ParamPathInfo for a parameterized path for an append relation.
|
|
|
|
*
|
|
|
|
* For an append relation, the rowcount estimate will just be the sum of
|
2014-05-06 18:12:18 +02:00
|
|
|
* the estimates for its children. However, we still need a ParamPathInfo
|
|
|
|
* to flag the fact that the path requires parameters. So this just creates
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 21:52:46 +02:00
|
|
|
* a suitable struct with zero ppi_rows (and no ppi_clauses either, since
|
|
|
|
* the Append node isn't responsible for checking quals).
|
|
|
|
*/
|
|
|
|
ParamPathInfo *
|
|
|
|
get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer)
|
|
|
|
{
|
|
|
|
ParamPathInfo *ppi;
|
|
|
|
ListCell *lc;
|
|
|
|
|
|
|
|
/* Unparameterized paths have no ParamPathInfo */
|
|
|
|
if (bms_is_empty(required_outer))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
Assert(!bms_overlap(appendrel->relids, required_outer));
|
|
|
|
|
|
|
|
/* If we already have a PPI for this parameterization, just return it */
|
|
|
|
foreach(lc, appendrel->ppilist)
|
|
|
|
{
|
|
|
|
ppi = (ParamPathInfo *) lfirst(lc);
|
|
|
|
if (bms_equal(ppi->ppi_req_outer, required_outer))
|
|
|
|
return ppi;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Else build the ParamPathInfo */
|
|
|
|
ppi = makeNode(ParamPathInfo);
|
|
|
|
ppi->ppi_req_outer = required_outer;
|
|
|
|
ppi->ppi_rows = 0;
|
|
|
|
ppi->ppi_clauses = NIL;
|
|
|
|
appendrel->ppilist = lappend(appendrel->ppilist, ppi);
|
|
|
|
|
|
|
|
return ppi;
|
|
|
|
}
|